From a2f5c19d29f5867349bae294c462533ef3a01d5b Mon Sep 17 00:00:00 2001 From: shane knapp Date: Fri, 29 Sep 2023 11:51:22 -0700 Subject: [PATCH 1/7] fix pagination, basically a complete rewrite --- scripts/delete-unused-users.py | 153 ++++++++++++++++++++++++--------- 1 file changed, 112 insertions(+), 41 deletions(-) diff --git a/scripts/delete-unused-users.py b/scripts/delete-unused-users.py index 1192750e5..5880fccf1 100755 --- a/scripts/delete-unused-users.py +++ b/scripts/delete-unused-users.py @@ -12,58 +12,129 @@ ok. """ import argparse -from jhub_client.api import JupyterHubAPI -from dateutil.parser import parse import asyncio from datetime import timedelta, datetime +from dateutil.parser import parse +from jhub_client.api import JupyterHubAPI +import logging +import os +import requests +import sys + +logging.basicConfig(stream=sys.stdout, level=logging.WARNING) +logger = logging.getLogger(__name__) + +token = os.environ["JUPYTERHUB_API_TOKEN"] +headers = { + "Accept": "application/jupyterhub-pagination+json", + "Authorization": f"Bearer {token}", +} + +def retrieve_users(hub_url): + """Returns generator of user models that should be deleted""" + url = hub_url.rstrip("/") + "/hub/api/users" + next_page = True + params = {} + + while next_page: + r = requests.get(url, headers=headers, params=params) + r.raise_for_status() + resp = r.json() + user_list = resp["items"] + for user in user_list: + # only yield users that should be deleted + if should_delete(user): + yield user + + pagination = resp["_pagination"] + next_page = pagination["next"] + if next_page: + params = { + "offset": next_page["offset"], + "limit": next_page["limit"], + } + +def should_delete(user): + """ + Returns a boolean if user is to be deleted. The critera are: + - was the user active in the past 24 hours? + - is there a current user server running? 
+ """ + last_activity_str = user.get('last_activity', False) + if last_activity_str: + try: + last_activity = parse(user['last_activity']) + except: + logger.error(f"Unexpected value for user['last_activity']: {user['last_activity']}") + raise + if isinstance(last_activity, datetime): + was_active_last_day = datetime.now().astimezone() - last_activity < timedelta(hours=24) + else: + logger.error(f"For user {user['name']}, expected datetime.datetime class for last_activity but got {type(last_activity)} instead.") + raise + + logger.debug(f"User: {user['name']}") + logger.debug(f"Last login: {last_activity}") + logger.debug(f"24hrs since last login: {was_active_last_day}") + logger.debug(f"Running server: {user['server']}") + if was_active_last_day or user['server'] is not None: + logger.info(f"Not deleting {user['name']}") + return False + else: + logger.info(f"Flagged {user['name']} for deletion.") + return True -async def main(): +async def delete_user(hub, user, count): + """Delete a user from the hub ORM""" + username = user['name'] + print(f"{count}: deleting {username}") + if not args.dry_run: + await hub.delete_user(username) + else: + logger.warning("Skipped due to dry run.") + +async def main(args): + """ + Get users from a hub, check to see if they should be deleted from the ORM + and if so, delete them! 
+ """ + hub = JupyterHubAPI(hub_url=args.hub_url) + count = 1 + for user in list(retrieve_users(args.hub_url)): + await delete_user(hub, user, count) + count += 1 + count -= 1 + print(f"Deleted {count} total users.") + +if __name__ == "__main__": argparser = argparse.ArgumentParser() argparser.add_argument( - 'hub_url', - help='Fully qualified URL to the JupyterHub' + '--hub_url', + help='Fully qualified URL to the JupyterHub', + required=True ) argparser.add_argument( '--dry_run', action='store_true', help='Dry run without deleting users' ) + argparser.add_argument( + '-v', + dest='verbose', + action='store_true', + help='Set info log level' + ) + argparser.add_argument( + '-d', + dest='debug', + action='store_true', + help='Set debug log level' + ) args = argparser.parse_args() - to_delete = [] - async with JupyterHubAPI(hub_url=args.hub_url) as hub: - users = await hub.list_users() - for user in users: - last_activity_str = user.get('last_activity', False) - if last_activity_str: - try: - last_activity = parse(user['last_activity']) - except: - print(user['last_activity']) - raise - if isinstance(last_activity, datetime): - was_active_last_day = datetime.now().astimezone() - last_activity < timedelta(hours=24) - else: - print(f"For user {user['name']}, expected datetime.datetime class for last_activity but got {type(last_activity)} instead.") - raise - - print(f"User: {user['name']}") - print(f"Last login: {last_activity}") - print(f"24hrs since last login: {was_active_last_day}") - print(f"Running server: {user['server']}") - if was_active_last_day or user['server'] is not None: - print(f"Not deleting {user['name']}") - else: - to_delete.append(user['name']) - print(f"Deleting {user['name']}") - print("") - - for i, username in enumerate(to_delete): - print(f'{i+1} of {len(to_delete)}: deleting {username}') - if not args.dry_run: - await hub.delete_user(username) - else: - print('Skipped due to dry run.') + if args.verbose: + logger.setLevel(logging.INFO) + 
elif args.debug: + logger.setLevel(logging.DEBUG) -if __name__ == '__main__': - asyncio.run(main()) + asyncio.run(main(args)) From 9c610d104c65da6a7f74fd468a30b6a25e3faab8 Mon Sep 17 00:00:00 2001 From: shane knapp Date: Fri, 29 Sep 2023 19:51:47 -0700 Subject: [PATCH 2/7] basically just stolen code lol --- scripts/delete-unused-users.py | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/scripts/delete-unused-users.py b/scripts/delete-unused-users.py index 5880fccf1..46ff75a33 100755 --- a/scripts/delete-unused-users.py +++ b/scripts/delete-unused-users.py @@ -84,25 +84,29 @@ def should_delete(user): logger.info(f"Flagged {user['name']} for deletion.") return True -async def delete_user(hub, user, count): - """Delete a user from the hub ORM""" - username = user['name'] - print(f"{count}: deleting {username}") - if not args.dry_run: - await hub.delete_user(username) - else: - logger.warning("Skipped due to dry run.") +def delete_user(hub_url, name): + """Delete a given user by name via JupyterHub API""" + r = requests.delete( + hub_url.rstrip("/") + f"/hub/api/users/{name}", + headers=headers, + ) + r.raise_for_status() -async def main(args): +def main(args): """ Get users from a hub, check to see if they should be deleted from the ORM and if so, delete them! 
""" - hub = JupyterHubAPI(hub_url=args.hub_url) count = 1 for user in list(retrieve_users(args.hub_url)): - await delete_user(hub, user, count) - count += 1 + print(f"{count}: deleting {user['name']}") + if not args.dry_run: + delete_user(args.hub_url, user['name']) + count += 1 + else: + logger.warning(f"Skipped {user['name']} due to dry run.") + # await delete_user(hub, user, count) + count -= 1 print(f"Deleted {count} total users.") @@ -137,4 +141,4 @@ async def main(args): elif args.debug: logger.setLevel(logging.DEBUG) - asyncio.run(main(args)) + main(args) From 6b42240684e7181c0c837256c18568e8a91ef0d2 Mon Sep 17 00:00:00 2001 From: shane knapp Date: Fri, 29 Sep 2023 19:53:05 -0700 Subject: [PATCH 3/7] give attribution --- scripts/delete-unused-users.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scripts/delete-unused-users.py b/scripts/delete-unused-users.py index 46ff75a33..bbfe1214f 100755 --- a/scripts/delete-unused-users.py +++ b/scripts/delete-unused-users.py @@ -10,6 +10,9 @@ make sure they aren't active right now. This will require users to log in again the next time they use the hub, but that's probably ok. 
+ +Core functionality from @minrk: +https://discourse.jupyter.org/t/is-there-a-way-to-bulk-delete-old-users/20866/3 """ import argparse import asyncio From a3399df5662bb2e79532788b09a2fc220e73b80d Mon Sep 17 00:00:00 2001 From: shane knapp Date: Wed, 4 Oct 2023 14:24:18 -0700 Subject: [PATCH 4/7] address comments, quick bugfix w/indentation --- scripts/delete-unused-users.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/scripts/delete-unused-users.py b/scripts/delete-unused-users.py index bbfe1214f..fbfe41b9a 100755 --- a/scripts/delete-unused-users.py +++ b/scripts/delete-unused-users.py @@ -15,15 +15,15 @@ https://discourse.jupyter.org/t/is-there-a-way-to-bulk-delete-old-users/20866/3 """ import argparse -import asyncio from datetime import timedelta, datetime -from dateutil.parser import parse -from jhub_client.api import JupyterHubAPI import logging import os import requests import sys +from dateutil.parser import parse +from jhub_client.api import JupyterHubAPI + logging.basicConfig(stream=sys.stdout, level=logging.WARNING) logger = logging.getLogger(__name__) @@ -103,10 +103,10 @@ def main(args): count = 1 for user in list(retrieve_users(args.hub_url)): print(f"{count}: deleting {user['name']}") + count += 1 if not args.dry_run: delete_user(args.hub_url, user['name']) - count += 1 - else: + else: logger.warning(f"Skipped {user['name']} due to dry run.") # await delete_user(hub, user, count) @@ -127,12 +127,14 @@ def main(args): ) argparser.add_argument( '-v', + '--verbose', dest='verbose', action='store_true', help='Set info log level' ) argparser.add_argument( '-d', + '--debug', dest='debug', action='store_true', help='Set debug log level' From c2de00c84c13eba72cf324c24bc25d291c4b893a Mon Sep 17 00:00:00 2001 From: shane knapp Date: Wed, 4 Oct 2023 14:25:53 -0700 Subject: [PATCH 5/7] at the end of the run, state exactly where these users were deleted from --- scripts/delete-unused-users.py | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/scripts/delete-unused-users.py b/scripts/delete-unused-users.py index fbfe41b9a..243ad5260 100755 --- a/scripts/delete-unused-users.py +++ b/scripts/delete-unused-users.py @@ -111,7 +111,7 @@ def main(args): # await delete_user(hub, user, count) count -= 1 - print(f"Deleted {count} total users.") + print(f"Deleted {count} total users from the ORM.") if __name__ == "__main__": argparser = argparse.ArgumentParser() From df941bdd176dcfc85d8dd42c618f68a8e7c4a696 Mon Sep 17 00:00:00 2001 From: shane knapp Date: Wed, 4 Oct 2023 14:36:18 -0700 Subject: [PATCH 6/7] update docs --- docs/admins/howto/remove-users-orm.rst | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/docs/admins/howto/remove-users-orm.rst b/docs/admins/howto/remove-users-orm.rst index 538f9b6bc..ab11565dc 100644 --- a/docs/admins/howto/remove-users-orm.rst +++ b/docs/admins/howto/remove-users-orm.rst @@ -25,6 +25,4 @@ You can run the script on your own device. The script depends on the `jhub_clien #. You will need to acquire a JupyterHub API token with administrative rights. A hub admin can go to {hub_url}/hub/token to create a new one. #. Set the environment variable `JUPYTERHUB_API_TOKEN` to the token. -#. Run `python scripts/delete-unused-users.py {hub_url}` - -The script currently does not paginate properly, meaning that it operates on the first 200 users provided by the hub. If there are less then 200 active users it is sufficient to keep running the script in a loop until all inactive users are removed. If there are more than 200 active users this procedure will be inadequate. (the script needs to be fixed!) +#. 
Run `python scripts/delete-unused-users.py --hub_url {hub_url}` From 015ae85f845860e49d09e1254a4ed582483b0e5b Mon Sep 17 00:00:00 2001 From: shane knapp Date: Wed, 4 Oct 2023 14:37:42 -0700 Subject: [PATCH 7/7] add short form arg for hub url --- scripts/delete-unused-users.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/delete-unused-users.py b/scripts/delete-unused-users.py index 243ad5260..9c23445b9 100755 --- a/scripts/delete-unused-users.py +++ b/scripts/delete-unused-users.py @@ -116,6 +116,7 @@ def main(args): if __name__ == "__main__": argparser = argparse.ArgumentParser() argparser.add_argument( + '-H', '--hub_url', help='Fully qualified URL to the JupyterHub', required=True