diff --git a/docs/admins/howto/remove-users-orm.rst b/docs/admins/howto/remove-users-orm.rst index 538f9b6bc..ab11565dc 100644 --- a/docs/admins/howto/remove-users-orm.rst +++ b/docs/admins/howto/remove-users-orm.rst @@ -25,6 +25,4 @@ You can run the script on your own device. The script depends on the `jhub_clien #. You will need to acquire a JupyterHub API token with administrative rights. A hub admin can go to {hub_url}/hub/token to create a new one. #. Set the environment variable `JUPYTERHUB_API_TOKEN` to the token. -#. Run `python scripts/delete-unused-users.py {hub_url}` - -The script currently does not paginate properly, meaning that it operates on the first 200 users provided by the hub. If there are less then 200 active users it is sufficient to keep running the script in a loop until all inactive users are removed. If there are more than 200 active users this procedure will be inadequate. (the script needs to be fixed!) +#. Run `python scripts/delete-unused-users.py --hub_url {hub_url}` diff --git a/scripts/delete-unused-users.py b/scripts/delete-unused-users.py index 1192750e5..9c23445b9 100755 --- a/scripts/delete-unused-users.py +++ b/scripts/delete-unused-users.py @@ -10,60 +10,141 @@ make sure they aren't active right now. This will require users to log in again the next time they use the hub, but that's probably ok. 
# Core functionality from @minrk:
# https://discourse.jupyter.org/t/is-there-a-way-to-bulk-delete-old-users/20866/3

import argparse
from datetime import datetime, timedelta, timezone
import logging
import os
import sys
from urllib.parse import quote

from dateutil.parser import parse
# NOTE(review): JupyterHubAPI is not referenced by the code below; the import
# is kept because other parts of the file may be outside this view.
from jhub_client.api import JupyterHubAPI
import requests

logging.basicConfig(stream=sys.stdout, level=logging.WARNING)
logger = logging.getLogger(__name__)

# A user whose last recorded activity is more recent than this is never deleted.
INACTIVITY_THRESHOLD = timedelta(hours=24)

# Seconds to wait for the hub to answer one API request before giving up,
# so an unresponsive hub cannot hang the script forever.
REQUEST_TIMEOUT = 60

# Read at import time; a missing variable raises KeyError immediately.
token = os.environ["JUPYTERHUB_API_TOKEN"]
headers = {
    # Ask for the paginated response shape ("items" + "_pagination") that
    # retrieve_users() reads.
    "Accept": "application/jupyterhub-pagination+json",
    "Authorization": f"Bearer {token}",
}


def retrieve_users(hub_url):
    """Yield the user models from the hub that should be deleted.

    Requests ``{hub_url}/hub/api/users`` page by page, following the ``next``
    entry of each response's ``_pagination`` object until it is empty.

    Args:
        hub_url: Fully qualified URL of the JupyterHub (a trailing slash is
            tolerated).

    Yields:
        Each user model (dict) for which ``should_delete`` returns True.

    Raises:
        requests.HTTPError: If the hub answers a page with an error status.
    """
    url = hub_url.rstrip("/") + "/hub/api/users"
    params = {}

    while True:
        r = requests.get(
            url, headers=headers, params=params, timeout=REQUEST_TIMEOUT
        )
        r.raise_for_status()
        resp = r.json()

        for user in resp["items"]:
            # Only yield users that should be deleted.
            if should_delete(user):
                yield user

        next_page = resp["_pagination"]["next"]
        if not next_page:
            break
        params = {
            "offset": next_page["offset"],
            "limit": next_page["limit"],
        }


def should_delete(user):
    """Return True if the user should be deleted, False otherwise.

    The criteria are:
      - was the user active in the past 24 hours?
      - is there a current user server running?

    A user is flagged only when both answers are "no". A user with no
    recorded ``last_activity`` is never flagged: this tool is destructive, so
    missing data is resolved in favour of keeping the user.

    Args:
        user: User model dict with at least ``name`` and ``server`` keys and
            an optional ``last_activity`` timestamp string.

    Returns:
        bool: True when the user is inactive and has no running server.

    Raises:
        TypeError, ValueError, OverflowError: If ``last_activity`` cannot be
            parsed into a ``datetime``.
    """
    name = user["name"]

    last_activity_str = user.get("last_activity")
    if not last_activity_str:
        logger.info("Not deleting %s (no recorded activity)", name)
        return False

    try:
        last_activity = parse(last_activity_str)
    except (TypeError, ValueError, OverflowError):
        logger.error(
            "Unexpected value for user['last_activity']: %s", last_activity_str
        )
        raise

    if not isinstance(last_activity, datetime):
        # A bare `raise` here would have no active exception to re-raise,
        # so raise an explicit error carrying the same message.
        msg = (
            f"For user {name}, expected datetime.datetime class for "
            f"last_activity but got {type(last_activity)} instead."
        )
        logger.error(msg)
        raise TypeError(msg)

    if last_activity.tzinfo is None:
        # NOTE(review): assumes a timestamp without an offset is UTC --
        # confirm against the hub version in use. Without this, subtracting
        # it from an aware "now" raises TypeError.
        last_activity = last_activity.replace(tzinfo=timezone.utc)

    idle_for = datetime.now(timezone.utc) - last_activity
    was_active_last_day = idle_for < INACTIVITY_THRESHOLD

    logger.debug("User: %s", name)
    logger.debug("Last login: %s", last_activity)
    logger.debug("Active in the last 24hrs: %s", was_active_last_day)
    logger.debug("Running server: %s", user["server"])

    if was_active_last_day or user["server"] is not None:
        logger.info("Not deleting %s", name)
        return False

    logger.info("Flagged %s for deletion.", name)
    return True


def delete_user(hub_url, name):
    """Delete a given user by name via the JupyterHub API.

    Args:
        hub_url: Fully qualified URL of the JupyterHub.
        name: Username to delete; percent-encoded before being placed in the
            URL path so reserved characters cannot change the endpoint.

    Raises:
        requests.HTTPError: If the hub answers with an error status.
    """
    url = hub_url.rstrip("/") + "/hub/api/users/" + quote(name, safe="")
    r = requests.delete(url, headers=headers, timeout=REQUEST_TIMEOUT)
    r.raise_for_status()


def main(args):
    """Delete every user of a hub that ``should_delete`` flags.

    Args:
        args: Parsed command-line namespace providing ``hub_url`` (str) and
            ``dry_run`` (bool). With ``dry_run`` set nothing is deleted; the
            candidates are only reported.
    """
    # Build the complete list before deleting anything. retrieve_users() pages
    # through the hub by offset, so deleting during that walk could presumably
    # shift later pages and cause users to be skipped.
    to_delete = list(retrieve_users(args.hub_url))
    total = len(to_delete)

    for count, user in enumerate(to_delete, start=1):
        name = user["name"]
        action = "would delete" if args.dry_run else "deleting"
        print(f"{count} of {total}: {action} {name}")
        if args.dry_run:
            logger.warning("Skipped %s due to dry run.", name)
        else:
            delete_user(args.hub_url, name)

    if args.dry_run:
        print(f"Dry run: {total} users would have been deleted from the ORM.")
    else:
        print(f"Deleted {total} total users from the ORM.")


if __name__ == "__main__":
    argparser = argparse.ArgumentParser()
    argparser.add_argument(
        # Not "-h": argparse reserves it for --help and rejects a second use.
        "-H",
        "--hub_url",
        help="Fully qualified URL to the JupyterHub",
        required=True,
    )
    argparser.add_argument(
        "--dry_run",
        action="store_true",
        help="Dry run without deleting users",
    )
    argparser.add_argument(
        "-v",
        "--verbose",
        dest="verbose",
        action="store_true",
        help="Set info log level",
    )
    argparser.add_argument(
        "-d",
        "--debug",
        dest="debug",
        action="store_true",
        help="Set debug log level",
    )
    args = argparser.parse_args()

    # Check the more detailed level first so "-v -d" yields debug output.
    if args.debug:
        logger.setLevel(logging.DEBUG)
    elif args.verbose:
        logger.setLevel(logging.INFO)

    main(args)