Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DH-145] fix pagination, basically a complete rewrite #5093

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions docs/admins/howto/remove-users-orm.rst
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,4 @@ You can run the script on your own device. The script depends on the `jhub_clien

#. You will need to acquire a JupyterHub API token with administrative rights. A hub admin can go to {hub_url}/hub/token to create a new one.
#. Set the environment variable `JUPYTERHUB_API_TOKEN` to the token.
#. Run `python scripts/delete-unused-users.py {hub_url}`

The script currently does not paginate properly, meaning that it operates only on the first 200 users provided by the hub. If there are fewer than 200 active users, it is sufficient to keep running the script in a loop until all inactive users are removed. If there are more than 200 active users, this procedure will be inadequate (the script needs to be fixed!).
#. Run `python scripts/delete-unused-users.py --hub_url {hub_url}`
167 changes: 124 additions & 43 deletions scripts/delete-unused-users.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,60 +10,141 @@
make sure they aren't active right now. This will require users to
log in again the next time they use the hub, but that's probably
ok.

Core functionality from @minrk:
https://discourse.jupyter.org/t/is-there-a-way-to-bulk-delete-old-users/20866/3
"""
import argparse
from jhub_client.api import JupyterHubAPI
from dateutil.parser import parse
import asyncio
from datetime import timedelta, datetime
import logging
import os
import requests
import sys

from dateutil.parser import parse
from jhub_client.api import JupyterHubAPI

logging.basicConfig(stream=sys.stdout, level=logging.WARNING)
logger = logging.getLogger(__name__)

token = os.environ["JUPYTERHUB_API_TOKEN"]
headers = {
"Accept": "application/jupyterhub-pagination+json",
"Authorization": f"Bearer {token}",
}

def retrieve_users(hub_url):
    """
    Generate the user models from the hub that qualify for deletion.

    Requests the ``/hub/api/users`` endpoint page by page, following the
    ``next`` entry of each response's ``_pagination`` section until it is
    empty, and yields only the users that ``should_delete`` accepts.
    """
    endpoint = hub_url.rstrip("/") + "/hub/api/users"
    query = {}  # the first request is sent without paging parameters

    while True:
        response = requests.get(endpoint, headers=headers, params=query)
        response.raise_for_status()
        payload = response.json()

        # hand back only the users on this page that should be deleted
        yield from (entry for entry in payload["items"] if should_delete(entry))

        upcoming = payload["_pagination"]["next"]
        if not upcoming:
            break
        query = {
            "offset": upcoming["offset"],
            "limit": upcoming["limit"],
        }

def should_delete(user):
    """
    Decide whether a user should be deleted.

    A user is flagged for deletion only when both criteria hold:
    - the user was not active in the past 24 hours
    - the user has no server currently running

    A user without a recorded ``last_activity`` is never flagged, because
    there is nothing to judge inactivity by.

    Args:
        user: user model dict; the ``last_activity``, ``name`` and ``server``
            keys are read.

    Returns:
        True if the user should be deleted, False otherwise.

    Raises:
        TypeError: if the parsed ``last_activity`` is not a datetime.
        Exception: whatever ``parse`` raises for an unparseable timestamp is
            logged and re-raised unchanged.
    """
    last_activity_str = user.get('last_activity', False)
    if not last_activity_str:
        return False

    try:
        last_activity = parse(last_activity_str)
    except Exception:
        logger.error(f"Unexpected value for user['last_activity']: {last_activity_str}")
        raise

    if not isinstance(last_activity, datetime):
        message = (
            f"For user {user['name']}, expected datetime.datetime class "
            f"for last_activity but got {type(last_activity)} instead."
        )
        logger.error(message)
        # a bare `raise` here has no active exception to re-raise, so raise
        # an explicit error that carries the logged message
        raise TypeError(message)

    # NOTE(review): the subtraction needs a timezone-aware last_activity;
    # presumably the hub always reports one -- confirm.
    was_active_last_day = datetime.now().astimezone() - last_activity < timedelta(hours=24)

    logger.debug(f"User: {user['name']}")
    logger.debug(f"Last login: {last_activity}")
    logger.debug(f"24hrs since last login: {was_active_last_day}")
    logger.debug(f"Running server: {user['server']}")

    if was_active_last_day or user['server'] is not None:
        logger.info(f"Not deleting {user['name']}")
        return False

    logger.info(f"Flagged {user['name']} for deletion.")
    return True

def delete_user(hub_url, name):
    """
    Delete a given user by name via the JupyterHub API.

    Args:
        hub_url: base URL of the hub; a trailing slash is tolerated.
        name: name of the user to delete.

    Raises:
        requests.HTTPError: if the hub answers with an error status.
    """
    from urllib.parse import quote

    # Percent-encode the name so characters such as '/', '?' or '#' cannot
    # change which path the request is sent to.
    r = requests.delete(
        hub_url.rstrip("/") + f"/hub/api/users/{quote(name, safe='')}",
        headers=headers,
    )
    r.raise_for_status()

def main(args):
    """
    Get users from a hub, check to see if they should be deleted from the ORM
    and if so, delete them!

    Args:
        args: parsed command-line arguments; ``hub_url`` and ``dry_run`` are
            read.
    """
    # Collect every flagged user before deleting anything.
    # NOTE(review): presumably this keeps deletions from shifting the
    # offset-based pages while they are still being fetched -- confirm.
    flagged = list(retrieve_users(args.hub_url))

    for count, user in enumerate(flagged, start=1):
        print(f"{count}: deleting {user['name']}")
        if not args.dry_run:
            delete_user(args.hub_url, user['name'])
        else:
            logger.warning(f"Skipped {user['name']} due to dry run.")

    # Do not claim that users were deleted when nothing was removed.
    if args.dry_run:
        print(f"Dry run: {len(flagged)} total users would be deleted from the ORM.")
    else:
        print(f"Deleted {len(flagged)} total users from the ORM.")

def build_argparser():
    """
    Build the command-line parser for this script.

    Options:
        -H / --hub_url: fully qualified URL to the JupyterHub (required)
        --dry_run: report what would be deleted without deleting anything
        -v / --verbose: set the info log level
        -d / --debug: set the debug log level

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    argparser = argparse.ArgumentParser()
    argparser.add_argument(
        # '-h' is already taken by argparse's built-in help option and would
        # raise a conflicting-option error, so the short flag is '-H'.
        '-H',
        '--hub_url',
        help='Fully qualified URL to the JupyterHub',
        required=True,
    )
    argparser.add_argument(
        '--dry_run',
        action='store_true',
        help='Dry run without deleting users',
    )
    argparser.add_argument(
        '-v',
        '--verbose',
        dest='verbose',
        action='store_true',
        help='Set info log level',
    )
    argparser.add_argument(
        '-d',
        '--debug',
        dest='debug',
        action='store_true',
        help='Set debug log level',
    )
    return argparser


if __name__ == "__main__":
    args = build_argparser().parse_args()

    # Debug is the more detailed level, so it wins when both flags are given.
    if args.debug:
        logger.setLevel(logging.DEBUG)
    elif args.verbose:
        logger.setLevel(logging.INFO)

    main(args)