Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DH-145] fix pagination, basically a complete rewrite #5093

Merged
Merged
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
160 changes: 119 additions & 41 deletions scripts/delete-unused-users.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,60 +10,138 @@
make sure they aren't active right now. This will require users to
log in again the next time they use the hub, but that's probably
ok.

Core functionality from @minrk:
https://discourse.jupyter.org/t/is-there-a-way-to-bulk-delete-old-users/20866/3
"""
import argparse
from jhub_client.api import JupyterHubAPI
from dateutil.parser import parse
import asyncio
from datetime import timedelta, datetime
from dateutil.parser import parse
from jhub_client.api import JupyterHubAPI
import logging
import os
import requests
import sys

# Log to stdout; the root logger is configured at WARNING level.
logging.basicConfig(stream=sys.stdout, level=logging.WARNING)
logger = logging.getLogger(__name__)

# The API token is read from the environment when the module loads; a
# missing JUPYTERHUB_API_TOKEN variable raises KeyError on this line.
token = os.environ["JUPYTERHUB_API_TOKEN"]
# Headers shared by the `requests` calls in this file.
# NOTE(review): the Accept value presumably selects the paginated response
# format whose "items" / "_pagination" fields retrieve_users() reads --
# confirm against the JupyterHub REST API documentation.
headers = {
"Accept": "application/jupyterhub-pagination+json",
"Authorization": f"Bearer {token}",
}

def retrieve_users(hub_url):
    """
    Yield every user model from the hub for which should_delete() is truthy.

    Requests <hub_url>/hub/api/users page by page. After each page, the
    "next" entry of the response's "_pagination" object supplies the
    offset and limit for the following request; iteration stops once that
    entry is falsy. A failed HTTP request raises via raise_for_status().
    """
    endpoint = hub_url.rstrip("/") + "/hub/api/users"
    query = {}

    while True:
        response = requests.get(endpoint, headers=headers, params=query)
        response.raise_for_status()
        page = response.json()

        # only yield users that should be deleted
        yield from (user for user in page["items"] if should_delete(user))

        following = page["_pagination"]["next"]
        if not following:
            break
        query = {
            "offset": following["offset"],
            "limit": following["limit"],
        }

def should_delete(user):
    """
    Return whether the given user should be deleted.

    A user is flagged for deletion only when both of these hold:
      - their last recorded activity is 24 hours old or more, and
      - they have no running server (user['server'] is None).

    A user with no recorded last_activity is never flagged.

    Args:
        user: user model dict; the keys 'name', 'last_activity' and
            'server' are read.

    Returns:
        True if the user should be deleted, False otherwise.

    Raises:
        Exception: whatever parse() raises for an unparseable
            last_activity value (logged, then re-raised unchanged).
        TypeError: if parse() returns something other than a datetime.
    """
    last_activity_str = user.get('last_activity')
    if not last_activity_str:
        # No recorded activity: keep the user, and say so with an explicit
        # False instead of falling off the end of the function.
        return False

    try:
        last_activity = parse(last_activity_str)
    except Exception:
        # `except Exception` (not a bare except) so Ctrl-C / SystemExit
        # pass straight through without being logged as bad data.
        logger.error(f"Unexpected value for user['last_activity']: {last_activity_str}")
        raise

    if not isinstance(last_activity, datetime):
        message = (
            f"For user {user['name']}, expected datetime.datetime class "
            f"for last_activity but got {type(last_activity)} instead."
        )
        logger.error(message)
        # There is no active exception here, so a bare `raise` would only
        # produce "RuntimeError: No active exception to re-raise".
        raise TypeError(message)

    was_active_last_day = datetime.now().astimezone() - last_activity < timedelta(hours=24)

    logger.debug(f"User: {user['name']}")
    logger.debug(f"Last login: {last_activity}")
    logger.debug(f"24hrs since last login: {was_active_last_day}")
    logger.debug(f"Running server: {user['server']}")

    if was_active_last_day or user['server'] is not None:
        logger.info(f"Not deleting {user['name']}")
        return False

    logger.info(f"Flagged {user['name']} for deletion.")
    return True

async def main():
def delete_user(hub_url, name):
    """
    Delete a given user by name via the JupyterHub API.

    Sends DELETE <hub_url>/hub/api/users/<name> with the module-level
    `headers`.

    Args:
        hub_url: base URL of the hub; trailing slashes are stripped.
        name: username to delete.

    Raises:
        requests.HTTPError: if the hub answers with an error status
            (via raise_for_status()).
    """
    # Imported here so the function does not depend on the file's import block.
    from urllib.parse import quote

    # Percent-encode the name so that characters such as '/', '?' or '#'
    # cannot change which resource the DELETE request targets.
    r = requests.delete(
        hub_url.rstrip("/") + "/hub/api/users/" + quote(name, safe=""),
        headers=headers,
    )
    r.raise_for_status()

def main(args):
    """
    Get users from a hub, check to see if they should be deleted from the ORM
    and if so, delete them!

    Args:
        args: parsed command-line arguments; `hub_url` and `dry_run` are
            read.

    Every candidate is printed with its position in the list. With
    `dry_run` set nothing is deleted and a warning is logged per user
    instead. The final line reports how many users were actually deleted.
    """
    # Collect the complete candidate list before deleting anything:
    # retrieve_users() pages through the hub by offset, so removing users
    # while the listing is still in progress could shift later pages.
    candidates = list(retrieve_users(args.hub_url))

    deleted = 0
    # The display index is tracked separately from the deleted tally so a
    # dry run still numbers each candidate instead of printing "1:" each time.
    for index, user in enumerate(candidates, start=1):
        print(f"{index}: deleting {user['name']}")
        if args.dry_run:
            logger.warning(f"Skipped {user['name']} due to dry run.")
            continue
        delete_user(args.hub_url, user['name'])
        deleted += 1

    print(f"Deleted {deleted} total users.")

if __name__ == "__main__":
argparser = argparse.ArgumentParser()
argparser.add_argument(
'hub_url',
help='Fully qualified URL to the JupyterHub'
'--hub_url',
help='Fully qualified URL to the JupyterHub',
required=True
)
argparser.add_argument(
'--dry_run',
action='store_true',
help='Dry run without deleting users'
)
argparser.add_argument(
'-v',
dest='verbose',
action='store_true',
help='Set info log level'
)
argparser.add_argument(
'-d',
dest='debug',
action='store_true',
help='Set debug log level'
)
args = argparser.parse_args()

to_delete = []
async with JupyterHubAPI(hub_url=args.hub_url) as hub:
users = await hub.list_users()
for user in users:
last_activity_str = user.get('last_activity', False)
if last_activity_str:
try:
last_activity = parse(user['last_activity'])
except:
print(user['last_activity'])
raise
if isinstance(last_activity, datetime):
was_active_last_day = datetime.now().astimezone() - last_activity < timedelta(hours=24)
else:
print(f"For user {user['name']}, expected datetime.datetime class for last_activity but got {type(last_activity)} instead.")
raise

print(f"User: {user['name']}")
print(f"Last login: {last_activity}")
print(f"24hrs since last login: {was_active_last_day}")
print(f"Running server: {user['server']}")
if was_active_last_day or user['server'] is not None:
print(f"Not deleting {user['name']}")
else:
to_delete.append(user['name'])
print(f"Deleting {user['name']}")
print("")

for i, username in enumerate(to_delete):
print(f'{i+1} of {len(to_delete)}: deleting {username}')
if not args.dry_run:
await hub.delete_user(username)
else:
print('Skipped due to dry run.')
if args.verbose:
logger.setLevel(logging.INFO)
elif args.debug:
logger.setLevel(logging.DEBUG)

if __name__ == '__main__':
asyncio.run(main())
main(args)