diff --git a/lookup-service/service/caches/portals.py b/lookup-service/service/caches/portals.py index 6ee0f157..3f835c8f 100644 --- a/lookup-service/service/caches/portals.py +++ b/lookup-service/service/caches/portals.py @@ -2,9 +2,9 @@ import logging from dataclasses import dataclass -from typing import TYPE_CHECKING, Any, Dict, List, Tuple, Union +from typing import TYPE_CHECKING, Dict, List, Tuple, Union -from aiohttp import BasicAuth, ClientSession +from aiohttp import BasicAuth, ClientSession, ClientConnectorError from .clusters import ClusterConfig @@ -179,31 +179,43 @@ def connected(self): async def login(self) -> bool: """Login to the portal service .""" - async with self.session.post( - f"{self.portal.url}/oauth2/token/", - data={ - "grant_type": "password", - "username": self.portal.credentials.username, - "password": self.portal.credentials.password, - }, - auth=BasicAuth( - self.portal.credentials.client_id, self.portal.credentials.client_secret - ), - ) as response: - if response.status != 200: - logger.error( - "Failed to login to portal %s of cluster %s.", - self.portal.name, - self.portal.cluster.name, - ) + try: + async with self.session.post( + f"{self.portal.url}/oauth2/token/", + data={ + "grant_type": "password", + "username": self.portal.credentials.username, + "password": self.portal.credentials.password, + }, + auth=BasicAuth( + self.portal.credentials.client_id, + self.portal.credentials.client_secret, + ), + ) as response: + if response.status != 200: + logger.error( + "Failed to login to portal %s of cluster %s.", + self.portal.name, + self.portal.cluster.name, + ) + + return False + + data = await response.json() + + self.access_token = data.get("access_token") - return False - - data = await response.json() + return True - self.access_token = data.get("access_token") + except ClientConnectorError as exc: + logger.error( + "Failed to connect to portal %s of cluster %s when attempting to login: %s", + self.portal.name, + self.portal.cluster.name, + exc, + ) - return True + return False async def logout(self) -> None: """Logout from the portal service.""" @@ -211,44 +223,29 @@ async def logout(self) -> None: if not self.connected: return - async with self.session.post( - f"{self.portal.url}/oauth2/revoke-token/", - data={ - "client_id": self.portal.credentials.client_id, - "client_secret": self.portal.credentials.client_secret, - "token": self.access_token, - }, - ) as response: - if response.status != 200: - logger.error( - "Failed to logout from portal %s of cluster %s.", - self.portal.name, - self.portal.cluster.name, - ) - - async def user_sessions(self, user_id: str) -> List[Dict[str, Any]]: - """Fetches the list of active sessions for a user.""" - - if not self.connected: - return {} - - headers = {"Authorization": f"Bearer {self.access_token}"} - - async with self.session.get( - f"{self.portal.url}/workshops/user/{user_id}/sessions/", - headers=headers, - ) as response: - if response.status != 200: - logger.error( - "Failed to get sessions from portal %s of cluster %s for user %s.", - self.portal.name, - self.portal.cluster.name, - user_id, - ) - - return {} - - return await response.json() + try: + async with self.session.post( + f"{self.portal.url}/oauth2/revoke-token/", + data={ + "client_id": self.portal.credentials.client_id, + "client_secret": self.portal.credentials.client_secret, + "token": self.access_token, + }, + ) as response: + if response.status != 200: + logger.error( + "Failed to logout from portal %s of cluster %s.", + self.portal.name, + self.portal.cluster.name, + ) + + except ClientConnectorError as exc: + logger.error( + "Failed to connect to portal %s of cluster %s when attempting to logout: %s", + self.portal.name, + self.portal.cluster.name, + exc, + ) async def reacquire_workshop_session( self, user_id: str, environment_name: str, session_name: str, index_url: str @@ -263,39 +260,50 @@ async def reacquire_workshop_session( headers = {"Authorization": f"Bearer {self.access_token}"} - async with self.session.get( - f"{self.portal.url}/workshops/environment/{environment_name}/request/", - headers=headers, - params={ - "index_url": index_url, - "user": user_id, - "session": session_name, - }, - ) as response: - if response.status != 200: - logger.error( - "Failed to reacquire session %s from portal %s of cluster %s for user %s.", - session_name, - self.portal.name, - self.portal.cluster.name, - user_id, - ) - - return - - data = await response.json() - - url = data.get("url") - - if url: - return { - "clusterName": self.portal.cluster.name, - "portalName": self.portal.name, - "environmentName": environment_name, - "sessionName": session_name, - "clientUserId": user_id, - "sessionActionvationUrl": f"{self.portal.url}{url}", - } + try: + async with self.session.get( + f"{self.portal.url}/workshops/environment/{environment_name}/request/", + headers=headers, + params={ + "index_url": index_url, + "user": user_id, + "session": session_name, + }, + ) as response: + if response.status != 200: + logger.error( + "Failed to reacquire session %s from portal %s of cluster %s for user %s.", + session_name, + self.portal.name, + self.portal.cluster.name, + user_id, + ) + + return + + data = await response.json() + + url = data.get("url") + + if url: + return { + "clusterName": self.portal.cluster.name, + "portalName": self.portal.name, + "environmentName": environment_name, + "sessionName": session_name, + "clientUserId": user_id, + "sessionActionvationUrl": f"{self.portal.url}{url}", + } + + except ClientConnectorError as exc: + logger.error( + "Failed to connect to portal %s of cluster %s when attempting to reacquire session %s for user %s: %s", # pylint: disable=line-too-long + self.portal.name, + self.portal.cluster.name, + session_name, + user_id, + exc, + ) async def request_workshop_session( self, @@ -311,36 +319,46 @@ async def request_workshop_session( headers = {"Authorization": f"Bearer {self.access_token}"} - async with self.session.get( - f"{self.portal.url}/workshops/environment/{environment_name}/request/", - headers=headers, - params={ - "user": user_id, - "parameters": parameters, - "index_url": index_url, - }, - ) as response: - if response.status != 200: - logger.error( - "Failed to request session from portal %s of cluster %s for user %s.", - self.portal.name, - self.portal.cluster.name, - user_id, - ) - - return - - data = await response.json() - - url = data.get("url") - session_name = data.get("name") - - if url: - return { - "clusterName": self.portal.cluster.name, - "portalName": self.portal.name, - "environmentName": environment_name, - "sessionName": session_name, - "clientUserId": user_id, - "sessionActionvationUrl": f"{self.portal.url}{url}", - } + try: + async with self.session.get( + f"{self.portal.url}/workshops/environment/{environment_name}/request/", + headers=headers, + params={ + "user": user_id, + "parameters": parameters, + "index_url": index_url, + }, + ) as response: + if response.status != 200: + logger.error( + "Failed to request session from portal %s of cluster %s for user %s.", + self.portal.name, + self.portal.cluster.name, + user_id, + ) + + return + + data = await response.json() + + url = data.get("url") + session_name = data.get("name") + + if url: + return { + "clusterName": self.portal.cluster.name, + "portalName": self.portal.name, + "environmentName": environment_name, + "sessionName": session_name, + "clientUserId": user_id, + "sessionActionvationUrl": f"{self.portal.url}{url}", + } + + except ClientConnectorError as exc: + logger.error( + "Failed to connect to portal %s of cluster %s when attempting to request session for user %s: %s", # pylint: disable=line-too-long + self.portal.name, + self.portal.cluster.name, + user_id, + exc, + ) diff --git a/lookup-service/service/routes/workshops.py b/lookup-service/service/routes/workshops.py index 4a5071f6..eacd303f 100644 --- a/lookup-service/service/routes/workshops.py +++ b/lookup-service/service/routes/workshops.py @@ -1,9 +1,11 @@ """REST API handlers for workshop requests.""" import logging +from typing import List from aiohttp import web +from ..caches.environments import WorkshopEnvironment from .authnz import login_required, roles_accepted logger = logging.getLogger("educates") @@ -28,7 +30,9 @@ async def api_get_v1_workshops(request: web.Request) -> web.Response: if "tenant" in client_roles: if not tenant_name: - logger.warning("Missing tenant name in request from client %r.", client_name) + logger.warning( + "Missing tenant name in request from client %r.", client_name + ) return web.Response(text="Missing tenant name", status=400) @@ -202,12 +206,14 @@ async def api_post_v1_workshops(request: web.Request) -> web.Response: return web.Response(text="Workshop not available", status=503) - # Choose the best workshop environment to allocate a session from based on - # available capacity of the workshop environment and the portal hosting it. + # Sort the workshop environments so that those deemed to be the best + # candidates for running a workshop session are at the front of the list. + + environments = sort_workshop_environments(environments) - environment = choose_best_workshop_environment(environments) + # Loop over the workshop environments and try to allocate a session. - if environment: + for environment in environments: data = await environment.request_workshop_session( user_id, parameters, index_url ) @@ -217,113 +223,124 @@ async def api_post_v1_workshops(request: web.Request) -> web.Response: return web.json_response(data) # If we get here, then we don't believe there is any available capacity for - # creating a workshop session. Even so, attempt to create a session against - # any workshop environment, just make sure that we don't try and use the - # same workshop environment we just tried to get a session from. + # creating a workshop session. - if environment: - environments.remove(environment) + logger.warning( + "Workshop %r requested by client %r not available", workshop_name, client_name + ) - if not environments: - logger.warning( - "Workshop %r requested by client %r not available", - workshop_name, - client_name, - ) + return web.Response(text="Workshop not available", status=503) - return web.Response(text="Workshop not available", status=503) - environment = environments[0] +def sort_workshop_environments( + environments: List[WorkshopEnvironment], +) -> List[WorkshopEnvironment]: + """Sort the list of workshop environments such that those deemed to be the + best candidates for running a workshop session are at the front of the + list.""" - data = await environment.request_workshop_session(user_id, parameters, index_url) + def score_based_on_portal_availability(environment: WorkshopEnvironment) -> int: + """Return a score based on the remaining capacity of the portal hosting + the workshop environment. Note that at this point we only return 0 or 1 + indicating whether there is any capacity left or not and not how much + capacity.""" - if data: - data["tenantName"] = tenant_name - return web.json_response(data) + # If the portal doesn't have a maximum capacity specified and as such + # there is no limit to the number of workshop sessions return 1. - # If we get here, then we don't believe there is any available capacity for - # creating a workshop session. + if not environment.portal.capacity: + return 1 - logger.warning( - "Workshop %r requested by client %r not available", workshop_name, client_name - ) + # If the portal has a maximum capacity specified and there is no more + # capacity left, return 0. - return web.Response(text="Workshop not available", status=503) + if environment.portal.capacity - environment.portal.allocated <= 0: + return 0 + # Otherwise return 1 indicating there is capacity. -def choose_best_workshop_environment(environments): - """Choose the best workshop environment to allocate a session from.""" + return 1 - if len(environments) == 1: - return environments[0] + def score_based_on_environment_availability( + environment: WorkshopEnvironment, + ) -> int: + """Return a score based on the remaining capacity of the workshop + environment. Note that at this point we only return 0 or 1 indicating + whether there is any capacity left or not and not how much capacity.""" - # First discard any workshop environment which have no more space available. + # If the environment doesn't have a maximum capacity specified and as + # such there is no limit to the number of workshop sessions return 1. - environments = [ - environment - for environment in environments - if environment.capacity and (environment.capacity - environment.allocated > 0) - ] + if not environment.capacity: + return 1 - # Also discard any workshop environments where the portal as a whole has - # no more capacity. + # If the environment has a maximum capacity specified and there is no + # more capacity left, return 0. - environments = [ - environment - for environment in environments - if environment.portal.capacity - and (environment.portal.capacity - environment.portal.allocated > 0) - ] + if environment.capacity - environment.allocated <= 0: + return 0 - # If there is only one workshop environment left, return it. + # Otherwise return 1 indicating there is capacity. - if len(environments) == 1: - return environments[0] + return 1 - # If there are no workshop environments left, return None. + def score_based_on_reserved_sessions(environment: WorkshopEnvironment) -> int: + """Return a score based on the number of reserved sessions currently + available for the workshop environment. Where as we didn't before, we + also take into account the actual available capacity of the portal + hosting the workshop environment.""" - if len(environments) == 0: - return None + # If the portal doesn't have a maximum capacity specified we treat it + # as if there is only 1 spot left so that we give priority to portals + # that do specify an actual capacity. - # If there are multiple workshop environments left, starting with the portal - # with the most capacity remaining, look at number of reserved sessions - # available for a workshop environment and if any, allocate it from the - # workshop environment with the most. In other words, sort based on the - # number of reserved sessions and if the first in the resulting list has - # reserved sessions, use that. + capacity = 1 - def score_based_on_reserved_sessions(environment): - return ( - environment.portal.capacity - and (environment.portal.capacity - environment.portal.allocated) - or 1, - environment.available, - ) + if environment.portal.capacity: + capacity = environment.portal.capacity - environment.portal.allocated - environments.sort(key=score_based_on_reserved_sessions, reverse=True) - - if environments[0].available > 0: - return environments[0] - - # If there are no reserved sessions available, starting with the portal - # with the most capacity remaining, look at the available capacity within - # the workshop environment. In other words, sort based on the number of free - # spots in the workshop environment and if the first in the resulting list - # has free spots, use that. - - def score_based_on_available_capacity(environment): - return ( - environment.portal.capacity - and (environment.portal.capacity - environment.portal.allocated) - or 1, - environment.capacity - and (environment.capacity - environment.allocated) - or 1, - ) + # Return the capacity of the portal in conjunction with the number of + # reserved sessions which are currently available. + + return (capacity, environment.available) + + def score_based_on_available_capacity(environment: WorkshopEnvironment) -> int: + """Return a score based on the available capacity of the workshop + environment. Where as we didn't before, we also take into account the + actual available capacity of the portal hosting the workshop + environment.""" - environments.sort(key=score_based_on_available_capacity, reverse=True) + # If the portal doesn't have a maximum capacity specified we treat it + # as if there is only 1 spot left so that we give priority to portals + # that do specify an actual capacity. - return environments[0] + capacity = 1 + + if environment.portal.capacity: + capacity = environment.portal.capacity - environment.portal.allocated + + # If the environment doesn't have a maximum capacity specified we treat + # it as if there is only 1 spot left so that we give priority to + # environments that do specify an actual capacity. + + if not environment.capacity: + return (capacity, 1) + + # Return the capacity of the portal in conjunction with the available + # capacity of the workshop environment. + + return (capacity, environment.capacity - environment.allocated) + + return sorted( + environments, + key=lambda environment: ( + score_based_on_portal_availability(environment), + score_based_on_environment_availability(environment), + score_based_on_reserved_sessions(environment), + score_based_on_available_capacity(environment), + ), + reverse=True, + ) # Set up the routes for the workshop management API.