From 0c5fbb74188be7a79f1f0664e04b57f0a6e13b3a Mon Sep 17 00:00:00 2001
From: Tony Bartsits
Date: Wed, 30 Oct 2024 15:50:37 +0000
Subject: [PATCH] agent options check

---
Review note: is_agent_container_ready_to_continue() was hardened against a
null "Binds" value after the original commit, so the commit id above and the
agent_utils.py post-image blob id below no longer match this content.

 Dockerfile                  |  3 +--
 agent/main.py               | 22 ++++++++++++++++------
 agent/worker/agent_utils.py | 18 +++++++++++++++---
 3 files changed, 32 insertions(+), 11 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index d8cc26e..551dd4c 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -46,5 +46,4 @@ COPY agent /workdir/agent
 #ENV PYTHONPATH /workdir:/workdir/src:/workdir/supervisely_lib/worker_proto:$PYTHONPATH
 
 WORKDIR /workdir/agent
-ENTRYPOINT ["sh", "-c", "python -u /workdir/agent/main.py"]
-
+ENTRYPOINT ["python", "-u", "/workdir/agent/main.py"]
diff --git a/agent/main.py b/agent/main.py
index 3eedb52..104f490 100644
--- a/agent/main.py
+++ b/agent/main.py
@@ -213,12 +213,22 @@ def init_envs():
     try:
         agent_utils.check_instance_version()
         new_envs, new_volumes, ca_cert = agent_utils.updated_agent_options()
-    except agent_utils.AgentOptionsNotAvailable:
-        sly.logger.debug("Can not update agent options", exc_info=True)
-        sly.logger.warning(
-            "Can not update agent options. Agent will be started with current options"
-        )
-        return
+    except Exception as e:
+        if not agent_utils.is_agent_container_ready_to_continue():
+            sly.logger.error(
+                "Agent options are not available. Agent will be stopped. Please, check the connection to the server"
+            )
+            raise
+
+        if isinstance(e, agent_utils.AgentOptionsNotAvailable):
+            sly.logger.debug("Can not update agent options", exc_info=True)
+            sly.logger.warning(
+                "Can not update agent options. Agent will be started with current options"
+            )
+            return
+
+        raise
+
     if new_envs.get(constants._FORCE_CPU_ONLY, "false") == "true":
         runtime = "runc"
         runtime_changed = _is_runtime_changed(runtime)
diff --git a/agent/worker/agent_utils.py b/agent/worker/agent_utils.py
index ae8b30b..32565ce 100644
--- a/agent/worker/agent_utils.py
+++ b/agent/worker/agent_utils.py
@@ -623,7 +623,7 @@ def get_agent_options(server_address=None, token=None, timeout=60) -> dict:
         method,
         data={"token": token},
     )
-    if resp.status_code != requests.codes.ok:  # pylint: disable=no-member
+    if resp is None or resp.status_code != requests.codes.ok:  # pylint: disable=no-member
         try:
             text = resp.text
         except:
@@ -641,8 +641,8 @@ def get_instance_version(server_address=None, timeout=60):
     api = sly.Api(server_address=server_address)
     resp = api.get("instance.version", {})
 
-    if resp.status_code != requests.codes.ok:  # pylint: disable=no-member
-        if resp.status_code in (400, 401, 403, 404):
+    if resp is None or resp.status_code != requests.codes.ok:  # pylint: disable=no-member
+        if resp is not None and resp.status_code in (400, 401, 403, 404):
             return None
         try:
             text = resp.text
@@ -934,6 +934,18 @@ def _ca_cert_changed(ca_cert) -> str:
     return cert_path
 
 
+def is_agent_container_ready_to_continue():
+    """Return True when the agent container has at least three bind mounts.
+
+    Expected binds: docker socket, agent data files, apps data files.
+    """
+    host_config = get_container_info().get("HostConfig") or {}
+    # Docker may report "Binds": null when a container has no bind mounts,
+    # so a `.get` default alone does not guarantee a list here.
+    volumes = binds_to_volumes_dict(host_config.get("Binds") or [])
+    return len(volumes) >= 3
+
+
 def get_options_changes(envs: dict, volumes: dict, ca_cert: str) -> Tuple[dict, dict, str]:
     return _envs_changes(envs), _volumes_changes(volumes), _ca_cert_changed(ca_cert)
 