From ed4f8760b390455d49c0a1a3b3f36036abb15720 Mon Sep 17 00:00:00 2001
From: Nikolai Petukhov <pnikolay01@icloud.com>
Date: Wed, 30 Oct 2024 12:54:50 -0300
Subject: [PATCH] Major fixes and improvements (#90)

* docker login rework to store auth in task memory

* add retries on docker image pull

* add https redirect to requests

* docker pull rework

* change requests to sly Api

* bugfix

* add timeout for tasks

* pull + timeout

* bugfixes + timeout error

* agent options check

---------

Co-authored-by: Tony Bartsits <tony@supervisely.com>
---
 Dockerfile                             |   5 +-
 agent/main.py                          |  22 +-
 agent/worker/agent.py                  |  35 +---
 agent/worker/agent_utils.py            |  51 ++---
 agent/worker/docker_utils.py           | 269 ++++++++++++++++---------
 agent/worker/task_app.py               | 145 ++++++++-----
 agent/worker/task_pull_docker_image.py |  20 +-
 requirements.txt                       |   2 +-
 8 files changed, 326 insertions(+), 223 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 79fad83..551dd4c 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -41,10 +41,9 @@ RUN apt-get update \
 COPY requirements.txt /workdir/requirements.txt
 RUN pip install --no-cache-dir -r /workdir/requirements.txt
 
-COPY . /workdir
+COPY agent /workdir/agent
 
 #ENV PYTHONPATH /workdir:/workdir/src:/workdir/supervisely_lib/worker_proto:$PYTHONPATH
 WORKDIR /workdir/agent
 
-ENTRYPOINT ["sh", "-c", "python -u /workdir/agent/main.py"]
-
+ENTRYPOINT ["python", "-u", "/workdir/agent/main.py"]
diff --git a/agent/main.py b/agent/main.py
index 3eedb52..104f490 100644
--- a/agent/main.py
+++ b/agent/main.py
@@ -213,12 +213,22 @@ def init_envs():
     try:
         agent_utils.check_instance_version()
         new_envs, new_volumes, ca_cert = agent_utils.updated_agent_options()
-    except agent_utils.AgentOptionsNotAvailable:
-        sly.logger.debug("Can not update agent options", exc_info=True)
-        sly.logger.warning(
-            "Can not update agent options. Agent will be started with current options"
-        )
-        return
+    except Exception as e:
+        if not agent_utils.is_agent_container_ready_to_continue():
+            sly.logger.error(
+                "Agent options are not available. Agent will be stopped. Please, check the connection to the server"
+            )
+            raise
+
+        if isinstance(e, agent_utils.AgentOptionsNotAvailable):
+            sly.logger.debug("Can not update agent options", exc_info=True)
+            sly.logger.warning(
+                "Can not update agent options. Agent will be started with current options"
+            )
+            return
+
+        raise
+
     if new_envs.get(constants._FORCE_CPU_ONLY, "false") == "true":
         runtime = "runc"
         runtime_changed = _is_runtime_changed(runtime)
diff --git a/agent/worker/agent.py b/agent/worker/agent.py
index adcfaff..f1e780c 100644
--- a/agent/worker/agent.py
+++ b/agent/worker/agent.py
@@ -84,7 +84,6 @@ def __init__(self):
         self.docker_api = docker.from_env(
             version="auto", timeout=constants.DOCKER_API_CALL_TIMEOUT()
         )
-        self._docker_login()
 
         self.logger.info("Agent is ready to get tasks.")
         self.api = sly.AgentAPI(
@@ -451,9 +450,6 @@ def _stop_missed_containers(self, ecosystem_token):
                 },
             )
 
-    def _docker_login(self):
-        agent_utils.docker_login(self.docker_api, self.logger)
-
     def submit_log(self):
         while True:
             log_lines = self.log_queue.get_log_batch_nowait()
@@ -657,30 +653,13 @@ def update_base_layers(self):
                     )
                     image = f"{constants.SLY_APPS_DOCKER_REGISTRY()}/{image}"
 
-                try:
-                    docker_utils.docker_pull_if_needed(
-                        self.docker_api,
-                        image,
-                        policy=docker_utils.PullPolicy.ALWAYS,
-                        logger=self.logger,
-                        progress=False,
-                    )
-                except DockerException as e:
-                    if "no basic auth credentials" in str(e).lower():
-                        self.logger.warn(
-                            f"Failed to pull docker image '{image}'. Will try to login and pull again",
-                            exc_info=True,
-                        )
-                        self._docker_login()
-                        docker_utils.docker_pull_if_needed(
-                            self.docker_api,
-                            image,
-                            policy=docker_utils.PullPolicy.ALWAYS,
-                            logger=self.logger,
-                            progress=False,
-                        )
-                    else:
-                        raise e
+                docker_utils.docker_pull_if_needed(
+                    self.docker_api,
+                    image,
+                    policy=docker_utils.PullPolicy.ALWAYS,
+                    logger=self.logger,
+                    progress=False,
+                )
 
                 self.logger.info(f"Docker image '{image}' has been pulled successfully")
                 pulled.append(image)
diff --git a/agent/worker/agent_utils.py b/agent/worker/agent_utils.py
index e01dd16..32565ce 100644
--- a/agent/worker/agent_utils.py
+++ b/agent/worker/agent_utils.py
@@ -616,9 +616,14 @@ def get_agent_options(server_address=None, token=None, timeout=60) -> dict:
     if token is None:
         token = constants.TOKEN()
 
-    url = constants.PUBLIC_API_SERVER_ADDRESS() + "agents.options.info"
-    resp = requests.post(url=url, json={"token": token}, timeout=timeout)
-    if resp.status_code != requests.codes.ok:  # pylint: disable=no-member
+    api = sly.Api(server_address=server_address)
+    method = "agents.options.info"
+
+    resp = api.post(
+        method,
+        data={"token": token},
+    )
+    if resp is None or resp.status_code != requests.codes.ok:  # pylint: disable=no-member
         try:
             text = resp.text
         except:
@@ -633,10 +638,11 @@ def get_agent_options(server_address=None, token=None, timeout=60) -> dict:
 def get_instance_version(server_address=None, timeout=60):
     if server_address is None:
         server_address = constants.SERVER_ADDRESS()
-    url = constants.PUBLIC_API_SERVER_ADDRESS() + "instance.version"
-    resp = requests.get(url=url, timeout=timeout)
-    if resp.status_code != requests.codes.ok:  # pylint: disable=no-member
-        if resp.status_code in (400, 401, 403, 404):
+
+    api = sly.Api(server_address=server_address)
+    resp = api.get("instance.version", {})
+    if resp is None or resp.status_code != requests.codes.ok:  # pylint: disable=no-member
+        if resp is not None and resp.status_code in (400, 401, 403, 404):
             return None
         try:
             text = resp.text
@@ -928,6 +934,20 @@ def _ca_cert_changed(ca_cert) -> str:
     return cert_path
 
 
+def is_agent_container_ready_to_continue():
+    """Check that the agent container has the bind mounts it needs to continue.
+
+    At least 3 volumes are expected: docker socket, agent data files and
+    apps data files. Returns False when fewer are mounted, True otherwise.
+    """
+    host_config = get_container_info().get("HostConfig", {})
+    # The inspect data may carry "Binds": null (no bind mounts at all), so
+    # fall back to an empty list instead of passing None to the converter.
+    volumes = binds_to_volumes_dict(host_config.get("Binds") or [])
+
+    return len(volumes) >= 3
+
+
 def get_options_changes(envs: dict, volumes: dict, ca_cert: str) -> Tuple[dict, dict, str]:
     return _envs_changes(envs), _volumes_changes(volumes), _ca_cert_changed(ca_cert)
 
@@ -1080,20 +1100,3 @@ def maybe_update_runtime():
 def convert_millicores_to_cpu_quota(millicores, cpu_period=100000):
     cpu_quota = (millicores / 1000) * cpu_period
     return int(cpu_quota)
-
-
-def docker_login(docker_api, logger):
-    doc_logs = constants.DOCKER_LOGIN().split(",")
-    doc_pasws = constants.DOCKER_PASSWORD().split(",")
-    doc_regs = constants.DOCKER_REGISTRY().split(",")
-
-    for login, password, registry in zip(doc_logs, doc_pasws, doc_regs):
-        if registry:
-            try:
-                doc_login = docker_api.login(username=login, password=password, registry=registry)
-                logger.info(
-                    "DOCKER_CLIENT_LOGIN_SUCCESS", extra={**doc_login, "registry": registry}
-                )
-            except Exception as e:
-                if not constants.OFFLINE_MODE():
-                    raise e
diff --git a/agent/worker/docker_utils.py b/agent/worker/docker_utils.py
index a3c34eb..7d14a13 100644
--- a/agent/worker/docker_utils.py
+++ b/agent/worker/docker_utils.py
@@ -3,11 +3,18 @@
 
 import json
 from enum import Enum
-from typing import Optional
+import time
+from typing import Dict, Optional
 
 from supervisely.app import DialogWindowError
 from supervisely.task.progress import Progress
 
+from worker import constants
+
+
+PULL_RETRIES = 5
+PULL_RETRY_DELAY = 5
+
 
 class PullPolicy(Enum):
     def __str__(self):
@@ -42,6 +49,21 @@ def from_str(cls, status: Optional[str]) -> PullStatus:
         return dct.get(status, PullStatus.OTHER)
 
 
+def _auths_from_env() -> Dict:
+    """Map each non-empty registry from the DOCKER_* settings to its credentials."""
+    doc_logs = constants.DOCKER_LOGIN().split(",")
+    doc_pasws = constants.DOCKER_PASSWORD().split(",")
+    doc_regs = constants.DOCKER_REGISTRY().split(",")
+    creds = zip(doc_logs, doc_pasws, doc_regs)
+    # Skip empty registry names (unset variable or trailing comma): nothing to authenticate.
+    return {reg: {"username": login, "password": pasw} for login, pasw, reg in creds if reg}
+
+
+def _registry_auth_from_env(registry: Optional[str]) -> Optional[Dict]:
+    """Return the env-configured credentials for *registry*, or None if there are none."""
+    return _auths_from_env().get(registry)
+
+
 def docker_pull_if_needed(docker_api, docker_image_name, policy, logger, progress=True):
     logger.info(
         "docker_pull_if_needed args",
@@ -69,9 +91,19 @@ def docker_pull_if_needed(docker_api, docker_image_name, policy, logger, progres
                 _docker_pull_progress(docker_api, docker_image_name, logger)
     elif str(policy) == str(PullPolicy.IF_AVAILABLE):
         if progress is False:
-            _docker_pull(docker_api, docker_image_name, logger, raise_exception=True)
+            _docker_pull(
+                docker_api,
+                docker_image_name,
+                logger,
+                raise_exception=True,
+            )
         else:
-            _docker_pull_progress(docker_api, docker_image_name, logger, raise_exception=True)
+            _docker_pull_progress(
+                docker_api,
+                docker_image_name,
+                logger,
+                raise_exception=True,
+            )
     else:
         raise RuntimeError(f"Unknown pull policy {str(policy)}")
     if not _docker_image_exists(docker_api, docker_image_name):
@@ -84,107 +116,162 @@ def docker_pull_if_needed(docker_api, docker_image_name, policy, logger, progres
         )
 
 
+def resolve_registry(docker_image_name):
+    """Return the registry part of an image reference, or None if it cannot be resolved."""
+    from docker.auth import resolve_repository_name
+    from docker.utils import parse_repository_tag
+
+    try:
+        repo_name = parse_repository_tag(docker_image_name)[0]
+        return resolve_repository_name(repo_name)[0]
+    except Exception:
+        return None
+
+
 def _docker_pull(docker_api, docker_image_name, logger, raise_exception=True):
+    """Pull an image without layer progress, retrying failed pulls up to PULL_RETRIES times.
+
+    If the last retry also fails, the DockerException is re-raised when
+    raise_exception is True; otherwise a warning is logged and the pull is skipped.
+    """
     from docker.errors import DockerException
 
     logger.info("Docker image will be pulled", extra={"image_name": docker_image_name})
-    progress_dummy = Progress("Pulling image...", 1, ext_logger=logger)
-    progress_dummy.iter_done_report()
-    try:
-        pulled_img = docker_api.images.pull(docker_image_name)
-        logger.info(
-            "Docker image has been pulled",
-            extra={"pulled": {"tags": pulled_img.tags, "id": pulled_img.id}},
+    registry = resolve_registry(docker_image_name)
+    auth = _registry_auth_from_env(registry)
+    for i in range(0, PULL_RETRIES + 1):
+        # The retry suffix is parenthesized so the first attempt keeps the base title.
+        progress_dummy = Progress(
+            "Pulling image..." + (f" (retry {i}/{PULL_RETRIES})" if i > 0 else ""),
+            1,
+            ext_logger=logger,
         )
-    except DockerException as e:
-        if raise_exception is True:
-            raise e
-            # raise DockerException(
-            #     "Unable to pull image: see actual error above. "
-            #     "Please, run the task again or contact support team."
-            # )
-        else:
-            logger.warn("Pulling step is skipped. Unable to pull image: {!r}.".format(str(e)))
+        progress_dummy.iter_done_report()
+        try:
+            pulled_img = docker_api.images.pull(docker_image_name, auth_config=auth)
+            logger.info(
+                "Docker image has been pulled",
+                extra={"pulled": {"tags": pulled_img.tags, "id": pulled_img.id}},
+            )
+            return
+        except DockerException as e:
+            if i >= PULL_RETRIES:
+                if raise_exception is True:
+                    raise e
+                logger.warning(
+                    "Pulling step is skipped. Unable to pull image: {!r}.".format(str(e))
+                )
+                return
+            logger.warning("Unable to pull image: %s", str(e))
+            logger.info("Retrying in %d seconds...", PULL_RETRY_DELAY)
+            time.sleep(PULL_RETRY_DELAY)
 
 
 def _docker_pull_progress(docker_api, docker_image_name, logger, raise_exception=True):
+    """Pull an image while reporting layer download/extract progress, with retries.
+
+    After the last failed retry: re-raise if raise_exception is True, else log a warning.
+    """
     logger.info("Docker image will be pulled", extra={"image_name": docker_image_name})
     from docker.errors import DockerException
 
-    try:
-        layers_total_load = {}
-        layers_current_load = {}
-        layers_total_extract = {}
-        layers_current_extract = {}
-        started = set()
-        loaded = set()
-        pulled = set()
-
-        progress_full = Progress("Preparing dockerimage", 1, ext_logger=logger)
-        progres_ext = Progress("Extracting layers", 1, is_size=True, ext_logger=logger)
-        progress_load = Progress("Downloading layers", 1, is_size=True, ext_logger=logger)
-
-        for line in docker_api.api.pull(docker_image_name, stream=True, decode=True):
-            status = PullStatus.from_str(line.get("status", None))
-            layer_id = line.get("id", None)
-            progress_details = line.get("progressDetail", {})
-            need_report = True
-
-            if status is PullStatus.START:
-                started.add(layer_id)
-                need_report = False
-            elif status is PullStatus.DOWNLOAD:
-                layers_current_load[layer_id] = progress_details.get("current", 0)
-                layers_total_load[layer_id] = progress_details.get(
-                    "total", layers_current_load[layer_id]
-                )
-                total_load = sum(layers_total_load.values())
-                current_load = sum(layers_current_load.values())
-                if total_load > progress_load.total:
-                    progress_load.set(current_load, total_load)
-                elif (current_load - progress_load.current) / total_load > 0.01:
-                    progress_load.set(current_load, total_load)
-                else:
-                    need_report = False
-            elif status is PullStatus.COMPLETE_LOAD:
-                loaded.add(layer_id)
-            elif status is PullStatus.EXTRACT:
-                layers_current_extract[layer_id] = progress_details.get("current", 0)
-                layers_total_extract[layer_id] = progress_details.get(
-                    "total", layers_current_extract[layer_id]
-                )
-                total_ext = sum(layers_total_extract.values())
-                current_ext = sum(layers_current_extract.values())
-                if total_ext > progres_ext.total:
-                    progres_ext.set(current_ext, total_ext)
-                elif (current_ext - progres_ext.current) / total_ext > 0.01:
-                    progres_ext.set(current_ext, total_ext)
-                else:
-                    need_report = False
-            elif status is PullStatus.COMPLETE_PULL:
-                pulled.add(layer_id)
+    registry = resolve_registry(docker_image_name)
+    auth = _registry_auth_from_env(registry)
+    for i in range(0, PULL_RETRIES + 1):
+        try:
+            layers_total_load = {}
+            layers_current_load = {}
+            layers_total_extract = {}
+            layers_current_extract = {}
+            started = set()
+            loaded = set()
+            pulled = set()
+
+            # Retry suffixes are parenthesized so the first attempt keeps the base titles.
+            progress_full = Progress(
+                "Preparing dockerimage" + (f" (retry {i}/{PULL_RETRIES})" if i > 0 else ""),
+                1,
+                ext_logger=logger,
+            )
+            progres_ext = Progress(
+                "Extracting layers" + (f" (retry {i}/{PULL_RETRIES})" if i > 0 else ""),
+                1,
+                is_size=True,
+                ext_logger=logger,
+            )
+            progress_load = Progress(
+                "Downloading layers" + (f" (retry {i}/{PULL_RETRIES})" if i > 0 else ""),
+                1,
+                is_size=True,
+                ext_logger=logger,
+            )
 
-            if started != pulled:
-                if need_report:
-                    if started == loaded:
-                        progres_ext.report_progress()
+            for line in docker_api.api.pull(
+                docker_image_name, stream=True, decode=True, auth_config=auth
+            ):
+                status = PullStatus.from_str(line.get("status", None))
+                layer_id = line.get("id", None)
+                progress_details = line.get("progressDetail", {})
+                need_report = True
+
+                if status is PullStatus.START:
+                    started.add(layer_id)
+                    need_report = False
+                elif status is PullStatus.DOWNLOAD:
+                    layers_current_load[layer_id] = progress_details.get("current", 0)
+                    layers_total_load[layer_id] = progress_details.get(
+                        "total", layers_current_load[layer_id]
+                    )
+                    total_load = sum(layers_total_load.values())
+                    current_load = sum(layers_current_load.values())
+                    if total_load > progress_load.total:
+                        progress_load.set(current_load, total_load)
+                    elif (current_load - progress_load.current) / total_load > 0.01:
+                        progress_load.set(current_load, total_load)
                     else:
-                        progress_load.report_progress()
-            elif len(pulled) > 0:
-                progress_full.report_progress()
-
-        progress_full.iter_done()
-        progress_full.report_progress()
-        logger.info("Docker image has been pulled", extra={"image_name": docker_image_name})
-    except DockerException as e:
-        if raise_exception is True:
-            raise e
-            # raise DockerException(
-            #     "Unable to pull image: see actual error above. "
-            #     "Please, run the task again or contact support team."
-            # )
-        else:
-            logger.warn("Pulling step is skipped. Unable to pull image: {!r}.".format(repr(e)))
+                        need_report = False
+                elif status is PullStatus.COMPLETE_LOAD:
+                    loaded.add(layer_id)
+                elif status is PullStatus.EXTRACT:
+                    layers_current_extract[layer_id] = progress_details.get("current", 0)
+                    layers_total_extract[layer_id] = progress_details.get(
+                        "total", layers_current_extract[layer_id]
+                    )
+                    total_ext = sum(layers_total_extract.values())
+                    current_ext = sum(layers_current_extract.values())
+                    if total_ext > progres_ext.total:
+                        progres_ext.set(current_ext, total_ext)
+                    elif (current_ext - progres_ext.current) / total_ext > 0.01:
+                        progres_ext.set(current_ext, total_ext)
+                    else:
+                        need_report = False
+                elif status is PullStatus.COMPLETE_PULL:
+                    pulled.add(layer_id)
+
+                if started != pulled:
+                    if need_report:
+                        if started == loaded:
+                            progres_ext.report_progress()
+                        else:
+                            progress_load.report_progress()
+                elif len(pulled) > 0:
+                    progress_full.report_progress()
+
+            progress_full.iter_done()
+            progress_full.report_progress()
+            logger.info("Docker image has been pulled", extra={"image_name": docker_image_name})
+            return
+        except DockerException as e:
+            if i >= PULL_RETRIES:
+                if raise_exception is True:
+                    raise e
+                logger.warning(
+                    "Pulling step is skipped. Unable to pull image: {!r}.".format(repr(e))
+                )
+                return
+            logger.warning("Unable to pull image: %s", str(e))
+            logger.info("Retrying in %d seconds...", PULL_RETRY_DELAY)
+            time.sleep(PULL_RETRY_DELAY)
 
 
 def _docker_image_exists(docker_api, docker_image_name):
diff --git a/agent/worker/task_app.py b/agent/worker/task_app.py
index 375fd55..8790d13 100644
--- a/agent/worker/task_app.py
+++ b/agent/worker/task_app.py
@@ -110,6 +110,7 @@ def __init__(self, *args, **kwargs):
         self.tmp_data_dir = None
         self.data_dir = None
         self.agent_id = None
+        self._logs_output = None
         self._gpu_config: Optional[GPUFlag] = None
         self._log_filters = [pip_req_satisfied_filter]  # post_get_request_filter
 
@@ -524,28 +525,13 @@ def sync_pip_cache(self):
     @handle_exceptions
     def find_or_run_container(self):
         add_labels = {"sly_app": "1", "app_session_id": str(self.info["task_id"])}
-        try:
-            docker_utils.docker_pull_if_needed(
-                self._docker_api,
-                self.docker_image_name,
-                constants.PULL_POLICY(),
-                self.logger,
-            )
-        except DockerException as e:
-            if "no basic auth credentials" in str(e).lower():
-                self.logger.warn(
-                    f"Failed to pull docker image '{self.docker_image_name}'. Will try to login and pull again",
-                    exc_info=True,
-                )
-                agent_utils.docker_login(self.docker_api, self.logger)
-                docker_utils.docker_pull_if_needed(
-                    self._docker_api,
-                    self.docker_image_name,
-                    constants.PULL_POLICY(),
-                    self.logger,
-                )
-            else:
-                raise e
+        docker_utils.docker_pull_if_needed(
+            self._docker_api,
+            self.docker_image_name,
+            constants.PULL_POLICY(),
+            self.logger,
+        )
+
         self.sync_pip_cache()
         if self._container is None:
             try:
@@ -602,7 +588,12 @@ def find_or_run_container(self):
     def get_spawn_entrypoint(self):
+        """Return the idle-loop entrypoint, wrapped in `timeout` when a deadline is set."""
         inf_command = "while true; do sleep 30; done;"
         self.logger.info("Infinite command", extra={"command": inf_command})
-        return ["sh", "-c", inf_command]
+        deadline = self.info.get("activeDeadlineSeconds", None)
+        if deadline is None or not deadline > 0:
+            return ["sh", "-c", inf_command]
+        self.logger.info(f"Task Timeout is set to {deadline} seconds")
+        return ["/usr/bin/timeout", "--kill-after", "30s", f"{deadline}s", "sh", "-c", inf_command]
 
     def _exec_command(self, command, add_envs=None, container_id=None):
         add_envs = sly.take_with_default(add_envs, {})
@@ -677,6 +668,16 @@ def install_pip_requirements(self, container_id=None):
 
             self.logger.info("Requirements are installed")
 
+    def is_container_alive(self):
+        """Return True if the task container exists and its refreshed status is "running"."""
+        if self._container is None:
+            return False
+        try:
+            self._container.reload()
+        except NotFound:
+            return False
+        return self._container.status == "running"
+
     def main_step(self):
         api = Api(self.info["server_address"], self.info["api_token"])
         task_info_from_server = api.task.get_info_by_id(int(self.info["task_id"]))
@@ -691,11 +692,18 @@ def main_step(self):
         else:
             self.logger.warn("baseUrl not found in task info")
 
-        self.find_or_run_container()
-        self.exec_command(add_envs=self.main_step_envs())
-        self.process_logs()
-
         try:
+            self.find_or_run_container()
+
+            if self.is_container_alive():
+                self.exec_command(add_envs=self.main_step_envs())
+
+            parsed_logs = self.parse_logs()
+            self._container.reload()
+            self._logs_output = self._container.logs(stream=True)
+            parsed_logs += self.parse_logs()
+
+            self.process_logs(parsed_logs)
             self.drop_container_and_check_status()
         except:
             if self.tmp_data_dir is not None and sly.fs.dir_exists(self.tmp_data_dir):
@@ -804,29 +812,11 @@ def main_step_envs(self):
 
         return final_envs
 
-    def process_logs(self):
-        logs_found = False
-
-        def _process_line(log_line):
-            # log_line = log_line.decode("utf-8")
-            msg, res_log, lvl = self.parse_log_line(log_line)
-            if msg is None:
-                self.logger.warn(
-                    "Received empty (none) message in log line, will be handled automatically"
-                )
-                msg = "empty message"
-            self._process_report(msg)
-            output = self.call_event_function(res_log)
-
-            lvl_description = sly.LOGGING_LEVELS.get(lvl, None)
-            if lvl_description is not None:
-                lvl_int = lvl_description.int
-            else:
-                lvl_int = sly.LOGGING_LEVELS["INFO"].int
+    def parse_logs(self):
+        result_logs = []
 
-            lvl_int = filter_log_line(msg, lvl_int, self._log_filters)
-            if lvl_int != -1:
-                self.logger.log(lvl_int, msg, extra=res_log)
+        if self._logs_output is None:
+            return result_logs
 
         def _decode(bytes: bytes):
             decode_args = [
@@ -843,15 +833,45 @@ def _decode(bytes: bytes):
             return bytes.decode(*decode_args[0])
 
         # @TODO: parse multiline logs correctly (including exceptions)
-        log_line = ""
 
         for log_line_arr in self._logs_output:
             for log_part in _decode(log_line_arr).splitlines():
-                logs_found = True
-                _process_line(log_part)
+                result_logs.append(log_part)
 
-        if not logs_found:
+        return result_logs
+
+
+    def process_logs(self, logs_arr=None):
+        """Handle each log line (report, event callback, logging) and return the lines.
+
+        Lines come from logs_arr, or from parse_logs() when logs_arr is None.
+        """
+        result_logs = self.parse_logs() if logs_arr is None else logs_arr
+        if len(result_logs) == 0:
             self.logger.warn("No logs obtained from container.")  # check if bug occurred
+            return result_logs
+
+        for log_line in result_logs:
+            msg, res_log, lvl = self.parse_log_line(log_line)
+            if msg is None:
+                self.logger.warn(
+                    "Received empty (none) message in log line, will be handled automatically"
+                )
+                msg = "empty message"
+            self._process_report(msg)
+            self.call_event_function(res_log)
+
+            lvl_description = sly.LOGGING_LEVELS.get(lvl, None)
+            if lvl_description is None:
+                lvl_int = sly.LOGGING_LEVELS["INFO"].int
+            else:
+                lvl_int = lvl_description.int
+            lvl_int = filter_log_line(msg, lvl_int, self._log_filters)
+            if lvl_int != -1:
+                self.logger.log(lvl_int, msg, extra=res_log)
+
+        return result_logs
+
 
     def _stop_wait_container(self):
         if self.is_isolate():
@@ -874,15 +894,32 @@ def _drop_container(self):
             self.exec_stop()
 
     def drop_container_and_check_status(self):
-        status = self._docker_api.api.exec_inspect(self._exec_id)["ExitCode"]
+        """Drop the container when isolated and raise if it exited with a non-zero code."""
+        self._container.reload()
+        status = self._container.attrs["State"]["ExitCode"]
         if self.is_isolate():
             self._drop_container()
+
         self.logger.debug("Task container finished with status: {}".format(str(status)))
+
         if status != 0:
+            last_report = None
             if len(self._task_reports) > 0:
                 last_report = self._task_reports[-1].to_dict()
                 self.logger.debug("Founded error report.", extra=last_report)
+
+            instance_type = self.info.get("instance_type", "")
+            timeout = self.info.get("activeDeadlineSeconds", None)
+            # 124 is GNU timeout's "timed out" status; 137 is 128 + SIGKILL (--kill-after).
+            if timeout is not None and timeout > 0 and status in (124, 137):
+                msg = f"Task deadline exceeded. This task is only allowed to run for {timeout} seconds."
+                if instance_type == "community":
+                    msg += " If you require more time, please contact support or run the task on your agent."
+                raise RuntimeError(msg)
+
+            if last_report is not None:
                 raise sly.app.exceptions.DialogWindowError(**last_report)
+
             raise RuntimeError(
                 # self.logger.warn(
                 "Task container finished with non-zero status: {}".format(str(status))
diff --git a/agent/worker/task_pull_docker_image.py b/agent/worker/task_pull_docker_image.py
index 9193171..b118d8d 100644
--- a/agent/worker/task_pull_docker_image.py
+++ b/agent/worker/task_pull_docker_image.py
@@ -31,22 +31,10 @@ def docker_api(self, val):
 
     def task_main_func(self):
         self.logger.info("TASK_START", extra={"event_type": sly.EventType.TASK_STARTED})
-        try:
-            docker_utils.docker_pull_if_needed(
-                self._docker_api, self.docker_image_name, self.info["pull_policy"], self.logger
-            )
-        except DockerException as e:
-            if "no basic auth credentials" in str(e).lower():
-                self.logger.warn(
-                    f"Failed to pull docker image '{self.docker_image_name}'. Will try to login and pull again",
-                    exc_info=True,
-                )
-                agent_utils.docker_login(self.docker_api, self.logger)
-                docker_utils.docker_pull_if_needed(
-                    self._docker_api, self.docker_image_name, self.info["pull_policy"], self.logger
-                )
-            else:
-                raise e
+        docker_utils.docker_pull_if_needed(
+            self._docker_api, self.docker_image_name, self.info["pull_policy"], self.logger
+        )
+
         docker_img = self._docker_api.images.get(self.docker_image_name)
         if constants.CHECK_VERSION_COMPATIBILITY():
             self._validate_version(
diff --git a/requirements.txt b/requirements.txt
index 904444d..acc5fd6 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -17,6 +17,6 @@ filelock==3.13.1
 torch==1.7.1+cu110
 torchvision==0.8.2+cu110
 -f https://download.pytorch.org/whl/torch_stable.html
-supervisely==6.73.149
+supervisely==6.73.220
 # for development
 # supervisely @ git+https://github.com/supervisely/supervisely.git@dev-branch