From a9bf1ffc256d2b1c98a9bff9ae32a92e6f1cd4d0 Mon Sep 17 00:00:00 2001
From: Nikolai Petukhov
Date: Tue, 27 Feb 2024 22:59:05 -0300
Subject: [PATCH] add log messages

---
 train/src/dataset_cache.py | 49 ++++++++++++++++++++++++++++++--------
 1 file changed, 39 insertions(+), 10 deletions(-)

diff --git a/train/src/dataset_cache.py b/train/src/dataset_cache.py
index ef196a7..91277b9 100644
--- a/train/src/dataset_cache.py
+++ b/train/src/dataset_cache.py
@@ -21,11 +21,17 @@ def split_by_cache(project_id, dataset_ids) -> Tuple[set, set]:
         if sly.fs.dir_exists(cache_dataset_dir):
             cached.add(dataset_id)
             to_download.remove(dataset_id)
-
+
     return to_download, cached
 
 
-def download_project(api: sly.Api, project_info: sly.ProjectInfo, dataset_infos: List[sly.DatasetInfo], use_cache: bool, progress: Progress):
+def download_project(
+    api: sly.Api,
+    project_info: sly.ProjectInfo,
+    dataset_infos: List[sly.DatasetInfo],
+    use_cache: bool,
+    progress: Progress,
+):
     dataset_ids = [dataset_info.id for dataset_info in dataset_infos]
     if not use_cache:
         total = sum([dataset_info.images_count for dataset_info in dataset_infos])
@@ -40,17 +46,34 @@ def download_project(api: sly.Api, project_info: sly.ProjectInfo, dataset_infos:
         )
         return
 
-    dataset_infos_dict = {dataset_info.id:dataset_info for dataset_info in dataset_infos}
+    dataset_infos_dict = {
+        dataset_info.id: dataset_info for dataset_info in dataset_infos
+    }
     # get datasets to download and cached
     to_download, cached = split_by_cache(project_info.id, dataset_ids)
+    if len(cached) == 0:
+        log_msg = "No cached datasets found"
+    else:
+        log_msg = "Using cached datasets: " + ", ".join(
+            f"{dataset_infos_dict[dataset_id].name} ({dataset_id})"
+            for dataset_id in cached
+        )
+    sly.logger.info(log_msg)
+    if len(to_download) == 0:
+        log_msg = "All datasets are cached. No datasets to download"
+    else:
+        log_msg = "Downloading datasets: " + ", ".join(
+            f"{dataset_infos_dict[dataset_id].name} ({dataset_id})"
+            for dataset_id in to_download
+        )
+    sly.logger.info(log_msg)
     # get images count
-    total = sum([dataset_infos_dict[dataset_id].items_count for dataset_id in to_download])
+    total = sum(
+        [dataset_infos_dict[dataset_id].items_count for dataset_id in to_download]
+    )
     # clean project dir
     if os.path.exists(g.project_dir):
         sly.fs.clean_dir(g.project_dir)
-
-    # TODO Check if to_download is empty
-
     # download
     with progress(message="Downloading input data...", total=total) as pbar:
         sly.download(
@@ -69,8 +92,12 @@ def download_project(api: sly.Api, project_info: sly.ProjectInfo, dataset_infos:
         total = sum([sly.fs.get_directory_size(dp) for dp in downloaded_dirs])
         with progress(message="Saving data to cache...", total=total) as pbar:
             for dataset_id, dataset_dir in zip(to_download, downloaded_dirs):
-                cache_dataset_dir = os.path.join(g.cache_dir, str(project_info.id), str(dataset_id))
-                sly.fs.copy_dir_recursively(dataset_dir, cache_dataset_dir, progress_cb=pbar.update)
+                cache_dataset_dir = os.path.join(
+                    g.cache_dir, str(project_info.id), str(dataset_id)
+                )
+                sly.fs.copy_dir_recursively(
+                    dataset_dir, cache_dataset_dir, progress_cb=pbar.update
+                )
     # copy cached datasets
     cached_dirs = [
         os.path.join(g.cache_dir, str(project_info.id), str(dataset_id))
@@ -81,4 +108,6 @@ def download_project(api: sly.Api, project_info: sly.ProjectInfo, dataset_infos:
     for dataset_id, cache_dataset_dir in zip(cached, cached_dirs):
         dataset_name = dataset_infos_dict[dataset_id].name
         dataset_dir = os.path.join(g.project_dir, dataset_name)
-        sly.fs.copy_dir_recursively(cache_dataset_dir, dataset_dir, progress_cb=pbar.update)
+        sly.fs.copy_dir_recursively(
+            cache_dataset_dir, dataset_dir, progress_cb=pbar.update
+        )
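
Note (illustrative sketch, not part of the patch): the snippet below mimics the logging behaviour this change adds, showing the messages produced for a given cache split. It is a minimal, self-contained approximation: Python's standard logging module stands in for sly.logger, SimpleNamespace objects stand in for sly.DatasetInfo, and the dataset names and ids are invented for the example.

    import logging
    from types import SimpleNamespace

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger("dataset_cache_sketch")  # stand-in for sly.logger

    # Hypothetical stand-ins for sly.DatasetInfo objects (only .name is used here).
    dataset_infos_dict = {
        101: SimpleNamespace(name="train"),
        102: SimpleNamespace(name="val"),
    }
    cached = {101}       # datasets already present in the local cache
    to_download = {102}  # datasets that still need to be fetched

    if len(cached) == 0:
        log_msg = "No cached datasets found"
    else:
        log_msg = "Using cached datasets: " + ", ".join(
            f"{dataset_infos_dict[dataset_id].name} ({dataset_id})"
            for dataset_id in cached
        )
    logger.info(log_msg)  # e.g. "Using cached datasets: train (101)"

    if len(to_download) == 0:
        log_msg = "All datasets are cached. No datasets to download"
    else:
        log_msg = "Downloading datasets: " + ", ".join(
            f"{dataset_infos_dict[dataset_id].name} ({dataset_id})"
            for dataset_id in to_download
        )
    logger.info(log_msg)  # e.g. "Downloading datasets: val (102)"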