From b746830938891d6427d98ae2aca4338f9d385ce7 Mon Sep 17 00:00:00 2001
From: kedhammar <alfred.kedhammar@scilifelab.se>
Date: Wed, 22 Jan 2025 11:43:01 +0100
Subject: [PATCH 1/7] base processing of run dirs on pattern matching for all
 platforms and remove superfluous .abspath method

---
 taca/utils/bioinfo_tab.py | 50 +++++++++++++++++++++------------------
 1 file changed, 27 insertions(+), 23 deletions(-)

diff --git a/taca/utils/bioinfo_tab.py b/taca/utils/bioinfo_tab.py
index 14e68681..5c16fcf1 100644
--- a/taca/utils/bioinfo_tab.py
+++ b/taca/utils/bioinfo_tab.py
@@ -27,9 +27,13 @@ def __init__(self, value=None):
 def collect_runs():
     """Update command."""
     found_runs = {"illumina": [], "element": []}
+
     # Pattern explained:
     # 6-8Digits_(maybe ST-)AnythingLetterornumberNumber_Number_AorBLetterornumberordash
     illumina_rundir_re = re.compile("\d{6,8}_[ST-]*\w+\d+_\d+_[AB]?[A-Z0-9\-]+")
+    # Element: 8Digits_AV242106_AorBDigits, e.g. 20250121_AV242106_B2425434199
+    element_rundir_re = re.compile(r"\d{8}_AV242106_[AB]\d+")
+
     for inst_brand in CONFIG["bioinfo_tab"]["data_dirs"]:
         for data_dir in CONFIG["bioinfo_tab"]["data_dirs"][inst_brand]:
             if os.path.exists(data_dir):
@@ -37,25 +41,21 @@ def collect_runs():
                 for run_dir in potential_run_dirs:
                     if os.path.isdir(run_dir):
                         if inst_brand == "illumina" and illumina_rundir_re.match(
-                            os.path.basename(os.path.abspath(run_dir))
+                            os.path.basename(run_dir)
                         ):
                             found_runs[inst_brand].append(os.path.basename(run_dir))
                             logger.info(f"Working on {run_dir}")
                             update_statusdb(run_dir, inst_brand)
-                        elif inst_brand == "element":
-                            # Skip no sync dirs, they will be checked below
-                            if run_dir == os.path.join(data_dir, "nosync"):
-                                continue
+                        elif inst_brand == "element" and element_rundir_re.match(
+                            os.path.basename(run_dir)
+                        ):
+                            logger.info(f"Working on {run_dir}")
+                            update_statusdb(run_dir, inst_brand)
+                        elif inst_brand == "ont" and ONT_RUN_PATTERN.match(
+                            os.path.basename(run_dir)
+                        ):
                             logger.info(f"Working on {run_dir}")
                             update_statusdb(run_dir, inst_brand)
-                        elif inst_brand == "ont":
-                            # Skip archived, no_backup, nosync and qc folders
-                            if re.match(
-                                ONT_RUN_PATTERN,
-                                os.path.basename(os.path.abspath(run_dir)),
-                            ):
-                                logger.info(f"Working on {run_dir}")
-                                update_statusdb(run_dir, inst_brand)
 
                 nosync_data_dir = os.path.join(data_dir, "nosync")
                 potential_nosync_run_dirs = glob.glob(
@@ -64,21 +64,26 @@ def collect_runs():
                 for run_dir in potential_nosync_run_dirs:
                     if os.path.isdir(run_dir):
                         if (
-                            inst_brand == "illumina"
-                            and illumina_rundir_re.match(
-                                os.path.basename(os.path.abspath(run_dir))
+                            (
+                                inst_brand == "illumina"
+                                and illumina_rundir_re.match(os.path.basename(run_dir))
                             )
-                        ) or (inst_brand == "element" or inst_brand == "ont"):
-                            # Skip archived dirs
-                            if run_dir == os.path.join(nosync_data_dir, "archived"):
-                                continue
+                            or (
+                                inst_brand == "element"
+                                and element_rundir_re.match(os.path.basename(run_dir))
+                            )
+                            or (
+                                inst_brand == "ont"
+                                and ONT_RUN_PATTERN.match(os.path.basename(run_dir))
+                            )
+                        ):
                             update_statusdb(run_dir, inst_brand)
 
 
 def update_statusdb(run_dir, inst_brand):
     """Gets status for a project."""
     if inst_brand == "illumina":
-        run_id = os.path.basename(os.path.abspath(run_dir))
+        run_id = os.path.basename(run_dir)
     elif inst_brand == "element":
         try:
             aviti_run = Aviti_Run(run_dir, CONFIG)
@@ -89,7 +94,6 @@ def update_statusdb(run_dir, inst_brand):
             # WARNING - Run parameters file not found for ElementRun(<run_dir>), might not be ready yet
             return
     elif inst_brand == "ont":
-        run_dir = os.path.abspath(run_dir)
         try:
             ont_run = ONT_run(run_dir)
         except AssertionError as e:
@@ -320,7 +324,7 @@ def get_ss_projects_illumina(run_dir):
     proj_tree = Tree()
     lane_pattern = re.compile("^([1-8]{1,2})$")
     sample_proj_pattern = re.compile("^((P[0-9]{3,5})_[0-9]{3,5})")
-    run_name = os.path.basename(os.path.abspath(run_dir))
+    run_name = os.path.basename(run_dir)
     run_date = run_name.split("_")[0]
     if len(run_date) == 6:
         current_year = "20" + run_date[0:2]

From 14955e8746b40fa14b408ce66300b86a6685e285 Mon Sep 17 00:00:00 2001
From: kedhammar <alfred.kedhammar@scilifelab.se>
Date: Wed, 22 Jan 2025 11:43:24 +0100
Subject: [PATCH 2/7] remove unused dict

---
 taca/utils/bioinfo_tab.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/taca/utils/bioinfo_tab.py b/taca/utils/bioinfo_tab.py
index 5c16fcf1..a4c5ef01 100644
--- a/taca/utils/bioinfo_tab.py
+++ b/taca/utils/bioinfo_tab.py
@@ -26,7 +26,6 @@ def __init__(self, value=None):
 
 def collect_runs():
     """Update command."""
-    found_runs = {"illumina": [], "element": []}
 
     # Pattern explained:
     # 6-8Digits_(maybe ST-)AnythingLetterornumberNumber_Number_AorBLetterornumberordash
@@ -43,7 +42,6 @@ def collect_runs():
                         if inst_brand == "illumina" and illumina_rundir_re.match(
                             os.path.basename(run_dir)
                         ):
-                            found_runs[inst_brand].append(os.path.basename(run_dir))
                             logger.info(f"Working on {run_dir}")
                             update_statusdb(run_dir, inst_brand)
                         elif inst_brand == "element" and element_rundir_re.match(

From 115849a743ea9878ff0dc6e23c4a6086592b92ca Mon Sep 17 00:00:00 2001
From: kedhammar <alfred.kedhammar@scilifelab.se>
Date: Wed, 22 Jan 2025 11:52:36 +0100
Subject: [PATCH 3/7] simplify logic

---
 taca/utils/bioinfo_tab.py | 25 +++----------------------
 1 file changed, 3 insertions(+), 22 deletions(-)

diff --git a/taca/utils/bioinfo_tab.py b/taca/utils/bioinfo_tab.py
index a4c5ef01..77b3669c 100644
--- a/taca/utils/bioinfo_tab.py
+++ b/taca/utils/bioinfo_tab.py
@@ -37,29 +37,9 @@ def collect_runs():
         for data_dir in CONFIG["bioinfo_tab"]["data_dirs"][inst_brand]:
             if os.path.exists(data_dir):
                 potential_run_dirs = glob.glob(os.path.join(data_dir, "*"))
-                for run_dir in potential_run_dirs:
-                    if os.path.isdir(run_dir):
-                        if inst_brand == "illumina" and illumina_rundir_re.match(
-                            os.path.basename(run_dir)
-                        ):
-                            logger.info(f"Working on {run_dir}")
-                            update_statusdb(run_dir, inst_brand)
-                        elif inst_brand == "element" and element_rundir_re.match(
-                            os.path.basename(run_dir)
-                        ):
-                            logger.info(f"Working on {run_dir}")
-                            update_statusdb(run_dir, inst_brand)
-                        elif inst_brand == "ont" and ONT_RUN_PATTERN.match(
-                            os.path.basename(run_dir)
-                        ):
-                            logger.info(f"Working on {run_dir}")
-                            update_statusdb(run_dir, inst_brand)
+                potential_run_dirs += glob.glob(os.path.join(data_dir, "nosync", "*"))
 
-                nosync_data_dir = os.path.join(data_dir, "nosync")
-                potential_nosync_run_dirs = glob.glob(
-                    os.path.join(nosync_data_dir, "*")
-                )
-                for run_dir in potential_nosync_run_dirs:
+                for run_dir in potential_run_dirs:
                     if os.path.isdir(run_dir):
                         if (
                             (
@@ -75,6 +55,7 @@ def collect_runs():
                                 and ONT_RUN_PATTERN.match(os.path.basename(run_dir))
                             )
                         ):
+                            logger.info(f"Working on {run_dir}")
                             update_statusdb(run_dir, inst_brand)
 
 

From 060516033abd8644df9febb4f8cdc3c513b17198 Mon Sep 17 00:00:00 2001
From: kedhammar <alfred.kedhammar@scilifelab.se>
Date: Wed, 22 Jan 2025 11:55:46 +0100
Subject: [PATCH 4/7] update VERSIONLOG for run dir pattern matching

---
 VERSIONLOG.md | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/VERSIONLOG.md b/VERSIONLOG.md
index 9343f670..dbc2522f 100644
--- a/VERSIONLOG.md
+++ b/VERSIONLOG.md
@@ -1,6 +1,10 @@
 # TACA Version Log
 
-## 20241216.1
+## 20250122.1
+
+Improve the way TACA identifies run dirs in the "bioinfo_deliveries --update" command (bioinfo_tab.py).
+
+## 20241216.2
 
 Do not run ToulligQC if its output directory can be found.
 

From 8b725296f85cc5d55fcf486db5f6e5ab434f3ae5 Mon Sep 17 00:00:00 2001
From: kedhammar <alfred.kedhammar@scilifelab.se>
Date: Wed, 22 Jan 2025 11:59:23 +0100
Subject: [PATCH 5/7] ruff format

---
 taca/cleanup/cleanup.py                    |  8 ++++----
 taca/element/Element_Runs.py               |  8 ++++----
 taca/nanopore/ONT_run_classes.py           | 12 ++++++------
 taca/utils/misc.py                         |  2 +-
 taca/utils/statusdb.py                     |  3 +--
 taca/utils/transfer.py                     |  2 +-
 tests/nanopore/test_ONT_run_classes.py     |  2 +-
 tests/nanopore/test_instrument_transfer.py |  4 ++--
 8 files changed, 20 insertions(+), 21 deletions(-)

diff --git a/taca/cleanup/cleanup.py b/taca/cleanup/cleanup.py
index e6958205..799fa8c6 100644
--- a/taca/cleanup/cleanup.py
+++ b/taca/cleanup/cleanup.py
@@ -571,13 +571,13 @@ def _def_get_size_unit(s):
     gb = mb * 1000
     tb = gb * 1000
     if s > tb:
-        s = f"~{int(s/tb)}tb"
+        s = f"~{int(s / tb)}tb"
     elif s > gb:
-        s = f"~{int(s/gb)}gb"
+        s = f"~{int(s / gb)}gb"
     elif s > mb:
-        s = f"~{int(s/mb)}mb"
+        s = f"~{int(s / mb)}mb"
     elif s > kb:
-        s = f"~{int(s/kb)}kb"
+        s = f"~{int(s / kb)}kb"
     elif s > 0:
         s = f"~{int(s)}b"
     return str(s)
diff --git a/taca/element/Element_Runs.py b/taca/element/Element_Runs.py
index 5e1c2716..84dbf3ca 100644
--- a/taca/element/Element_Runs.py
+++ b/taca/element/Element_Runs.py
@@ -437,9 +437,9 @@ def make_demux_manifests(
 
         # Get '[SAMPLES]' section
         split_contents = manifest_contents.split("[SAMPLES]")
-        assert (
-            len(split_contents) == 2
-        ), f"Could not split sample rows out of manifest {manifest_contents}"
+        assert len(split_contents) == 2, (
+            f"Could not split sample rows out of manifest {manifest_contents}"
+        )
         sample_section = split_contents[1].strip().split("\n")
 
         # Split into header and rows
@@ -560,7 +560,7 @@ def make_demux_manifests(
                     "[RUNVALUES]",
                     "KeyName, Value",
                     f"manifest_file, {file_name}",
-                    f"manifest_group, {n+1}/{len(grouped_df)}",
+                    f"manifest_group, {n + 1}/{len(grouped_df)}",
                     f"built_from, {manifest_to_split}",
                 ]
             )
diff --git a/taca/nanopore/ONT_run_classes.py b/taca/nanopore/ONT_run_classes.py
index 808417c2..f26904e2 100644
--- a/taca/nanopore/ONT_run_classes.py
+++ b/taca/nanopore/ONT_run_classes.py
@@ -37,9 +37,9 @@ def __init__(self, run_abspath: str):
             None  # This will be defined upon instantiation of a child class
         )
 
-        assert re.match(
-            ONT_RUN_PATTERN, self.run_name
-        ), f"Run {self.run_name} doesn't look like a run dir"
+        assert re.match(ONT_RUN_PATTERN, self.run_name), (
+            f"Run {self.run_name} doesn't look like a run dir"
+        )
 
         # Parse MinKNOW sample and experiment name
         with open(self.get_file("/run_path.txt")) as stream:
@@ -143,9 +143,9 @@ def touch_db_entry(self):
             pore_count_history_file = os.path.join(
                 self.run_abspath, "pore_count_history.csv"
             )
-            assert os.path.isfile(
-                pore_count_history_file
-            ), f"Couldn't find {pore_count_history_file}"
+            assert os.path.isfile(pore_count_history_file), (
+                f"Couldn't find {pore_count_history_file}"
+            )
 
             self.db.create_ongoing_run(self, run_path_file, pore_count_history_file)
             logger.info(
diff --git a/taca/utils/misc.py b/taca/utils/misc.py
index 8b443014..3dc52f91 100755
--- a/taca/utils/misc.py
+++ b/taca/utils/misc.py
@@ -176,7 +176,7 @@ def query_yes_no(question, default="yes", force=False):
         elif choice in valid:
             return valid[choice]
         else:
-            sys.stdout.write('Please respond with "yes" or "no" ' '(or "y" or "n").\n')
+            sys.stdout.write('Please respond with "yes" or "no" (or "y" or "n").\n')
 
 
 def return_unique(seq):
diff --git a/taca/utils/statusdb.py b/taca/utils/statusdb.py
index a2920550..85635864 100644
--- a/taca/utils/statusdb.py
+++ b/taca/utils/statusdb.py
@@ -225,8 +225,7 @@ def merge_dicts(d1, d2):
                 pass  # same leaf value
             else:
                 logger.debug(
-                    f"Values for key {key} in d1 and d2 differ, "
-                    "using the value of d1"
+                    f"Values for key {key} in d1 and d2 differ, using the value of d1"
                 )
         else:
             d1[key] = d2[key]
diff --git a/taca/utils/transfer.py b/taca/utils/transfer.py
index 8a5bf311..8456912d 100644
--- a/taca/utils/transfer.py
+++ b/taca/utils/transfer.py
@@ -269,7 +269,7 @@ def transfer(self):
             # If we are not overwriting, return False
             if not self.overwrite:
                 logger.debug(
-                    f'target "{self.dest_path}" exists and will not be ' "overwritten"
+                    f'target "{self.dest_path}" exists and will not be overwritten'
                 )
                 return False
             # If the target is a mount, let's not mess with it
diff --git a/tests/nanopore/test_ONT_run_classes.py b/tests/nanopore/test_ONT_run_classes.py
index 6e9d1d84..305f34b5 100644
--- a/tests/nanopore/test_ONT_run_classes.py
+++ b/tests/nanopore/test_ONT_run_classes.py
@@ -171,7 +171,7 @@ def create_ONT_run_dir(
                     "unknown_positive",
                     "zero",
                 ]:
-                    f.write(f"{state},{i},{i*100}\n")
+                    f.write(f"{state},{i},{i * 100}\n")
 
     if sync_finished:
         open(f"{run_path}/.sync_finished", "w").close()
diff --git a/tests/nanopore/test_instrument_transfer.py b/tests/nanopore/test_instrument_transfer.py
index ccede9fb..1c1270bf 100644
--- a/tests/nanopore/test_instrument_transfer.py
+++ b/tests/nanopore/test_instrument_transfer.py
@@ -398,7 +398,7 @@ def test_dump_pore_count_history(setup_test_fixture):
 
     # Nothing to add, no file
     tmp = tempfile.TemporaryDirectory()
-    run_path = tmp.name + f"/experiment/sample/{DUMMY_RUN_NAME.replace('TEST','FLG')}"
+    run_path = tmp.name + f"/experiment/sample/{DUMMY_RUN_NAME.replace('TEST', 'FLG')}"
     os.makedirs(run_path)
     new_file = instrument_transfer.dump_pore_count_history(run_path, pore_counts)
     assert open(new_file).read() == ""
@@ -406,7 +406,7 @@ def test_dump_pore_count_history(setup_test_fixture):
 
     # Nothing to add, file is present
     tmp = tempfile.TemporaryDirectory()
-    run_path = tmp.name + f"/experiment/sample/{DUMMY_RUN_NAME.replace('TEST','FLG')}"
+    run_path = tmp.name + f"/experiment/sample/{DUMMY_RUN_NAME.replace('TEST', 'FLG')}"
     os.makedirs(run_path)
     open(run_path + "/pore_count_history.csv", "w").write("test")
     new_file = instrument_transfer.dump_pore_count_history(run_path, pore_counts)

From d1d21f6a8318c1d3f7c9ca1b61d0461bda5da1d2 Mon Sep 17 00:00:00 2001
From: kedhammar <alfred.kedhammar@scilifelab.se>
Date: Wed, 22 Jan 2025 12:01:35 +0100
Subject: [PATCH 6/7] update VERSIONLOG for ruff formatting

---
 VERSIONLOG.md | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/VERSIONLOG.md b/VERSIONLOG.md
index 9343f670..a46ae070 100644
--- a/VERSIONLOG.md
+++ b/VERSIONLOG.md
@@ -1,6 +1,10 @@
 # TACA Version Log
 
-## 20241216.1
+## 20250122.1
+
+Ruff formatting.
+
+## 20241216.2
 
 Do not run ToulligQC if its output directory can be found.
 

From 0fc69ca63efd82d1edd38f192fec619c97b215de Mon Sep 17 00:00:00 2001
From: kedhammar <alfred.kedhammar@scilifelab.se>
Date: Wed, 22 Jan 2025 13:30:43 +0100
Subject: [PATCH 7/7] use abspath for potential manual rundir input

---
 taca/utils/bioinfo_tab.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/taca/utils/bioinfo_tab.py b/taca/utils/bioinfo_tab.py
index 77b3669c..14d6613d 100644
--- a/taca/utils/bioinfo_tab.py
+++ b/taca/utils/bioinfo_tab.py
@@ -62,7 +62,7 @@ def collect_runs():
 def update_statusdb(run_dir, inst_brand):
     """Gets status for a project."""
     if inst_brand == "illumina":
-        run_id = os.path.basename(run_dir)
+        run_id = os.path.basename(os.path.abspath(run_dir))
     elif inst_brand == "element":
         try:
             aviti_run = Aviti_Run(run_dir, CONFIG)