Skip to content

Commit

Permalink
fix analyse on interactive steps
Browse files Browse the repository at this point in the history
  • Loading branch information
VGPReys committed Jan 29, 2025
1 parent 03d0143 commit 4d68063
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 42 deletions.
28 changes: 17 additions & 11 deletions integration_tests/test_full_workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,13 @@
from pathlib import Path
import os
import shutil
from haddock.libs.libworkflow import WorkflowManager

from haddock.clis.cli import main as cli_main
from haddock.clis.cli_analyse import main as cli_analyse
from haddock.clis.cli_re import maincli
from haddock.core.typing import Any
from haddock.libs.libworkflow import WorkflowManager

from integration_tests import GOLDEN_DATA


Expand Down Expand Up @@ -66,7 +71,7 @@ def test_interactive_analysis_on_workflow(monkeypatch):

monkeypatch.chdir(tmpdir)

from haddock.clis.cli import main as cli_main

cli_main(
Path("workflow.cfg"),
)
Expand All @@ -85,7 +90,7 @@ def test_interactive_analysis_on_workflow(monkeypatch):

# now running interactive re-clustering
clustfcc_dir = f"{run_dir}/2_clustfcc"
from haddock.clis.cli_re import maincli

# faking sys.argv in input to haddock3-re
monkeypatch.setattr("sys.argv",
["haddock3-re", "clustfcc", clustfcc_dir, "-f", "0.7"]
Expand All @@ -106,14 +111,15 @@ def test_interactive_analysis_on_workflow(monkeypatch):
assert Path(run_dir, "3_caprieval_interactive/capri_ss.tsv").exists() is True

# now analyse the interactive folders
from haddock.clis.cli_analyse import main as cli_analyse
cli_analyse(run_dir,
[2,3],
10,
format=None,
scale=None,
is_cleaned=True,
inter=True)
cli_analyse(
run_dir,
[2, 3],
10,
format=None,
scale=None,
is_cleaned=True,
inter=True,
)
exp_clustfcc_dir = Path(run_dir, "analysis", "2_clustfcc_interactive_analysis")
exp_caprieval_dir = Path(run_dir, "analysis", "3_caprieval_interactive_analysis")
assert os.path.isdir(exp_clustfcc_dir) is True
Expand Down
21 changes: 10 additions & 11 deletions src/haddock/clis/cli_analyse.py
Original file line number Diff line number Diff line change
Expand Up @@ -370,9 +370,9 @@ def get_top_ranked_mapping(
cluster_ranking: ClRank,
clustered_topX: int = 4,
unclustered_topX: int = 10,
) -> dict[Path, str]:
) -> dict[Path, Path]:
# Set mapping of generated files
top_ranked_mapping: dict[Path, str] = {}
top_ranked_mapping: dict[Path, Path] = {}

# Read table
capri_df = read_capri_table(capri_filename, comment="#")
Expand Down Expand Up @@ -413,15 +413,15 @@ def get_top_ranked_mapping(
struct_gz = Path(f"{struct}.gz")
# copy the structure
if Path(struct).exists():
top_ranked_mapping[struct] = target_name
top_ranked_mapping[struct] = Path(target_name)
elif struct_gz.exists():
top_ranked_mapping[struct_gz] = target_name
top_ranked_mapping[struct_gz] = Path(target_name)
else:
log.warning(f"structure {struct} not found")
return top_ranked_mapping

def zip_top_ranked(
top_ranked_mapping: dict[Path, str],
top_ranked_mapping: dict[Path, Path],
summary_name: str,
gen_archive: bool,
) -> Optional[Path]:
Expand Down Expand Up @@ -451,9 +451,9 @@ def zip_top_ranked(
for ori_fpath, new_name in top_ranked_mapping.items():
# If already compressed
if ori_fpath.suffix == ".gz":
copied_fpath = shutil.copy(ori_fpath, ".")
copied_fpath = Path(shutil.copy(ori_fpath, "."))
# unpack the file
_unpack_gz(copied_fpath.name)
_unpack_gz(copied_fpath)
# Rename it
shutil.move(copied_fpath.name.replace(".gz", ""), new_name)
else:
Expand Down Expand Up @@ -482,7 +482,7 @@ def zip_top_ranked(
# Create new path
next_filepath = Path(output_fname, str(new_name))
# Hold it in mapping dict
top_ranked_mapping[ori_fpath] = str(next_filepath)
top_ranked_mapping[ori_fpath] = Path(next_filepath)
# Displace file
shutil.move(new_name, top_ranked_mapping[ori_fpath])
log.info(f"Top structures copied into {output_fname}!")
Expand Down Expand Up @@ -778,13 +778,12 @@ def main(
offline=offline,
mode=mode,
ncores=ncores,
#self_contained=self_contained,
self_contained=True,
self_contained=self_contained,
)
except Exception as e:
log.warning(
f"Could not execute the analysis for step {step}. "
f"The following error occurred {e}"
f"The following error occurred: {e}"
)
bad_folder_paths.append(target_path)
else:
Expand Down
41 changes: 21 additions & 20 deletions src/haddock/libs/libplots.py
Original file line number Diff line number Diff line change
Expand Up @@ -884,7 +884,7 @@ def clt_table_handler(
topX_clusters: int = 10,
clustered_topX: int = 4,
unclustered_topX: int = 10,
top_ranked_mapping: Optional[dict[Path, str]] = None,
top_ranked_mapping: Optional[dict[Path, Path]] = None,
) -> pd.DataFrame:
"""
Create a dataframe including data for tables.
Expand Down Expand Up @@ -914,29 +914,30 @@ def clt_table_handler(
clusters_df = clusters_df.round(2)
structs_df = structs_df.round(2)

# if the run will be cleaned, the structures are going to be gzipped
if not top_ranked_mapping:
# if the run will be cleaned, the structures are going to be gzipped
if is_cleaned and not top_ranked_mapping:
if is_cleaned:
# substitute the values in the df by adding .gz at the end
structs_df['model'] = structs_df['model'].replace(
to_replace=r"(\.pdb)$", value=r".pdb.gz", regex=True,
)

# ss_file is in NN_caprieval/ while report is in
# analysis/NN_caprieval_analysis/
# need to correct model paths by prepending ../
def correct_relative_paths(
    path: str,
    top_ranked_mapping: Optional[dict[Path, str]],
) -> str:
    """Map a capri-table model path to its report-relative location.

    The ss file lives in NN_caprieval/ while the report is written in
    analysis/NN_caprieval_analysis/, so any path that was not copied
    (i.e. not present in *top_ranked_mapping*) must be prefixed with
    ``../`` to stay reachable from the report.

    Parameters
    ----------
    path : str
        Model path as read from the capri table.
    top_ranked_mapping : Optional[dict[Path, str]]
        Mapping from original structure path to its copied location.

    Returns
    -------
    str
        The mapped location when known, otherwise ``../<path>``.
    """
    lookup_key = Path(path)
    # Membership test instead of try/except KeyError: same observable
    # behavior (a None mapping still raises TypeError here, as the
    # original subscript did).
    if lookup_key in top_ranked_mapping:
        return top_ranked_mapping[lookup_key]
    return f"../{path}"
structs_df['model'] = structs_df['model'].apply(
lambda x: correct_relative_paths(x, top_ranked_mapping)
)
)
else:
# ss_file is in NN_caprieval/ while report is in
# analysis/NN_caprieval_analysis/
# need to correct model paths by prepending ../
def correct_relative_paths(
    path: str,
    top_ranked_mapping: Optional[dict[Path, Path]],
) -> str:
    """Map a capri-table model path to its report-relative location.

    The ss file lives in NN_caprieval/ while the report is written in
    analysis/NN_caprieval_analysis/, so any path that is not present in
    *top_ranked_mapping* must be prefixed with ``../`` to stay reachable
    from the report.

    Parameters
    ----------
    path : str
        Model path as read from the capri table.
    top_ranked_mapping : Optional[dict[Path, Path]]
        Mapping from original structure path to its copied location.

    Returns
    -------
    str
        The mapped location when known, otherwise ``../<path>``.
    """
    try:
        # The mapping is keyed by Path objects (see get_top_ranked_mapping),
        # so the raw string must be converted before lookup: a bare str key
        # never equals a Path key, which made the original lookup always
        # miss and silently ignore the mapping. str() keeps the declared
        # `-> str` return type honest on a hit.
        new_path = str(top_ranked_mapping[Path(path)])
    except (KeyError, TypeError):
        # KeyError: structure was not copied/recorded.
        # TypeError: no mapping was provided (None).
        new_path = f"../{path}"
    return new_path

structs_df['model'] = structs_df['model'].apply(
lambda x: correct_relative_paths(x, top_ranked_mapping)
)

is_unclustered = clusters_df["cluster_rank"].unique().tolist() == ["-"]
# If unclustered, we only want to show the top 10 structures in a table.
Expand Down

0 comments on commit 4d68063

Please sign in to comment.