diff --git a/integration_tests/test_full_workflow.py b/integration_tests/test_full_workflow.py index ad351ba1f..51aeb6278 100644 --- a/integration_tests/test_full_workflow.py +++ b/integration_tests/test_full_workflow.py @@ -2,8 +2,13 @@ from pathlib import Path import os import shutil -from haddock.libs.libworkflow import WorkflowManager + +from haddock.clis.cli import main as cli_main +from haddock.clis.cli_analyse import main as cli_analyse +from haddock.clis.cli_re import maincli from haddock.core.typing import Any +from haddock.libs.libworkflow import WorkflowManager + from integration_tests import GOLDEN_DATA @@ -66,7 +71,7 @@ def test_interactive_analysis_on_workflow(monkeypatch): monkeypatch.chdir(tmpdir) - from haddock.clis.cli import main as cli_main + cli_main( Path("workflow.cfg"), ) @@ -85,7 +90,7 @@ def test_interactive_analysis_on_workflow(monkeypatch): # now running interactive re-clustering clustfcc_dir = f"{run_dir}/2_clustfcc" - from haddock.clis.cli_re import maincli + # faking sys.argv in input to haddock3-re monkeypatch.setattr("sys.argv", ["haddock3-re", "clustfcc", clustfcc_dir, "-f", "0.7"] @@ -106,14 +111,15 @@ def test_interactive_analysis_on_workflow(monkeypatch): assert Path(run_dir, "3_caprieval_interactive/capri_ss.tsv").exists() is True # now analyse the interactive folders - from haddock.clis.cli_analyse import main as cli_analyse - cli_analyse(run_dir, - [2,3], - 10, - format=None, - scale=None, - is_cleaned=True, - inter=True) + cli_analyse( + run_dir, + [2, 3], + 10, + format=None, + scale=None, + is_cleaned=True, + inter=True, + ) exp_clustfcc_dir = Path(run_dir, "analysis", "2_clustfcc_interactive_analysis") exp_caprieval_dir = Path(run_dir, "analysis", "3_caprieval_interactive_analysis") assert os.path.isdir(exp_clustfcc_dir) is True diff --git a/src/haddock/clis/cli_analyse.py b/src/haddock/clis/cli_analyse.py index a2df10255..1b529d88d 100644 --- a/src/haddock/clis/cli_analyse.py +++ b/src/haddock/clis/cli_analyse.py @@ -370,9 +370,9 @@ def get_top_ranked_mapping( cluster_ranking: ClRank, clustered_topX: int = 4, unclustered_topX: int = 10, - ) -> dict[Path, str]: + ) -> dict[Path, Path]: # Set mapping of generated files - top_ranked_mapping: dict[Path, str] = {} + top_ranked_mapping: dict[Path, Path] = {} # Read table capri_df = read_capri_table(capri_filename, comment="#") @@ -413,15 +413,15 @@ def get_top_ranked_mapping( struct_gz = Path(f"{struct}.gz") # copy the structure if Path(struct).exists(): - top_ranked_mapping[struct] = target_name + top_ranked_mapping[struct] = Path(target_name) elif struct_gz.exists(): - top_ranked_mapping[struct_gz] = target_name + top_ranked_mapping[struct_gz] = Path(target_name) else: log.warning(f"structure {struct} not found") return top_ranked_mapping def zip_top_ranked( - top_ranked_mapping: dict[Path, str], + top_ranked_mapping: dict[Path, Path], summary_name: str, gen_archive: bool, ) -> Optional[Path]: @@ -451,9 +451,9 @@ def zip_top_ranked( for ori_fpath, new_name in top_ranked_mapping.items(): # If already compressed if ori_fpath.suffix == ".gz": - copied_fpath = shutil.copy(ori_fpath, ".") + copied_fpath = Path(shutil.copy(ori_fpath, ".")) # unpack the file - _unpack_gz(copied_fpath.name) + _unpack_gz(copied_fpath) # Rename it shutil.move(copied_fpath.name.replace(".gz", ""), new_name) else: @@ -482,7 +482,7 @@ def zip_top_ranked( # Create new path next_filepath = Path(output_fname, str(new_name)) # Hold it in mapping dict - top_ranked_mapping[ori_fpath] = str(next_filepath) + top_ranked_mapping[ori_fpath] = Path(next_filepath) # Displace file shutil.move(new_name, top_ranked_mapping[ori_fpath]) log.info(f"Top structures copied into {output_fname}!") @@ -778,13 +778,12 @@ def main( offline=offline, mode=mode, ncores=ncores, - #self_contained=self_contained, - self_contained=True, + self_contained=self_contained, ) except Exception as e: log.warning( f"Could not execute the analysis for step {step}. " - f"The following error occurred {e}" + f"The following error occurred: {e}" ) bad_folder_paths.append(target_path) else: diff --git a/src/haddock/libs/libplots.py b/src/haddock/libs/libplots.py index b85f2bae7..18e3bcd46 100644 --- a/src/haddock/libs/libplots.py +++ b/src/haddock/libs/libplots.py @@ -884,7 +884,7 @@ def clt_table_handler( topX_clusters: int = 10, clustered_topX: int = 4, unclustered_topX: int = 10, - top_ranked_mapping: Optional[dict[Path, str]] = None, + top_ranked_mapping: Optional[dict[Path, Path]] = None, ) -> pd.DataFrame: """ Create a dataframe including data for tables. @@ -914,29 +914,30 @@ def clt_table_handler( clusters_df = clusters_df.round(2) structs_df = structs_df.round(2) + # if the run will be cleaned, the structures are going to be gzipped if not top_ranked_mapping: - # if the run will be cleaned, the structures are going to be gzipped - if is_cleaned and not top_ranked_mapping: + if is_cleaned: # substitute the values in the df by adding .gz at the end structs_df['model'] = structs_df['model'].replace( to_replace=r"(\.pdb)$", value=r".pdb.gz", regex=True, - ) - - # ss_file is in NN_caprieval/ while report is in - # analysis/NN_caprieval_analysis/ - # need to correct model paths by prepending ../ - def correct_relative_paths( - path: str, - top_ranked_mapping: Optional[dict[Path, str]], - ) -> str: - try: - new_path = top_ranked_mapping[Path(path)] - except KeyError: - new_path = f"../{path}" - return new_path - structs_df['model'] = structs_df['model'].apply( - lambda x: correct_relative_paths(x, top_ranked_mapping) - ) + ) + else: + # ss_file is in NN_caprieval/ while report is in + # analysis/NN_caprieval_analysis/ + # need to correct model paths by prepending ../ + def correct_relative_paths( + path: str, + top_ranked_mapping: Optional[dict[Path, Path]], + ) -> str: + try: + new_path = top_ranked_mapping[path] + except (KeyError, TypeError, ): + new_path = f"../{path}" + return new_path + + structs_df['model'] = structs_df['model'].apply( + lambda x: correct_relative_paths(x, top_ranked_mapping) + ) is_unclustered = clusters_df["cluster_rank"].unique().tolist() == ["-"] # If unclustered, we only want to show the top 10 structures in a table.