Skip to content

Commit

Permalink
fix analyse on interactive steps
Browse files Browse the repository at this point in the history
  • Loading branch information
VGPReys committed Jan 29, 2025
1 parent 03d0143 commit 4d68063
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 42 deletions.
28 changes: 17 additions & 11 deletions integration_tests/test_full_workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,13 @@
from pathlib import Path
import os
import shutil
from haddock.libs.libworkflow import WorkflowManager

from haddock.clis.cli import main as cli_main
from haddock.clis.cli_analyse import main as cli_analyse
from haddock.clis.cli_re import maincli
from haddock.core.typing import Any
from haddock.libs.libworkflow import WorkflowManager

from integration_tests import GOLDEN_DATA


Expand Down Expand Up @@ -66,7 +71,7 @@ def test_interactive_analysis_on_workflow(monkeypatch):

monkeypatch.chdir(tmpdir)

from haddock.clis.cli import main as cli_main

cli_main(
Path("workflow.cfg"),
)
Expand All @@ -85,7 +90,7 @@ def test_interactive_analysis_on_workflow(monkeypatch):

# now running interactive re-clustering
clustfcc_dir = f"{run_dir}/2_clustfcc"
from haddock.clis.cli_re import maincli

# faking sys.argv in input to haddock3-re
monkeypatch.setattr("sys.argv",
["haddock3-re", "clustfcc", clustfcc_dir, "-f", "0.7"]
Expand All @@ -106,14 +111,15 @@ def test_interactive_analysis_on_workflow(monkeypatch):
assert Path(run_dir, "3_caprieval_interactive/capri_ss.tsv").exists() is True

# now analyse the interactive folders
from haddock.clis.cli_analyse import main as cli_analyse
cli_analyse(run_dir,
[2,3],
10,
format=None,
scale=None,
is_cleaned=True,
inter=True)
cli_analyse(
run_dir,
[2, 3],
10,
format=None,
scale=None,
is_cleaned=True,
inter=True,
)
exp_clustfcc_dir = Path(run_dir, "analysis", "2_clustfcc_interactive_analysis")
exp_caprieval_dir = Path(run_dir, "analysis", "3_caprieval_interactive_analysis")
assert os.path.isdir(exp_clustfcc_dir) is True
Expand Down
21 changes: 10 additions & 11 deletions src/haddock/clis/cli_analyse.py
Original file line number Diff line number Diff line change
Expand Up @@ -370,9 +370,9 @@ def get_top_ranked_mapping(
cluster_ranking: ClRank,
clustered_topX: int = 4,
unclustered_topX: int = 10,
) -> dict[Path, str]:
) -> dict[Path, Path]:
# Set mapping of generated files
top_ranked_mapping: dict[Path, str] = {}
top_ranked_mapping: dict[Path, Path] = {}

# Read table
capri_df = read_capri_table(capri_filename, comment="#")
Expand Down Expand Up @@ -413,15 +413,15 @@ def get_top_ranked_mapping(
struct_gz = Path(f"{struct}.gz")
# copy the structure
if Path(struct).exists():
top_ranked_mapping[struct] = target_name
top_ranked_mapping[struct] = Path(target_name)
elif struct_gz.exists():
top_ranked_mapping[struct_gz] = target_name
top_ranked_mapping[struct_gz] = Path(target_name)
else:
log.warning(f"structure {struct} not found")
return top_ranked_mapping

def zip_top_ranked(
top_ranked_mapping: dict[Path, str],
top_ranked_mapping: dict[Path, Path],
summary_name: str,
gen_archive: bool,
) -> Optional[Path]:
Expand Down Expand Up @@ -451,9 +451,9 @@ def zip_top_ranked(
for ori_fpath, new_name in top_ranked_mapping.items():
# If already compressed
if ori_fpath.suffix == ".gz":
copied_fpath = shutil.copy(ori_fpath, ".")
copied_fpath = Path(shutil.copy(ori_fpath, "."))
# unpack the file
_unpack_gz(copied_fpath.name)
_unpack_gz(copied_fpath)
# Rename it
shutil.move(copied_fpath.name.replace(".gz", ""), new_name)
else:
Expand Down Expand Up @@ -482,7 +482,7 @@ def zip_top_ranked(
# Create new path
next_filepath = Path(output_fname, str(new_name))
# Hold it in mapping dict
top_ranked_mapping[ori_fpath] = str(next_filepath)
top_ranked_mapping[ori_fpath] = Path(next_filepath)
# Displace file
shutil.move(new_name, top_ranked_mapping[ori_fpath])
log.info(f"Top structures copied into {output_fname}!")
Expand Down Expand Up @@ -778,13 +778,12 @@ def main(
offline=offline,
mode=mode,
ncores=ncores,
#self_contained=self_contained,
self_contained=True,
self_contained=self_contained,
)
except Exception as e:
log.warning(
f"Could not execute the analysis for step {step}. "
f"The following error occurred {e}"
f"The following error occurred: {e}"
)
bad_folder_paths.append(target_path)
else:
Expand Down
41 changes: 21 additions & 20 deletions src/haddock/libs/libplots.py
Original file line number Diff line number Diff line change
Expand Up @@ -884,7 +884,7 @@ def clt_table_handler(
topX_clusters: int = 10,
clustered_topX: int = 4,
unclustered_topX: int = 10,
top_ranked_mapping: Optional[dict[Path, str]] = None,
top_ranked_mapping: Optional[dict[Path, Path]] = None,
) -> pd.DataFrame:
"""
Create a dataframe including data for tables.
Expand Down Expand Up @@ -914,29 +914,30 @@ def clt_table_handler(
clusters_df = clusters_df.round(2)
structs_df = structs_df.round(2)

# if the run will be cleaned, the structures are going to be gzipped
if not top_ranked_mapping:
# if the run will be cleaned, the structures are going to be gzipped
if is_cleaned and not top_ranked_mapping:
if is_cleaned:
# substitute the values in the df by adding .gz at the end
structs_df['model'] = structs_df['model'].replace(
to_replace=r"(\.pdb)$", value=r".pdb.gz", regex=True,
)

# ss_file is in NN_caprieval/ while report is in
# analysis/NN_caprieval_analysis/
# need to correct model paths by prepending ../
def correct_relative_paths(
    path: str,
    top_ranked_mapping: Optional[dict[Path, str]],
) -> str:
    """Map a capri-table model path to its report-relative location.

    The ss file lives in NN_caprieval/ while the report is written in
    analysis/NN_caprieval_analysis/, so any path that was not copied
    (i.e. not present in *top_ranked_mapping*) must be prefixed with
    ``../`` to stay reachable from the report.

    Parameters
    ----------
    path : str
        Model path as read from the capri table.
    top_ranked_mapping : Optional[dict[Path, str]]
        Mapping from original structure path to its copied location.

    Returns
    -------
    str
        The mapped location when known, otherwise ``../<path>``.
    """
    lookup_key = Path(path)
    # Membership test instead of try/except KeyError: same observable
    # behavior (a None mapping still raises TypeError here, as the
    # original subscript did).
    if lookup_key in top_ranked_mapping:
        return top_ranked_mapping[lookup_key]
    return f"../{path}"
structs_df['model'] = structs_df['model'].apply(
lambda x: correct_relative_paths(x, top_ranked_mapping)
)
)
else:
# ss_file is in NN_caprieval/ while report is in
# analysis/NN_caprieval_analysis/
# need to correct model paths by prepending ../
def correct_relative_paths(
    path: str,
    top_ranked_mapping: Optional[dict[Path, Path]],
) -> str:
    """Map a capri-table model path to its report-relative location.

    The ss file lives in NN_caprieval/ while the report is written in
    analysis/NN_caprieval_analysis/, so any path that is not present in
    *top_ranked_mapping* must be prefixed with ``../`` to stay reachable
    from the report.

    Parameters
    ----------
    path : str
        Model path as read from the capri table.
    top_ranked_mapping : Optional[dict[Path, Path]]
        Mapping from original structure path to its copied location.

    Returns
    -------
    str
        The mapped location when known, otherwise ``../<path>``.
    """
    try:
        # The mapping is keyed by Path objects (see get_top_ranked_mapping),
        # so the raw string must be converted before lookup: a bare str key
        # never equals a Path key, which made the original lookup always
        # miss and silently ignore the mapping. str() keeps the declared
        # `-> str` return type honest on a hit.
        new_path = str(top_ranked_mapping[Path(path)])
    except (KeyError, TypeError):
        # KeyError: structure was not copied/recorded.
        # TypeError: no mapping was provided (None).
        new_path = f"../{path}"
    return new_path

structs_df['model'] = structs_df['model'].apply(
lambda x: correct_relative_paths(x, top_ranked_mapping)
)

is_unclustered = clusters_df["cluster_rank"].unique().tolist() == ["-"]
# If unclustered, we only want to show the top 10 structures in a table.
Expand Down

0 comments on commit 4d68063

Please sign in to comment.