refactor: modules & bandit checks

YaoYinYing · YaoYinYing · commit 716227199969 · 2024-10-09T11:38:52.000+08:00
diff --git a/.bandit b/.bandit
@@ -0,0 +1,3 @@
+[bandit]
+exclude = tests
+skips = B103,B607,B603,B101,B404,B311
diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile
@@ -9,16 +9,16 @@ ENV https_proxy=http://a100-internal.yaoyy.moe:10089
 ENV all_proxy=http://a100-internal.yaoyy.moe:10089
 ENV GITHUB_ROSETTA_TEST=YES
 
-RUN apt update -y && apt install git curl wget -y
+RUN apt-get update -y && apt-get install git curl wget -y
 
 RUN python -m pip config set global.index-url https://mirrors.bfsu.edu.cn/pypi/web/simple \
-    && python -m pip install --upgrade pip \
-    && python -m pip install 'flit>=3.8.0'
+    && python -m pip install --no-cache-dir --upgrade pip \
+    && python -m pip install --no-cache-dir 'flit>=3.8.0'
 
 ENV FLIT_ROOT_INSTALL=1
 
 COPY pyproject.toml .
 RUN touch README.md \
     && mkdir -p src/RosettaPy \
-    && python -m flit install --only-deps --deps develop \
+    # flit install has no --no-cache-dir option; the pip process it spawns honours PIP_NO_CACHE_DIR instead
+    && PIP_NO_CACHE_DIR=1 python -m flit install --only-deps --deps develop \
     && rm -r pyproject.toml README.md src
diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
@@ -1,9 +1,9 @@
 name: Python CI
 on:
   push:
-    branches: [ main ]
+    branches: [main]
   pull_request:
-    branches: [ main ]
+    branches: [main]
   release:
     types: [created]
   workflow_dispatch:
@@ -19,16 +19,15 @@ jobs:
           - "3.10"
           - "3.11"
           - "3.12"
+          - "3.13"
 
     uses: YaoYinYing/action-python/.github/workflows/validation.yml@v7.3.1-post-6
     with:
-      workdir: '.'
+      workdir: "."
       python-version: ${{ matrix.python }}
     secrets:
       CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
 
-
-
   publish:
     strategy:
       fail-fast: false
diff --git a/.github/workflows/RosettaCI.yml b/.github/workflows/RosettaCI.yml
@@ -44,7 +44,7 @@ jobs:
         run: |
           apt update -y
           apt install gnupg2 git -y
-          pip install '.[test,wrapper]' -U
+          pip install '.[test]' -U
 
       - name: Run test cases
         run: |
diff --git a/README.md b/README.md
@@ -83,7 +83,7 @@ Examples of valid binary filenames:
 
 ## Installation
 
-Ensure you have Python 3.6 or higher installed.
+Ensure you have Python 3.8 or higher installed.
 
 ### Install via PyPI
 
diff --git a/pyproject.toml b/pyproject.toml
@@ -7,7 +7,7 @@ name =  "RosettaPy"
 authors = [
     {name = "Yinying Yao", email = "yaoyy.hi@gmail.com"},
 ]
-description = "Searching for Rosetta Binaries."
+description = "A Python utility for wrapping Rosetta command line tools."
 readme = "README.md"
 classifiers = [
     "Development Status :: 6 - Mature",
@@ -20,23 +20,26 @@ classifiers = [
     "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
-    "Programming Language :: Python :: 3.12"
+    "Programming Language :: Python :: 3.12",
+    "Programming Language :: Python :: 3.13"
 ]
 requires-python = ">=3.8"
 dynamic = ["version"]
 
-[project.optional-dependencies]
-spark = [
-    "pyspark>=3.0.0"
-]
-wrapper = [
+dependencies = [
     "joblib",
     "absl-py",
     "pandas",
     "biopython",
     "rdkit",
     "numpy>=1.20.3,<3"
 ]
+
+[project.optional-dependencies]
+spark = [
+    "pyspark>=3.0.0"
+]
+
 test = [
     "bandit[toml]==1.7.10",
     "black==24.8.0",
diff --git a/src/RosettaPy/__init__.py b/src/RosettaPy/__init__.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 from .rosetta_finder import RosettaBinary, RosettaFinder, main
-from .rosetta import Rosetta, RosettaScriptsVariableGroup, MPI_node, RosettaEnergyUnitAnalyser
+from .rosetta import Rosetta, RosettaScriptsVariableGroup, MPI_node
+from .analyser import RosettaEnergyUnitAnalyser
 from .utils import timing, isolate
 
 __all__ = [
@@ -15,4 +16,4 @@
     "RosettaEnergyUnitAnalyser",
 ]
 
-__version__ = "0.1.1"
+__version__ = "0.1.2"
diff --git a/src/RosettaPy/analyser/__init__.py b/src/RosettaPy/analyser/__init__.py
@@ -0,0 +1,3 @@
+from .reu import RosettaEnergyUnitAnalyser
+
+__all__ = ["RosettaEnergyUnitAnalyser"]
diff --git a/src/RosettaPy/analyser/reu.py b/src/RosettaPy/analyser/reu.py
@@ -0,0 +1,120 @@
+from dataclasses import dataclass
+import os
+from typing import Dict, Literal, Optional, Tuple, Union
+import warnings
+
+import pandas as pd
+
+
+@dataclass
+class RosettaEnergyUnitAnalyser:
+    """
+    A tool class for analyzing Rosetta energy calculation results.
+
+    The score table is loaded into `self.df` when the instance is created.
+
+    Parameters:
+    - score_file (str): The path to the score file or directory containing score files (`*.sc`).
+    - score_term (str, optional): The column name in the score file to use as the score. Defaults to "total_score".
+    - job_id (Optional[str], optional): An identifier for the job. Defaults to None.
+
+    Raises:
+    - FileNotFoundError: If `score_file` is neither an existing file nor an existing directory.
+    - ValueError: If a directory holds no `*.sc` file, or `score_term` is not a column of the loaded table.
+    """
+
+    score_file: str
+    score_term: str = "total_score"
+
+    job_id: Optional[str] = None
+
+    @staticmethod
+    def scorefile2df(score_file: str) -> pd.DataFrame:
+        """
+        Converts a score file into a pandas DataFrame.
+
+        Parameters:
+        - score_file (str): Path to the score file.
+
+        Returns:
+        - pd.DataFrame: DataFrame containing the data from the score file, without the "SCORE:" column.
+        """
+        # The first line of the file is skipped; the following line provides the column names.
+        df = pd.read_fwf(score_file, skiprows=1)
+
+        if "SCORE:" in df.columns:
+            df = df.drop(columns="SCORE:")
+
+        return df
+
+    def __post_init__(self):
+        """
+        Initializes the DataFrame based on the provided score file or directory.
+
+        For a directory, every `*.sc` file directly inside it is loaded and the tables are concatenated.
+        """
+        if os.path.isfile(self.score_file):
+            self.df = self.scorefile2df(self.score_file)
+        elif os.path.isdir(self.score_file):
+            # os.listdir order is arbitrary; sorting keeps the row order of the merged table reproducible.
+            score_files = sorted(f for f in os.listdir(self.score_file) if f.endswith(".sc"))
+            if not score_files:
+                # pd.concat([]) would otherwise fail with the opaque "No objects to concatenate".
+                raise ValueError(f"No score file (*.sc) found in directory {self.score_file}.")
+
+            dfs = [self.scorefile2df(os.path.join(self.score_file, f)) for f in score_files]
+            warnings.warn(UserWarning(f"Concatenate {len(dfs)} score files"))
+            self.df = pd.concat(dfs, axis=0, ignore_index=True)
+        else:
+            raise FileNotFoundError(f"Score file {self.score_file} not found.")
+
+        if self.score_term not in self.df.columns:
+            raise ValueError(f'Score term "{self.score_term}" not found in score file.')
+
+    @staticmethod
+    def df2dict(
+        dfs: pd.DataFrame, k: str = "total_score"
+    ) -> Tuple[Dict[Literal["score", "decoy"], Union[str, float]], ...]:
+        """
+        Converts a DataFrame into a tuple of dictionaries with scores and decoys.
+
+        Parameters:
+        - dfs (pd.DataFrame): DataFrame containing the scores and a "description" column.
+        - k (str, optional): Column name to use as the score. Defaults to "total_score".
+
+        Returns:
+        - Tuple[Dict[Literal["score", "decoy"], Union[str, float]], ...]: One dictionary per row, in row order.
+        """
+        # Walk both columns in lockstep: linear in the number of rows, and each row maps to its
+        # own values even if index labels repeat.
+        t = tuple({"score": float(score), "decoy": str(decoy)} for score, decoy in zip(dfs[k], dfs["description"]))
+
+        return t  # type: ignore
+
+    @property
+    def best_decoy(self) -> Dict[Literal["score", "decoy"], Union[str, float]]:
+        """
+        Returns the best decoy based on the score term.
+
+        Returns:
+        - Dict[Literal["score", "decoy"], Union[str, float]]: Dictionary containing the score and decoy of the
+          entry with the lowest score, or an empty dictionary when the table has no rows.
+        """
+        if self.df.empty:
+            return {}
+        return self.top(1)[0]
+
+    def top(
+        self, rank: int = 1, score_term: Optional[str] = None
+    ) -> Tuple[Dict[Literal["score", "decoy"], Union[str, float]], ...]:
+        """
+        Returns the top `rank` decoys based on the specified score term (ascending, lowest value first).
+
+        Parameters:
+        - rank (int, optional): The number of top entries to return. Defaults to 1.
+        - score_term (Optional[str], optional): The column name to use as the score. When omitted, or when the
+          table has no such column, the instance attribute `score_term` is used instead.
+
+        Returns:
+        - Tuple[Dict[Literal["score", "decoy"], Union[str, float]], ...]: Tuple of dictionaries containing scores
+          and decoys of the top entries.
+
+        Raises:
+        - ValueError: If `rank` is not greater than 0.
+        """
+        if rank <= 0:
+            raise ValueError("Rank must be greater than 0")
+
+        # Resolve the column once; unknown columns silently fall back to the instance default.
+        if score_term is None or score_term not in self.df.columns:
+            score_term = self.score_term
+
+        df = self.df.sort_values(by=score_term).head(rank)
+
+        return self.df2dict(dfs=df, k=score_term)
diff --git a/src/RosettaPy/app/utils/smiles2param.py b/src/RosettaPy/app/utils/smiles2param.py
@@ -160,7 +160,7 @@ def convert(self, ligands: Dict[str, str]):
             try:
                 cs = Chem.CanonSmiles(ds)
                 c_smiles.append(cs)
-            except:
+            except Exception:
                 print('Invalid SMILES: %s\n%s' % (i, ds))
         print(c_smiles)
 
diff --git a/src/RosettaPy/node/__init__.py b/src/RosettaPy/node/__init__.py
@@ -0,0 +1,3 @@
+from .mpi import MPI_node
+
+__all__ = ['MPI_node']
diff --git a/src/RosettaPy/node/mpi.py b/src/RosettaPy/node/mpi.py
@@ -0,0 +1,88 @@
+
+import contextlib
+import copy
+from dataclasses import dataclass
+import os
+import random
+import shutil
+import subprocess
+from typing import Dict, List, Optional
+import warnings
+
+
+class MPI_IncompatibleInputWarning(RuntimeWarning):
+    """
+    Warning category, presumably for input that cannot be used together with MPI.
+
+    NOTE(review): nothing in the visible part of this module emits it; confirm the intended use with callers.
+    """
+
+
+@dataclass
+class MPI_node:
+    """
+    Builds the MPI launcher prefix that is prepended to a command line.
+
+    Parameters:
+    - nproc (int, optional): Number of processes for a local run. Replaced by the sum of the `node_matrix`
+      values when a node matrix is given. Defaults to 0.
+    - node_matrix (Optional[Dict[str, int]], optional): Mapping of node ID to process slots. When it is a dict,
+      a hostfile named `node_file` is written to the current directory on construction. Defaults to None.
+
+    Raises:
+    - RuntimeError: If none of the candidate MPI executables is found on PATH.
+    """
+
+    nproc: int = 0
+    node_matrix: Optional[Dict[str, int]] = None  # Node ID: nproc
+    # The two names below carry no annotation, so they are class attributes evaluated once when the class
+    # is defined, not dataclass fields. `node_file` is overridden per instance in __post_init__.
+    node_file = f"nodefile_{random.randint(1,9_999_999_999)}.txt"
+
+    user = os.getuid()
+
+    def __post_init__(self):
+        """
+        Locates the MPI executable and, when a node matrix is given, writes the hostfile.
+        """
+        # Give each instance its own hostfile name; with the shared class-level name, two instances
+        # would overwrite and delete each other's hostfile.
+        self.node_file = f"nodefile_{random.randint(1,9_999_999_999)}.txt"
+
+        # NOTE(review): "mpicc" is kept from the original candidate list, but it is normally a compiler
+        # wrapper rather than a launcher - confirm it is meant to be here.
+        self.mpi_excutable: Optional[str] = None
+        for mpi_exec in ("mpirun", "mpicc"):
+            self.mpi_excutable = shutil.which(mpi_exec)
+            if self.mpi_excutable is not None:
+                break
+        else:
+            # Fail with a clear message instead of passing a non-string placeholder to shutil.which.
+            raise RuntimeError("No MPI executable (mpirun, mpicc) found in PATH.")
+
+        if not isinstance(self.node_matrix, dict):
+            return
+
+        with open(self.node_file, "w") as f:
+            for node, nproc in self.node_matrix.items():
+                f.write(f"{node} slots={nproc}\n")
+        self.nproc = sum(self.node_matrix.values())  # fix nproc to real node matrix
+
+    @property
+    def local(self) -> List[str]:
+        """Launcher prefix for a run without a hostfile; a new list on every access."""
+        return [self.mpi_excutable, "--use-hwthread-cpus", "-np", str(self.nproc)]
+
+    @property
+    def host_file(self) -> List[str]:
+        """Launcher prefix that points MPI at the written hostfile; a new list on every access."""
+        return [self.mpi_excutable, "--hostfile", self.node_file]
+
+    @contextlib.contextmanager
+    def apply(self, cmd: List[str]):
+        """
+        Context manager yielding `cmd` prefixed with the MPI launcher arguments.
+
+        Parameters:
+        - cmd (List[str]): The command line to wrap. It is copied, not modified.
+
+        Yields:
+        - List[str]: Launcher prefix followed by the command. "--allow-run-as-root" is added (with a
+          UserWarning) when the recorded user ID is 0.
+
+        The hostfile is removed when the context exits, so an instance built from a node matrix
+        can drive only one `apply` block.
+        """
+        cmd_copy = copy.copy(cmd)
+        m = self.local if not self.node_matrix else self.host_file
+        if self.user == 0:
+            m.append("--allow-run-as-root")
+            warnings.warn(UserWarning("Running Rosetta with MPI as Root User"))
+
+        try:
+            yield m + cmd_copy
+        finally:
+            # Clean up even when the body of the `with` block raises.
+            if os.path.exists(self.node_file):
+                os.remove(self.node_file)
+
+    @classmethod
+    def from_slurm(cls) -> "MPI_node":
+        """
+        Builds an instance from the environment of a Slurm job.
+
+        The node list is expanded with `scontrol show hostnames $SLURM_JOB_NODELIST`; each node gets
+        SLURM_NTASKS_PER_NODE * SLURM_CPUS_PER_TASK slots (each defaults to 1 and is raised to 1 when lower).
+
+        Returns:
+        - MPI_node: Instance configured with one entry per node.
+
+        Raises:
+        - RuntimeError: If SLURM_JOB_NODELIST is not set or the `scontrol` call exits with an error.
+        """
+        try:
+            nodes = (
+                subprocess.check_output(["scontrol", "show", "hostnames", os.environ["SLURM_JOB_NODELIST"]])
+                .decode()
+                .strip()
+                .split("\n")
+            )
+        except KeyError as e:
+            raise RuntimeError(f"Environment variable {e} not set") from None
+        except subprocess.CalledProcessError as e:
+            raise RuntimeError(f"Failed to get node list: {e.output}") from None
+
+        slurm_cpus_per_task = os.environ.get("SLURM_CPUS_PER_TASK", "1")
+        slurm_ntasks_per_node = os.environ.get("SLURM_NTASKS_PER_NODE", "1")
+
+        if int(slurm_cpus_per_task) < 1:
+            print(f"Fixing $SLURM_CPUS_PER_TASK from {slurm_cpus_per_task} to 1.")
+            slurm_cpus_per_task = "1"
+
+        if int(slurm_ntasks_per_node) < 1:
+            print(f"Fixing $SLURM_NTASKS_PER_NODE from {slurm_ntasks_per_node} to 1.")
+            slurm_ntasks_per_node = "1"
+
+        node_dict = {i: int(slurm_ntasks_per_node) * int(slurm_cpus_per_task) for i in nodes}
+
+        total_nproc = sum(node_dict.values())
+        return cls(total_nproc, node_dict)
diff --git a/src/RosettaPy/rosetta.py b/src/RosettaPy/rosetta.py

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+[bandit]`
	`2`	`+exclude = tests`
	`3`	`+skips = B103,B607,B603,B101,B404,B311`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+from .reu import RosettaEnergyUnitAnalyser`
	`2`	`+`
	`3`	`+__all__ = ["RosettaEnergyUnitAnalyser"]`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+from .mpi import MPI_node`
	`2`	`+`
	`3`	`+__all__ = ['MPI_node']`