Skip to content

Commit

Permalink
use pooch for PLB files
Browse files Browse the repository at this point in the history
The previous solution used io.StringIO, which RDKit doesn't like (and which wasn't documented as an accepted input)
  • Loading branch information
richardjgowers committed Dec 20, 2023
1 parent 8658083 commit 215dace
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 82 deletions.
82 changes: 33 additions & 49 deletions gufe/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
import importlib.resources
import urllib.request
from urllib.error import URLError
import io
import functools
import pooch
import pytest
from rdkit import Chem
from rdkit.Chem import AllChem
Expand All @@ -20,58 +20,42 @@
else:
HAS_INTERNET = True

# Pooch registry of the protein PDB files from the openforcefield
# protein-ligand-benchmark repository that the tests use as inputs.
# Use ``PLB_files.fetch('<target>/01_protein/crd/protein.pdb')`` to obtain a
# path to the file on disk.
# NOTE(review): the cache directory is named 'pdbinf' -- confirm this is
# intentional here rather than a project-specific cache directory.
PLB_files = pooch.create(
# downloaded files are stored in the per-user OS cache directory
path=pooch.os_cache('pdbinf'),
# the URL is pinned to a specific commit so the hashes below stay valid
base_url='https://github.com/openforcefield/protein-ligand-benchmark/raw/d3387602bbeb0167abf00dfb81753d8936775dd2/data/',
version=None,
# maps each file path (relative to ``base_url``) to its expected hash;
# presumably SHA256 (64 hex digits, no algorithm prefix) -- TODO confirm
registry={
'p38/01_protein/crd/protein.pdb': '3f0bf718644e7c29f5200cd3def4240ac25ef5fb1948b2e64deb5015d8a45aa4',
'mcl1/01_protein/crd/protein.pdb': 'f80ff9dd93a5d9dd6e90091e9631a8ce7fe0dc931e16543e22c1f92009660306',
'cdk2/01_protein/crd/protein.pdb': '15d1e509d7951ca45ea266d51a627d5f452dcf0bb5bd48751ae57eb29e28ab69',
'shp2/01_protein/crd/protein.pdb': 'd6759cbd135aaddaa658446064df4095d978d3681c014a0528b542d60b2c8770',
'pde2/01_protein/crd/protein.pdb': '3b7967c1717789215452cdf919520625602d5438a9d2a18620726b8b1b3a8ef0',
'cmet/01_protein/crd/protein.pdb': '155ec32941a9082dbdbbfde460ff97c88d4fe7e100e9a9577edb5a9e7b6467ae',
'ptp1b/01_protein/crd/protein.pdb': 'bfa0f9204e96aa463b80946b788c4153cd24701291007eb77638a16fd156634e',
'thrombin/01_protein/crd/protein.pdb': 'eb4ea18bef9c4c71dcdc922616d6719ee918112be87a0bd6b274c856eff1dd59',
'cdk8/01_protein/crd/protein.pdb': 'b058774526a19775d8f438b14e9d6da331b6de74e0ef9e96db575f6c0bb067b2',
'pfkfb3/01_protein/crd/protein.pdb': '4367710db0dbf284cc715ae9a8dd82d06bd77dcc3fb0885678e16632a2732dcc',
'tyk2/01_protein/crd/protein.pdb': '9090684f4bdae90afbe5f2698a14c778396c024c19ceb6333de4808d9e29fae6',
'syk/01_protein/crd/protein.pdb': 'f6199d0c1818eb5bb24e164426789cf39cae7aa32c8ca2e98f5f44d299a6f82f',
'tnks2/01_protein/crd/protein.pdb': 'fc7681a05dbf07590aa8de133f981b6d8ae9cebcc23d54addc2c4fe80be80299',
'eg5/01_protein/crd/protein.pdb': 'f2964a785c922502dc86fb4e2e5295d32d41d5b68b8c3246e989de5234c3fd0f',
'hif2a/01_protein/crd/protein.pdb': '5bbf520e7c102a65cc7ba0253fd66f43562f77284c82b3b9613e997b7ac76c93',

},
)

class URLFileLike:
def __init__(self, url, encoding='utf-8'):
self.url = url
self.encoding = encoding
self.data = None

def __call__(self):
@pytest.fixture(params=['p38', 'mcl1', 'cdk2', 'shp2', 'pde2', 'cmet', 'ptp1b',
'thrombin', 'cdk8', 'pfkfb3', 'tyk2', 'syk', 'tnks2',
'eg5', 'hif2a', '181l'])
def PDB_files(request):
if request.param == '181l':
with importlib.resources.path('gufe.tests.data', '181l.pdb') as file:
return str(file)
else:
if not HAS_INTERNET: # pragma: no-cover
pytest.skip("Skipping because internet seems faulty")

if self.data is None:
req = urllib.request.urlopen(self.url)
self.data = req.read().decode(self.encoding)

return io.StringIO(self.data)


def get_test_filename(filename):
with importlib.resources.path('gufe.tests.data', filename) as file:
return str(file)


_benchmark_pdb_names = [
"cmet_protein",
"hif2a_protein",
"mcl1_protein",
"p38_protein",
"ptp1b_protein",
"syk_protein",
"thrombin_protein",
"tnsk2_protein",
"tyk2_protein",
]


_pl_benchmark_url_pattern = (
"https://github.com/OpenFreeEnergy/openfe-benchmarks/blob/main/openfe_benchmarks/data/{name}.pdb?raw=true"
)


PDB_BENCHMARK_LOADERS = {
name: URLFileLike(url=_pl_benchmark_url_pattern.format(name=name))
for name in _benchmark_pdb_names
}

PDB_FILE_LOADERS = {
name: lambda: get_test_filename(name)
for name in ["181l.pdb"]
}

ALL_PDB_LOADERS = dict(**PDB_BENCHMARK_LOADERS, **PDB_FILE_LOADERS)
return PLB_files.fetch('{}/01_protein/crd/protein.pdb'.format(request.param))


@pytest.fixture
Expand Down
50 changes: 17 additions & 33 deletions gufe/tests/test_proteincomponent.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from openmm import unit
from numpy.testing import assert_almost_equal

from .conftest import ALL_PDB_LOADERS
from .conftest import PLB_files


@pytest.fixture
Expand Down Expand Up @@ -94,11 +94,8 @@ class TestProteinComponent(GufeTokenizableTestsMixin):
def instance(self, PDB_181L_path):
return self.cls.from_pdb_file(PDB_181L_path, name="Steve")

# From
@pytest.mark.parametrize('in_pdb_path', ALL_PDB_LOADERS.keys())
def test_from_pdb_file(self, in_pdb_path):
in_pdb_io = ALL_PDB_LOADERS[in_pdb_path]()
p = self.cls.from_pdb_file(in_pdb_io, name="Steve")
def test_from_pdb_file(self, PDB_files):
p = self.cls.from_pdb_file(PDB_files, name="Steve")

assert isinstance(p, ProteinComponent)
assert p.name == "Steve"
Expand Down Expand Up @@ -177,21 +174,16 @@ def test_to_pdb_input_types(self, PDB_181L_OpenMMClean_path, tmp_path,
output_func=p.to_pdb_file
)

@pytest.mark.parametrize('in_pdb_path', ALL_PDB_LOADERS.keys())
def test_to_pdb_round_trip(self, in_pdb_path, tmp_path):
in_pdb_io = ALL_PDB_LOADERS[in_pdb_path]()

p = self.cls.from_pdb_file(in_pdb_io, name="Wuff")
out_file_name = "tmp_"+in_pdb_path+".pdb"
def test_to_pdb_round_trip(self, PDB_files, tmp_path):
p = self.cls.from_pdb_file(PDB_files, name="Wuff")
out_file_name = "tmp_foo.pdb"
out_file = tmp_path / out_file_name

p.to_pdb_file(str(out_file))

ref_in_pdb_io = ALL_PDB_LOADERS[in_pdb_path]()

# generate openMM reference file:
openmm_pdb = pdbfile.PDBFile(ref_in_pdb_io)
out_ref_file_name = "tmp_"+in_pdb_path+"_openmm_ref.pdb"
openmm_pdb = pdbfile.PDBFile(PDB_files)
out_ref_file_name = "tmp_foo_openmm_ref.pdb"
out_ref_file = tmp_path / out_ref_file_name

pdbfile.PDBFile.writeFile(openmm_pdb.topology, openmm_pdb.positions, file=open(str(out_ref_file), "w"))
Expand All @@ -213,33 +205,23 @@ def test_dummy_from_dict(self, PDB_181L_OpenMMClean_path):

assert p == p2

# parametrize
@pytest.mark.parametrize('in_pdb_path', ALL_PDB_LOADERS.keys())
def test_to_openmm_positions(self, in_pdb_path):
in_pdb_io = ALL_PDB_LOADERS[in_pdb_path]()
ref_in_pdb_io = ALL_PDB_LOADERS[in_pdb_path]()

openmm_pdb = pdbfile.PDBFile(ref_in_pdb_io)
def test_to_openmm_positions(self, PDB_files):
openmm_pdb = pdbfile.PDBFile(PDB_files)
openmm_pos = openmm_pdb.positions

p = self.cls.from_pdb_file(in_pdb_io, name="Bob")
p = self.cls.from_pdb_file(PDB_files, name="Bob")
gufe_openmm_pos = p.to_openmm_positions()

v1 = gufe_openmm_pos.value_in_unit(unit.nanometer)
v2 = openmm_pos.value_in_unit(unit.nanometer)

assert_almost_equal(actual=v1, desired=v2, decimal=6)

# parametrize
@pytest.mark.parametrize('in_pdb_path', ALL_PDB_LOADERS.keys())
def test_to_openmm_topology(self, in_pdb_path):
in_pdb_io = ALL_PDB_LOADERS[in_pdb_path]()
ref_in_pdb_io = ALL_PDB_LOADERS[in_pdb_path]()

openmm_pdb = pdbfile.PDBFile(ref_in_pdb_io)
def test_to_openmm_topology(self, PDB_files):
openmm_pdb = pdbfile.PDBFile(PDB_files)
openmm_top = openmm_pdb.topology

p = self.cls.from_pdb_file(in_pdb_io, name="Bob")
p = self.cls.from_pdb_file(PDB_files, name="Bob")
gufe_openmm_top = p.to_openmm_topology()
assert_topology_equal(openmm_top, gufe_openmm_top)

Expand Down Expand Up @@ -290,7 +272,9 @@ def test_protein_total_charge(self, PDB_181L_path):
assert m1.total_charge == 7

def test_protein_total_charge_thromb(self):
m1 = self.cls.from_pdb_file(ALL_PDB_LOADERS["thrombin_protein"]())
f = PLB_files.fetch('thrombin/01_protein/crd/protein.pdb')

m1 = self.cls.from_pdb_file(f)

assert m1.total_charge == 6

Expand Down

0 comments on commit 215dace

Please sign in to comment.