Skip to content

Commit

Permalink
Merge pull request #658 from marrink-lab/system-meta
Browse files Browse the repository at this point in the history
move header writing in gmx files to a system level meta attribute
  • Loading branch information
pckroon authored Feb 12, 2025
2 parents df2a554 + b5b9638 commit ac93f62
Show file tree
Hide file tree
Showing 6 changed files with 120 additions and 39 deletions.
39 changes: 6 additions & 33 deletions bin/martinize2
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ def martinize(system, mappings, to_ff, delete_unknown=False, idrs=False, disorde
mappings=mappings,
to_ff=to_ff,
delete_unknown=delete_unknown,
attribute_keep=("cgsecstruct", "chain"),
attribute_keep=("cgsecstruct", "chain", "secstruct"),
attribute_must=("resname",),
attribute_stash=("resid",),
).run_system(system)
Expand Down Expand Up @@ -911,7 +911,10 @@ def entry():
write_repair=args.write_repair,
write_canon=args.write_canon,
)

system.meta["header"].extend(("This file was generated using the following command:",
" ".join(sys.argv),
VERSION,
))
LOGGER.info("Read input.", type="step")
for molecule in system.molecules:
LOGGER.debug("Read molecule {}.", molecule, type="step")
Expand Down Expand Up @@ -975,16 +978,6 @@ def entry():
'for this force field',
type="missing-feature")

ss_sequence = list(
itertools.chain(
*(
dssp.sequence_from_residues(molecule, "secstruct")
for molecule in system.molecules
if selectors.is_protein(molecule)
)
)
)

if args.cystein_bridge == "none":
vermouth.RemoveCysteinBridgeEdges().run_system(system)
elif args.cystein_bridge != "auto":
Expand Down Expand Up @@ -1147,33 +1140,13 @@ def entry():
LOGGER.log(loglevel, entry, **fmt_arg, type="model")

# Write the topology if requested
# grompp has a limit in the number of character it can read per line
# (due to the size limit of a buffer somewhere in its implementation).
# The command line can be longer than this limit and therefore
# prevent grompp from reading the topology.
gromacs_char_limit = 4000 # the limit is actually 4095, but I play safe
command = " ".join(sys.argv)
if len(command) > gromacs_char_limit:
command = command[:gromacs_char_limit] + " ..."
header = [
"This file was generated using the following command:",
command,
VERSION,
]
if None not in ss_sequence:
header += [
"The following sequence of secondary structure ",
"was used for the full system:",
"".join(ss_sequence),
]

if args.top_path is not None:
write_gmx_topology(system,
args.top_path,
itp_paths=itp_paths,
C6C12=False,
defines=defines,
header=header)
defines=defines)

# Write a PDB file.
vermouth.pdb.write_pdb(system, str(args.outpath), omit_charges=True)
Expand Down
23 changes: 23 additions & 0 deletions vermouth/dssp/dssp.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import subprocess
import tempfile
import re
import itertools

from ..file_writer import deferred_open
from ..pdb import pdb
Expand Down Expand Up @@ -541,6 +542,24 @@ def convert_dssp_annotation_to_martini(
raise ValueError('Not all residues have a DSSP assignation.')


def gmx_system_header(system):

ss_sequence = list(
itertools.chain(
*(
sequence_from_residues(molecule, "secstruct")
for molecule in system.molecules
if is_protein(molecule)
)
)
)

if None not in ss_sequence and ss_sequence:
system.meta["header"].extend(("The following sequence of secondary structure ",
"was used for the full system:",
"".join(ss_sequence),
))

class AnnotateDSSP(Processor):
name = 'AnnotateDSSP'

Expand Down Expand Up @@ -570,6 +589,10 @@ def run_molecule(molecule):
convert_dssp_annotation_to_martini(molecule)
return molecule

def run_system(self, system):
gmx_system_header(system)
super().run_system(system)


class AnnotateResidues(Processor):
"""
Expand Down
23 changes: 18 additions & 5 deletions vermouth/gmx/topology.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,14 @@
from ..log_helpers import StyleAdapter, get_logger
from .itp import _interaction_sorting_key
from ..data import COMMON_CITATIONS

from ..dssp.dssp import sequence_from_residues
from ..selectors import is_protein
LOGGER = StyleAdapter(get_logger(__name__))

Atomtype = namedtuple('Atomtype', 'molecule node sigma epsilon meta')
NonbondParam = namedtuple('NonbondParam', 'atoms sigma epsilon meta')


def _group_by_conditionals(interactions):
interactions_group_sorted = sorted(interactions,
key=_interaction_sorting_key
Expand Down Expand Up @@ -116,8 +118,7 @@ def write_gmx_topology(system,
itp_paths={"nonbond_params": "extra_nbparams.itp",
"atomtypes": "extra_atomtypes.itp"},
C6C12=False,
defines=(),
header=()):
defines=()):
"""
Writes a Gromacs .top file for the specified system. Gromacs topology
files are defined by directives for example `[ atomtypes ]`. However,
Expand All @@ -142,8 +143,6 @@ def write_gmx_topology(system,
C6C12 form
defines: tuple(str)
define statments to include in the topology
header: tuple(str)
any comment lines to include at the beginning
"""
if not system.molecules:
raise ValueError("No molecule in the system. Nothing to write.")
Expand Down Expand Up @@ -178,6 +177,20 @@ def write_gmx_topology(system,
molecule_groups = itertools.groupby(
system.molecules, key=lambda x: x.meta["moltype"]
)

# grompp has a limit in the number of character it can read per line
# (due to the size limit of a buffer somewhere in its implementation).
# The command line can be longer than this limit and therefore
# prevent grompp from reading the topology.
gromacs_char_limit = 4000 # the limit is actually 4095, but I play safe
_header = system.meta.get('header', [])
header = []
for line in _header:
if len(line) > gromacs_char_limit:
header.append(line[:gromacs_char_limit] + " ...")
else:
header.append(line)

for moltype, molecules in molecule_groups:
molecule = next(molecules)
if molecule.force_field is not None:
Expand Down
1 change: 1 addition & 0 deletions vermouth/system.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ def __init__(self, force_field=None):
self.force_field = force_field
self.gmx_topology_params = defaultdict(list)
self.go_params = defaultdict(list)
self.meta = defaultdict(list)

@property
def force_field(self):
Expand Down
32 changes: 31 additions & 1 deletion vermouth/tests/gmx/test_topology.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
import textwrap
import pytest
import vermouth
from itertools import product
from string import ascii_letters
from vermouth.file_writer import DeferredFileWriter
from vermouth.gmx.topology import (sigma_epsilon_to_C6_C12,
write_atomtypes,
Expand Down Expand Up @@ -178,12 +180,12 @@ def test_toplevel_topology(tmp_path, dummy_molecule):
sigma=0.43,
epsilon=2.3,
meta={}))
system.meta["header"] = ['first header comment', 'second header comment']
outpath = tmp_path / 'out.itp'
atompath = tmp_path / 'atomtypes.itp'
nbpath = tmp_path / 'nonbond_params.itp'
write_gmx_topology(system,
outpath,
header=['first header comment', 'second header comment'],
defines=('random', ),
itp_paths={"atomtypes": atompath, "nonbond_params": nbpath},
# at this level C6C12 doesn't matter; it gets
Expand All @@ -205,3 +207,31 @@ def test_toplevel_topology(tmp_path, dummy_molecule):
with open(str(outpath)) as infile:
for line, ref_line in zip(infile, ref_lines):
assert line.strip() == ref_line

@pytest.mark.parametrize('command, expected',
(
[ascii_letters, {"length": len(ascii_letters), "string": ascii_letters}],
[(ascii_letters*100), {"length": 4004, "string": (ascii_letters*100)[:4000]+" ..."}]
))
def test_gromacs_cmd_len(dummy_molecule, tmp_path, command, expected):
os.chdir(tmp_path)
system = vermouth.System()
system.add_molecule(dummy_molecule)
dummy_molecule.meta['moltype'] = "molecule_0"
system.meta['header'].extend([command])

outpath = tmp_path / 'out.itp'

write_gmx_topology(system,
outpath,
defines=('random', ),
C6C12=False)
DeferredFileWriter().write()

with open(str(tmp_path / 'molecule_0.itp')) as infile:
expected_line = infile.readlines()[0].strip()[2:]

assert len(expected_line) == expected["length"]

assert expected_line == expected["string"]
41 changes: 41 additions & 0 deletions vermouth/tests/test_dssp.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
PDB_ALA5_CG,
PDB_ALA5,
)
from vermouth.tests.helper_functions import test_molecule, create_sys_all_attrs

DSSP_EXECUTABLE = os.environ.get("VERMOUTH_TEST_DSSP", "dssp")
SECSTRUCT_1BTA = list(
Expand Down Expand Up @@ -705,3 +706,43 @@ def test_cterm_atomnames():
def test_convert_dssp_to_martini(sequence, expected):
found = dssp.convert_dssp_to_martini(sequence)
assert expected == found

@pytest.mark.parametrize('resnames, ss_string, secstruc',
( # protein resnames with secstruc
({0: "ALA", 1: "ALA", 2: "ALA",
3: "GLY", 4: "GLY",
5: "MET",
6: "ARG", 7: "ARG", 8: "ARG"},
'HHHH',
{1: "H", 2: "H", 3: "H", 4: "H"}
),
# not protein resnames, no secstruc annotated or gets written
({0: "A", 1: "A", 2: "A",
3: "B", 4: "B",
5: "C",
6: "D", 7: "D", 8: "D"},
"",
{1: "", 2: "", 3: "", 4: ""}
)
))
def test_gmx_system_header(test_molecule, resnames, ss_string, secstruc):

atypes = {0: "P1", 1: "SN4a", 2: "SN4a",
3: "SP1", 4: "C1",
5: "TP1",
6: "P1", 7: "SN3a", 8: "SP4"}

system = create_sys_all_attrs(test_molecule,
moltype="molecule_0",
secstruc=secstruc,
defaults={"chain": "A"},
attrs={"resname": resnames,
"atype": atypes})

# annotate the actual 'secstruct' attribute because create_sys_all_attrs actually annotates cgsecstruct
dssp.AnnotateResidues(attribute="secstruct",
sequence="HHHH").run_system(system)

dssp.AnnotateMartiniSecondaryStructures().run_system(system)

assert ss_string in system.meta.get('header', [''])

0 comments on commit ac93f62

Please sign in to comment.