Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

move header writing in gmx files to a system level meta attribute #658

Merged
merged 15 commits into from
Feb 12, 2025
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 6 additions & 33 deletions bin/martinize2
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ def martinize(system, mappings, to_ff, delete_unknown=False, idrs=False, disorde
mappings=mappings,
to_ff=to_ff,
delete_unknown=delete_unknown,
attribute_keep=("cgsecstruct", "chain"),
attribute_keep=("cgsecstruct", "chain", "secstruct"),
attribute_must=("resname",),
attribute_stash=("resid",),
).run_system(system)
Expand Down Expand Up @@ -911,7 +911,10 @@ def entry():
write_repair=args.write_repair,
write_canon=args.write_canon,
)

system.meta["header"].extend(("This file was generated using the following command:",
" ".join(sys.argv),
VERSION,
))
LOGGER.info("Read input.", type="step")
for molecule in system.molecules:
LOGGER.debug("Read molecule {}.", molecule, type="step")
Expand Down Expand Up @@ -975,16 +978,6 @@ def entry():
'for this force field',
type="missing-feature")

ss_sequence = list(
itertools.chain(
*(
dssp.sequence_from_residues(molecule, "secstruct")
for molecule in system.molecules
if selectors.is_protein(molecule)
)
)
)

if args.cystein_bridge == "none":
vermouth.RemoveCysteinBridgeEdges().run_system(system)
elif args.cystein_bridge != "auto":
Expand Down Expand Up @@ -1147,33 +1140,13 @@ def entry():
LOGGER.log(loglevel, entry, **fmt_arg, type="model")

# Write the topology if requested
# grompp has a limit in the number of character it can read per line
# (due to the size limit of a buffer somewhere in its implementation).
# The command line can be longer than this limit and therefore
# prevent grompp from reading the topology.
gromacs_char_limit = 4000 # the limit is actually 4095, but I play safe
command = " ".join(sys.argv)
if len(command) > gromacs_char_limit:
command = command[:gromacs_char_limit] + " ..."
header = [
"This file was generated using the following command:",
command,
VERSION,
]
if None not in ss_sequence:
header += [
"The following sequence of secondary structure ",
"was used for the full system:",
"".join(ss_sequence),
]

if args.top_path is not None:
write_gmx_topology(system,
args.top_path,
itp_paths=itp_paths,
C6C12=False,
defines=defines,
header=header)
defines=defines)

# Write a PDB file.
vermouth.pdb.write_pdb(system, str(args.outpath), omit_charges=True)
Expand Down
23 changes: 23 additions & 0 deletions vermouth/dssp/dssp.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import subprocess
import tempfile
import re
import itertools

from ..file_writer import deferred_open
from ..pdb import pdb
Expand Down Expand Up @@ -541,6 +542,24 @@ def convert_dssp_annotation_to_martini(
raise ValueError('Not all residues have a DSSP assignation.')


def gmx_system_header(system):

ss_sequence = list(
itertools.chain(
*(
sequence_from_residues(molecule, "secstruct")
for molecule in system.molecules
if is_protein(molecule)
)
)
)

if None not in ss_sequence and ss_sequence:
system.meta["header"].extend(("The following sequence of secondary structure ",
"was used for the full system:",
"".join(ss_sequence),
))

class AnnotateDSSP(Processor):
name = 'AnnotateDSSP'

Expand Down Expand Up @@ -570,6 +589,10 @@ def run_molecule(molecule):
convert_dssp_annotation_to_martini(molecule)
return molecule

def run_system(self, system):
gmx_system_header(system)
super().run_system(system)


class AnnotateResidues(Processor):
"""
Expand Down
20 changes: 15 additions & 5 deletions vermouth/gmx/topology.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,14 @@
from ..log_helpers import StyleAdapter, get_logger
from .itp import _interaction_sorting_key
from ..data import COMMON_CITATIONS

from ..dssp.dssp import sequence_from_residues
from ..selectors import is_protein
LOGGER = StyleAdapter(get_logger(__name__))

Atomtype = namedtuple('Atomtype', 'molecule node sigma epsilon meta')
NonbondParam = namedtuple('NonbondParam', 'atoms sigma epsilon meta')


def _group_by_conditionals(interactions):
interactions_group_sorted = sorted(interactions,
key=_interaction_sorting_key
Expand Down Expand Up @@ -116,8 +118,7 @@ def write_gmx_topology(system,
itp_paths={"nonbond_params": "extra_nbparams.itp",
"atomtypes": "extra_atomtypes.itp"},
C6C12=False,
defines=(),
header=()):
defines=()):
csbrasnett marked this conversation as resolved.
Show resolved Hide resolved
"""
Writes a Gromacs .top file for the specified system. Gromacs topology
files are defined by directives for example `[ atomtypes ]`. However,
Expand All @@ -142,8 +143,6 @@ def write_gmx_topology(system,
C6C12 form
defines: tuple(str)
define statments to include in the topology
header: tuple(str)
any comment lines to include at the beginning
"""
if not system.molecules:
raise ValueError("No molecule in the system. Nothing to write.")
Expand Down Expand Up @@ -178,6 +177,17 @@ def write_gmx_topology(system,
molecule_groups = itertools.groupby(
system.molecules, key=lambda x: x.meta["moltype"]
)

# grompp has a limit in the number of character it can read per line
# (due to the size limit of a buffer somewhere in its implementation).
# The command line can be longer than this limit and therefore
# prevent grompp from reading the topology.
gromacs_char_limit = 4000 # the limit is actually 4095, but I play safe
header = system.meta.get('header', [])
for line in header:
if len(line) > gromacs_char_limit:
header.append(line[:gromacs_char_limit] + " ...")

for moltype, molecules in molecule_groups:
molecule = next(molecules)
if molecule.force_field is not None:
Expand Down
1 change: 1 addition & 0 deletions vermouth/system.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ def __init__(self, force_field=None):
self.force_field = force_field
self.gmx_topology_params = defaultdict(list)
self.go_params = defaultdict(list)
self.meta = defaultdict(list)

@property
def force_field(self):
Expand Down
29 changes: 28 additions & 1 deletion vermouth/tests/gmx/test_topology.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
import textwrap
import pytest
import vermouth
from itertools import product
from string import ascii_letters
from vermouth.file_writer import DeferredFileWriter
from vermouth.gmx.topology import (sigma_epsilon_to_C6_C12,
write_atomtypes,
Expand Down Expand Up @@ -178,12 +180,12 @@ def test_toplevel_topology(tmp_path, dummy_molecule):
sigma=0.43,
epsilon=2.3,
meta={}))
system.meta["header"] = ['first header comment', 'second header comment']
outpath = tmp_path / 'out.itp'
atompath = tmp_path / 'atomtypes.itp'
nbpath = tmp_path / 'nonbond_params.itp'
write_gmx_topology(system,
outpath,
header=['first header comment', 'second header comment'],
defines=('random', ),
itp_paths={"atomtypes": atompath, "nonbond_params": nbpath},
# at this level C6C12 doesn't matter; it gets
Expand All @@ -205,3 +207,28 @@ def test_toplevel_topology(tmp_path, dummy_molecule):
with open(str(outpath)) as infile:
for line, ref_line in zip(infile, ref_lines):
assert line.strip() == ref_line

@pytest.mark.parametrize('command, expected',
(
[ascii_letters, " ".join(ascii_letters)],
[ascii_letters*100, (" ".join(ascii_letters*100)[:4000] + " ...")]
))
def test_gromacs_cmd_len(dummy_molecule, tmp_path, command, expected):
os.chdir(tmp_path)
system = vermouth.System()
system.add_molecule(dummy_molecule)
dummy_molecule.meta['moltype'] = "molecule_0"

outpath = tmp_path / 'out.itp'

write_gmx_topology(system,
outpath,
defines=('random', ),
C6C12=False,
command=command)
DeferredFileWriter().write()

with open(str(tmp_path / 'molecule_0.itp')) as infile:
expected_line = infile.readlines()[1].strip()[2:]

assert expected_line == expected
41 changes: 41 additions & 0 deletions vermouth/tests/test_dssp.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
PDB_ALA5_CG,
PDB_ALA5,
)
from vermouth.tests.helper_functions import test_molecule, create_sys_all_attrs

DSSP_EXECUTABLE = os.environ.get("VERMOUTH_TEST_DSSP", "dssp")
SECSTRUCT_1BTA = list(
Expand Down Expand Up @@ -705,3 +706,43 @@ def test_cterm_atomnames():
def test_convert_dssp_to_martini(sequence, expected):
found = dssp.convert_dssp_to_martini(sequence)
assert expected == found

@pytest.mark.parametrize('resnames, ss_string, secstruc',
( # protein resnames with secstruc
({0: "ALA", 1: "ALA", 2: "ALA",
3: "GLY", 4: "GLY",
5: "MET",
6: "ARG", 7: "ARG", 8: "ARG"},
'HHHH',
{1: "H", 2: "H", 3: "H", 4: "H"}
),
# not protein resnames, no secstruc annotated or gets written
({0: "A", 1: "A", 2: "A",
3: "B", 4: "B",
5: "C",
6: "D", 7: "D", 8: "D"},
"",
{1: "", 2: "", 3: "", 4: ""}
)
))
def test_gmx_system_header(test_molecule, resnames, ss_string, secstruc):

atypes = {0: "P1", 1: "SN4a", 2: "SN4a",
3: "SP1", 4: "C1",
5: "TP1",
6: "P1", 7: "SN3a", 8: "SP4"}

system = create_sys_all_attrs(test_molecule,
moltype="molecule_0",
secstruc=secstruc,
defaults={"chain": "A"},
attrs={"resname": resnames,
"atype": atypes})

# annotate the actual 'secstruct' attribute because create_sys_all_attrs actually annotates cgsecstruct
dssp.AnnotateResidues(attribute="secstruct",
sequence="HHHH").run_system(system)

dssp.AnnotateMartiniSecondaryStructures().run_system(system)

assert ss_string in system.meta.get('header', [''])
Loading