From 4fa2f2068007507362d83c5aaf23d65b7166899f Mon Sep 17 00:00:00 2001 From: csbrasnett Date: Fri, 7 Feb 2025 15:09:39 +0100 Subject: [PATCH 01/15] move header writing in gmx files to a system level meta attribute --- bin/martinize2 | 22 ++++++++++------------ vermouth/gmx/topology.py | 5 +++-- vermouth/system.py | 1 + vermouth/tests/gmx/test_topology.py | 2 +- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/bin/martinize2 b/bin/martinize2 index 4d4cf246..c6d33856 100755 --- a/bin/martinize2 +++ b/bin/martinize2 @@ -1155,25 +1155,23 @@ def entry(): command = " ".join(sys.argv) if len(command) > gromacs_char_limit: command = command[:gromacs_char_limit] + " ..." - header = [ - "This file was generated using the following command:", - command, - VERSION, - ] + + system.meta["header"].append(["This file was generated using the following command:", + command, + VERSION, + ]) if None not in ss_sequence: - header += [ - "The following sequence of secondary structure ", - "was used for the full system:", - "".join(ss_sequence), - ] + system.meta["header"].append(["The following sequence of secondary structure ", + "was used for the full system:", + "".join(ss_sequence), + ]) if args.top_path is not None: write_gmx_topology(system, args.top_path, itp_paths=itp_paths, C6C12=False, - defines=defines, - header=header) + defines=defines) # Write a PDB file. vermouth.pdb.write_pdb(system, str(args.outpath), omit_charges=True) diff --git a/vermouth/gmx/topology.py b/vermouth/gmx/topology.py index 85be94e9..b68b1b55 100644 --- a/vermouth/gmx/topology.py +++ b/vermouth/gmx/topology.py @@ -116,8 +116,7 @@ def write_gmx_topology(system, itp_paths={"nonbond_params": "extra_nbparams.itp", "atomtypes": "extra_atomtypes.itp"}, C6C12=False, - defines=(), - header=()): + defines=()): """ Writes a Gromacs .top file for the specified system. Gromacs topology files are defined by directives for example `[ atomtypes ]`. However, @@ -178,6 +177,8 @@ def write_gmx_topology(system, molecule_groups = itertools.groupby( system.molecules, key=lambda x: x.meta["moltype"] ) + + header = [i for j in system.meta["header"] for i in j] for moltype, molecules in molecule_groups: molecule = next(molecules) if molecule.force_field is not None: diff --git a/vermouth/system.py b/vermouth/system.py index 6f14c3bc..8356f2f0 100644 --- a/vermouth/system.py +++ b/vermouth/system.py @@ -33,6 +33,7 @@ def __init__(self, force_field=None): self.force_field = force_field self.gmx_topology_params = defaultdict(list) self.go_params = defaultdict(list) + self.meta = defaultdict(list) @property def force_field(self): diff --git a/vermouth/tests/gmx/test_topology.py b/vermouth/tests/gmx/test_topology.py index 23380953..740afb74 100644 --- a/vermouth/tests/gmx/test_topology.py +++ b/vermouth/tests/gmx/test_topology.py @@ -178,12 +178,12 @@ def test_toplevel_topology(tmp_path, dummy_molecule): sigma=0.43, epsilon=2.3, meta={})) + system.meta["header"] = ['first header comment', 'second header comment'] outpath = tmp_path / 'out.itp' atompath = tmp_path / 'atomtypes.itp' nbpath = tmp_path / 'nonbond_params.itp' write_gmx_topology(system, outpath, - header=['first header comment', 'second header comment'], defines=('random', ), itp_paths={"atomtypes": atompath, "nonbond_params": nbpath}, # at this level C6C12 doesn't matter; it gets From cd756ad9e439157403d38c57bde02e684b396799 Mon Sep 17 00:00:00 2001 From: csbrasnett Date: Fri, 7 Feb 2025 15:44:22 +0100 Subject: [PATCH 02/15] sort out header in the topology. requires keeping the atomistic secondary structure annotation during martinize --- bin/martinize2 | 25 ++++++++++--------------- vermouth/gmx/topology.py | 28 ++++++++++++++++++++++++++-- 2 files changed, 36 insertions(+), 17 deletions(-) diff --git a/bin/martinize2 b/bin/martinize2 index c6d33856..810c4664 100755 --- a/bin/martinize2 +++ b/bin/martinize2 @@ -174,7 +174,7 @@ def martinize(system, mappings, to_ff, delete_unknown=False, idrs=False, disorde mappings=mappings, to_ff=to_ff, delete_unknown=delete_unknown, - attribute_keep=("cgsecstruct", "chain"), + attribute_keep=("cgsecstruct", "chain", "secstruct"), attribute_must=("resname",), attribute_stash=("resid",), ).run_system(system) @@ -975,15 +975,15 @@ def entry(): 'for this force field', type="missing-feature") - ss_sequence = list( - itertools.chain( - *( - dssp.sequence_from_residues(molecule, "secstruct") - for molecule in system.molecules - if selectors.is_protein(molecule) - ) - ) - ) + # ss_sequence = list( + # itertools.chain( + # *( + # dssp.sequence_from_residues(molecule, "secstruct") + # for molecule in system.molecules + # if selectors.is_protein(molecule) + # ) + # ) + # ) if args.cystein_bridge == "none": vermouth.RemoveCysteinBridgeEdges().run_system(system) @@ -1160,11 +1160,6 @@ def entry(): command, VERSION, ]) - if None not in ss_sequence: - system.meta["header"].append(["The following sequence of secondary structure ", - "was used for the full system:", - "".join(ss_sequence), - ]) if args.top_path is not None: write_gmx_topology(system, diff --git a/vermouth/gmx/topology.py b/vermouth/gmx/topology.py index b68b1b55..3fb916b9 100644 --- a/vermouth/gmx/topology.py +++ b/vermouth/gmx/topology.py @@ -10,12 +10,35 @@ from ..log_helpers import StyleAdapter, get_logger from .itp import _interaction_sorting_key from ..data import COMMON_CITATIONS - +from ..dssp.dssp import sequence_from_residues +from ..selectors import is_protein LOGGER = StyleAdapter(get_logger(__name__)) Atomtype = namedtuple('Atomtype', 'molecule node sigma epsilon meta') NonbondParam = namedtuple('NonbondParam', 'atoms sigma epsilon meta') +def system_header(system): + + ss_sequence = list( + itertools.chain( + *( + sequence_from_residues(molecule, "secstruct") + for molecule in system.molecules + if is_protein(molecule) + ) + ) + ) + + if None not in ss_sequence: + system.meta["header"].append(["The following sequence of secondary structure ", + "was used for the full system:", + "".join(ss_sequence), + ]) + + header = [i for j in system.meta["header"] for i in j] + + return header + def _group_by_conditionals(interactions): interactions_group_sorted = sorted(interactions, key=_interaction_sorting_key @@ -178,7 +201,8 @@ def write_gmx_topology(system, system.molecules, key=lambda x: x.meta["moltype"] ) - header = [i for j in system.meta["header"] for i in j] + header = system_header(system) + for moltype, molecules in molecule_groups: molecule = next(molecules) if molecule.force_field is not None: From 0ec59de0283da65f1f42077e745f0a0b01337bc6 Mon Sep 17 00:00:00 2001 From: csbrasnett Date: Fri, 7 Feb 2025 15:44:48 +0100 Subject: [PATCH 03/15] remove commented out code --- bin/martinize2 | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/bin/martinize2 b/bin/martinize2 index 810c4664..006eb6e1 100755 --- a/bin/martinize2 +++ b/bin/martinize2 @@ -975,16 +975,6 @@ def entry(): 'for this force field', type="missing-feature") - # ss_sequence = list( - # itertools.chain( - # *( - # dssp.sequence_from_residues(molecule, "secstruct") - # for molecule in system.molecules - # if selectors.is_protein(molecule) - # ) - # ) - # ) - if args.cystein_bridge == "none": vermouth.RemoveCysteinBridgeEdges().run_system(system) elif args.cystein_bridge != "auto": From 5ebb94be203910449d4e6fb3d79c918aab6d807b Mon Sep 17 00:00:00 2001 From: csbrasnett Date: Fri, 7 Feb 2025 17:41:49 +0100 Subject: [PATCH 04/15] move secondary structure system header function to dssp - needed to move the command comment to just after the system is made, otherwise the output files have things written the wrong way around --- bin/martinize2 | 26 +++++++++++++------------- vermouth/dssp/dssp.py | 22 ++++++++++++++++++++++ vermouth/gmx/topology.py | 25 +------------------------ 3 files changed, 36 insertions(+), 37 deletions(-) diff --git a/bin/martinize2 b/bin/martinize2 index 006eb6e1..e71fe8d1 100755 --- a/bin/martinize2 +++ b/bin/martinize2 @@ -912,6 +912,19 @@ def entry(): write_canon=args.write_canon, ) + # grompp has a limit in the number of character it can read per line + # (due to the size limit of a buffer somewhere in its implementation). + # The command line can be longer than this limit and therefore + # prevent grompp from reading the topology. + gromacs_char_limit = 4000 # the limit is actually 4095, but I play safe + command = " ".join(sys.argv) + if len(command) > gromacs_char_limit: + command = command[:gromacs_char_limit] + " ..." + system.meta["header"].extend(("This file was generated using the following command:", + command, + VERSION, + )) + LOGGER.info("Read input.", type="step") for molecule in system.molecules: LOGGER.debug("Read molecule {}.", molecule, type="step") @@ -1137,19 +1150,6 @@ def entry(): LOGGER.log(loglevel, entry, **fmt_arg, type="model") # Write the topology if requested - # grompp has a limit in the number of character it can read per line - # (due to the size limit of a buffer somewhere in its implementation). - # The command line can be longer than this limit and therefore - # prevent grompp from reading the topology. - gromacs_char_limit = 4000 # the limit is actually 4095, but I play safe - command = " ".join(sys.argv) - if len(command) > gromacs_char_limit: - command = command[:gromacs_char_limit] + " ..." - - system.meta["header"].append(["This file was generated using the following command:", - command, - VERSION, - ]) if args.top_path is not None: write_gmx_topology(system, diff --git a/vermouth/dssp/dssp.py b/vermouth/dssp/dssp.py index d0221ebb..9c3452bb 100644 --- a/vermouth/dssp/dssp.py +++ b/vermouth/dssp/dssp.py @@ -23,6 +23,7 @@ import subprocess import tempfile import re +import itertools from ..file_writer import deferred_open from ..pdb import pdb @@ -541,6 +542,24 @@ def convert_dssp_annotation_to_martini( raise ValueError('Not all residues have a DSSP assignation.') +def gmx_system_header(system): + + ss_sequence = list( + itertools.chain( + *( + sequence_from_residues(molecule, "secstruct") + for molecule in system.molecules + if is_protein(molecule) + ) + ) + ) + + if None not in ss_sequence: + system.meta["header"].extend(("The following sequence of secondary structure ", + "was used for the full system:", + "".join(ss_sequence), + )) + class AnnotateDSSP(Processor): name = 'AnnotateDSSP' @@ -570,6 +589,9 @@ def run_molecule(molecule): convert_dssp_annotation_to_martini(molecule) return molecule + def run_system(self, system): + gmx_system_header(system) + class AnnotateResidues(Processor): """ diff --git a/vermouth/gmx/topology.py b/vermouth/gmx/topology.py index 3fb916b9..82f00689 100644 --- a/vermouth/gmx/topology.py +++ b/vermouth/gmx/topology.py @@ -17,27 +17,6 @@ Atomtype = namedtuple('Atomtype', 'molecule node sigma epsilon meta') NonbondParam = namedtuple('NonbondParam', 'atoms sigma epsilon meta') -def system_header(system): - - ss_sequence = list( - itertools.chain( - *( - sequence_from_residues(molecule, "secstruct") - for molecule in system.molecules - if is_protein(molecule) - ) - ) - ) - - if None not in ss_sequence: - system.meta["header"].append(["The following sequence of secondary structure ", - "was used for the full system:", - "".join(ss_sequence), - ]) - - header = [i for j in system.meta["header"] for i in j] - - return header def _group_by_conditionals(interactions): interactions_group_sorted = sorted(interactions, @@ -164,8 +143,6 @@ def write_gmx_topology(system, C6C12 form defines: tuple(str) define statments to include in the topology - header: tuple(str) - any comment lines to include at the beginning """ if not system.molecules: raise ValueError("No molecule in the system. Nothing to write.") @@ -201,7 +178,7 @@ def write_gmx_topology(system, system.molecules, key=lambda x: x.meta["moltype"] ) - header = system_header(system) + header = system.meta["header"] for moltype, molecules in molecule_groups: molecule = next(molecules) From f669db1e789d56fa1606d6bd2497e21e3086c686 Mon Sep 17 00:00:00 2001 From: csbrasnett Date: Fri, 7 Feb 2025 17:42:57 +0100 Subject: [PATCH 05/15] made requested change --- vermouth/gmx/topology.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vermouth/gmx/topology.py b/vermouth/gmx/topology.py index 82f00689..0d4d1689 100644 --- a/vermouth/gmx/topology.py +++ b/vermouth/gmx/topology.py @@ -178,7 +178,7 @@ def write_gmx_topology(system, system.molecules, key=lambda x: x.meta["moltype"] ) - header = system.meta["header"] + header = system.meta.get("header", []) for moltype, molecules in molecule_groups: molecule = next(molecules) From 4ce038c9db3cd2f345d02c4c468c1ab0aaa35b6f Mon Sep 17 00:00:00 2001 From: csbrasnett Date: Fri, 7 Feb 2025 18:04:08 +0100 Subject: [PATCH 06/15] fix integration tests --- vermouth/dssp/dssp.py | 1 + 1 file changed, 1 insertion(+) diff --git a/vermouth/dssp/dssp.py b/vermouth/dssp/dssp.py index 9c3452bb..dd349875 100644 --- a/vermouth/dssp/dssp.py +++ b/vermouth/dssp/dssp.py @@ -591,6 +591,7 @@ def run_molecule(molecule): def run_system(self, system): gmx_system_header(system) + super().run_system(system) class AnnotateResidues(Processor): From b1e71c3b9d0d97e4fb6a37b2d334d268cb3dd710 Mon Sep 17 00:00:00 2001 From: csbrasnett Date: Mon, 10 Feb 2025 10:06:50 +0100 Subject: [PATCH 07/15] add test to check header is written --- vermouth/tests/test_dssp.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/vermouth/tests/test_dssp.py b/vermouth/tests/test_dssp.py index aa6d393c..ec87fe13 100644 --- a/vermouth/tests/test_dssp.py +++ b/vermouth/tests/test_dssp.py @@ -37,6 +37,7 @@ PDB_ALA5_CG, PDB_ALA5, ) +from vermouth.tests.helper_functions import test_molecule, create_sys_all_attrs DSSP_EXECUTABLE = os.environ.get("VERMOUTH_TEST_DSSP", "dssp") SECSTRUCT_1BTA = list( @@ -705,3 +706,28 @@ def test_cterm_atomnames(): def test_convert_dssp_to_martini(sequence, expected): found = dssp.convert_dssp_to_martini(sequence) assert expected == found + + +def test_dssp_system_header(test_molecule): + + atypes = {0: "P1", 1: "SN4a", 2: "SN4a", + 3: "SP1", 4: "C1", + 5: "TP1", + 6: "P1", 7: "SN3a", 8: "SP4"} + # the molecule resnames + resnames = {0: "A", 1: "A", 2: "A", + 3: "B", 4: "B", + 5: "C", + 6: "D", 7: "D", 8: "D"} + secstruc = {1: "H", 2: "H", 3: "H", 4: "H"} + + system = create_sys_all_attrs(test_molecule, + moltype="molecule_0", + secstruc=secstruc, + defaults={"chain": "A"}, + attrs={"resname": resnames, + "atype": atypes}) + + dssp.AnnotateMartiniSecondaryStructures().run_system(system) + + assert system.meta.get('head') From f86de4a997da3a604cd50b9983be420422e12261 Mon Sep 17 00:00:00 2001 From: csbrasnett Date: Mon, 10 Feb 2025 10:21:21 +0100 Subject: [PATCH 08/15] typo --- vermouth/tests/test_dssp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vermouth/tests/test_dssp.py b/vermouth/tests/test_dssp.py index ec87fe13..00fa51bf 100644 --- a/vermouth/tests/test_dssp.py +++ b/vermouth/tests/test_dssp.py @@ -730,4 +730,4 @@ def test_dssp_system_header(test_molecule): dssp.AnnotateMartiniSecondaryStructures().run_system(system) - assert system.meta.get('head') + assert system.meta.get('header') From c73ad8b375b9f6e37b727e13c0314097e5595fba Mon Sep 17 00:00:00 2001 From: csbrasnett Date: Mon, 10 Feb 2025 10:54:39 +0100 Subject: [PATCH 09/15] fix test for gmx_system_header --- vermouth/tests/test_dssp.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/vermouth/tests/test_dssp.py b/vermouth/tests/test_dssp.py index 00fa51bf..1886a07c 100644 --- a/vermouth/tests/test_dssp.py +++ b/vermouth/tests/test_dssp.py @@ -708,17 +708,17 @@ def test_convert_dssp_to_martini(sequence, expected): assert expected == found -def test_dssp_system_header(test_molecule): +def test_gmx_system_header(test_molecule): atypes = {0: "P1", 1: "SN4a", 2: "SN4a", 3: "SP1", 4: "C1", 5: "TP1", 6: "P1", 7: "SN3a", 8: "SP4"} - # the molecule resnames - resnames = {0: "A", 1: "A", 2: "A", - 3: "B", 4: "B", - 5: "C", - 6: "D", 7: "D", 8: "D"} + # the molecule resnames. needs to be actual protein resnames because gmx_system_header checks is_protein(molecule) + resnames = {0: "ALA", 1: "ALA", 2: "ALA", + 3: "GLY", 4: "GLY", + 5: "MET", + 6: "ARG", 7: "ARG", 8: "ARG"} secstruc = {1: "H", 2: "H", 3: "H", 4: "H"} system = create_sys_all_attrs(test_molecule, @@ -728,6 +728,12 @@ def test_dssp_system_header(test_molecule): attrs={"resname": resnames, "atype": atypes}) + # need to annotate the actual 'secstruct' attribute because create_sys_all_attrs actually annotates cgsecstruct + dssp.AnnotateResidues(attribute="secstruct", + sequence="HHHH").run_system(system) + dssp.AnnotateMartiniSecondaryStructures().run_system(system) - assert system.meta.get('header') + assert len(system.meta.get('header')) == 3 + assert system.meta.get('header')[-1] == "HHHH" + From 2f071e2e398c1e4646a18c4afe2211fe8b4ab625 Mon Sep 17 00:00:00 2001 From: csbrasnett Date: Mon, 10 Feb 2025 11:22:12 +0100 Subject: [PATCH 10/15] add test for if non protein sequence is given. --- vermouth/dssp/dssp.py | 2 +- vermouth/tests/test_dssp.py | 33 +++++++++++++++++++++------------ 2 files changed, 22 insertions(+), 13 deletions(-) diff --git a/vermouth/dssp/dssp.py b/vermouth/dssp/dssp.py index dd349875..9b234ae0 100644 --- a/vermouth/dssp/dssp.py +++ b/vermouth/dssp/dssp.py @@ -554,7 +554,7 @@ def gmx_system_header(system): ) ) - if None not in ss_sequence: + if (None not in ss_sequence) and (len(ss_sequence) > 0): system.meta["header"].extend(("The following sequence of secondary structure ", "was used for the full system:", "".join(ss_sequence), diff --git a/vermouth/tests/test_dssp.py b/vermouth/tests/test_dssp.py index 1886a07c..270e6c6a 100644 --- a/vermouth/tests/test_dssp.py +++ b/vermouth/tests/test_dssp.py @@ -707,19 +707,30 @@ def test_convert_dssp_to_martini(sequence, expected): found = dssp.convert_dssp_to_martini(sequence) assert expected == found - -def test_gmx_system_header(test_molecule): +@pytest.mark.parametrize('resnames, ss_string, secstruc', + ( # protein resnames with secstruc + ({0: "ALA", 1: "ALA", 2: "ALA", + 3: "GLY", 4: "GLY", + 5: "MET", + 6: "ARG", 7: "ARG", 8: "ARG"}, + 'HHHH', + {1: "H", 2: "H", 3: "H", 4: "H"} + ), + # not protein resnames, no secstruc annotated or gets written + ({0: "A", 1: "A", 2: "A", + 3: "B", 4: "B", + 5: "C", + 6: "D", 7: "D", 8: "D"}, + "", + {1: "", 2: "", 3: "", 4: ""} + ) + )) +def test_gmx_system_header(test_molecule, resnames, ss_string, secstruc): atypes = {0: "P1", 1: "SN4a", 2: "SN4a", 3: "SP1", 4: "C1", 5: "TP1", 6: "P1", 7: "SN3a", 8: "SP4"} - # the molecule resnames. needs to be actual protein resnames because gmx_system_header checks is_protein(molecule) - resnames = {0: "ALA", 1: "ALA", 2: "ALA", - 3: "GLY", 4: "GLY", - 5: "MET", - 6: "ARG", 7: "ARG", 8: "ARG"} - secstruc = {1: "H", 2: "H", 3: "H", 4: "H"} system = create_sys_all_attrs(test_molecule, moltype="molecule_0", @@ -728,12 +739,10 @@ def test_gmx_system_header(test_molecule): attrs={"resname": resnames, "atype": atypes}) - # need to annotate the actual 'secstruct' attribute because create_sys_all_attrs actually annotates cgsecstruct + # annotate the actual 'secstruct' attribute because create_sys_all_attrs actually annotates cgsecstruct dssp.AnnotateResidues(attribute="secstruct", sequence="HHHH").run_system(system) dssp.AnnotateMartiniSecondaryStructures().run_system(system) - assert len(system.meta.get('header')) == 3 - assert system.meta.get('header')[-1] == "HHHH" - + assert ss_string in system.meta.get('header', ['']) From bf135b7cda0c703b66a4b2da26e752a5c61cc250 Mon Sep 17 00:00:00 2001 From: csbrasnett Date: Mon, 10 Feb 2025 11:23:49 +0100 Subject: [PATCH 11/15] move gromacs part of header to topology writer --- bin/martinize2 | 17 +++-------------- vermouth/gmx/topology.py | 21 ++++++++++++++++++++- 2 files changed, 23 insertions(+), 15 deletions(-) diff --git a/bin/martinize2 b/bin/martinize2 index e71fe8d1..7b7b1961 100755 --- a/bin/martinize2 +++ b/bin/martinize2 @@ -912,19 +912,6 @@ def entry(): write_canon=args.write_canon, ) - # grompp has a limit in the number of character it can read per line - # (due to the size limit of a buffer somewhere in its implementation). - # The command line can be longer than this limit and therefore - # prevent grompp from reading the topology. - gromacs_char_limit = 4000 # the limit is actually 4095, but I play safe - command = " ".join(sys.argv) - if len(command) > gromacs_char_limit: - command = command[:gromacs_char_limit] + " ..." - system.meta["header"].extend(("This file was generated using the following command:", - command, - VERSION, - )) - LOGGER.info("Read input.", type="step") for molecule in system.molecules: LOGGER.debug("Read molecule {}.", molecule, type="step") @@ -1156,7 +1143,9 @@ def entry(): args.top_path, itp_paths=itp_paths, C6C12=False, - defines=defines) + defines=defines, + command=sys.argv, + version=VERSION) # Write a PDB file. vermouth.pdb.write_pdb(system, str(args.outpath), omit_charges=True) diff --git a/vermouth/gmx/topology.py b/vermouth/gmx/topology.py index 0d4d1689..0ba6c773 100644 --- a/vermouth/gmx/topology.py +++ b/vermouth/gmx/topology.py @@ -118,7 +118,9 @@ def write_gmx_topology(system, itp_paths={"nonbond_params": "extra_nbparams.itp", "atomtypes": "extra_atomtypes.itp"}, C6C12=False, - defines=()): + defines=(), + command=[], + version=''): """ Writes a Gromacs .top file for the specified system. Gromacs topology files are defined by directives for example `[ atomtypes ]`. However, @@ -143,6 +145,10 @@ def write_gmx_topology(system, C6C12 form defines: tuple(str) define statments to include in the topology + command: list + command used to generate the system + version: str + version of vermouth used to generate system """ if not system.molecules: raise ValueError("No molecule in the system. Nothing to write.") @@ -178,6 +184,19 @@ def write_gmx_topology(system, system.molecules, key=lambda x: x.meta["moltype"] ) + # grompp has a limit in the number of character it can read per line + # (due to the size limit of a buffer somewhere in its implementation). + # The command line can be longer than this limit and therefore + # prevent grompp from reading the topology. + gromacs_char_limit = 4000 # the limit is actually 4095, but I play safe + command_used = " ".join(command) + if len(command_used) > gromacs_char_limit: + command_used = command_used[:gromacs_char_limit] + " ..." + system.meta["header"].extend(("This file was generated using the following command:", + command_used, + version, + )) + header = system.meta.get("header", []) for moltype, molecules in molecule_groups: From b0814203ba38ab8ae6483880170bd937e095d787 Mon Sep 17 00:00:00 2001 From: csbrasnett Date: Mon, 10 Feb 2025 11:48:34 +0100 Subject: [PATCH 12/15] add line limit test for topology writer --- vermouth/tests/gmx/test_topology.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/vermouth/tests/gmx/test_topology.py b/vermouth/tests/gmx/test_topology.py index 740afb74..640e8717 100644 --- a/vermouth/tests/gmx/test_topology.py +++ b/vermouth/tests/gmx/test_topology.py @@ -19,6 +19,8 @@ import textwrap import pytest import vermouth +from itertools import product +from string import ascii_letters from vermouth.file_writer import DeferredFileWriter from vermouth.gmx.topology import (sigma_epsilon_to_C6_C12, write_atomtypes, @@ -205,3 +207,28 @@ def test_toplevel_topology(tmp_path, dummy_molecule): with open(str(outpath)) as infile: for line, ref_line in zip(infile, ref_lines): assert line.strip() == ref_line + +@pytest.mark.parametrize('command, expected', + ( + [ascii_letters, " ".join(ascii_letters)], + [ascii_letters*100, (" ".join(ascii_letters*100)[:4000] + " ...")] + )) +def test_gromacs_cmd_len(dummy_molecule, tmp_path, command, expected): + os.chdir(tmp_path) + system = vermouth.System() + system.add_molecule(dummy_molecule) + dummy_molecule.meta['moltype'] = "molecule_0" + + outpath = tmp_path / 'out.itp' + + write_gmx_topology(system, + outpath, + defines=('random', ), + C6C12=False, + command=command) + DeferredFileWriter().write() + + with open(str(tmp_path / 'molecule_0.itp')) as infile: + expected_line = infile.readlines()[1].strip()[2:] + + assert expected_line == expected From 020d1307b2cdc7b1aabe6b5108989d1b1c513efe Mon Sep 17 00:00:00 2001 From: csbrasnett Date: Mon, 10 Feb 2025 15:10:11 +0100 Subject: [PATCH 13/15] requested changes --- bin/martinize2 | 9 +++++---- vermouth/dssp/dssp.py | 2 +- vermouth/gmx/topology.py | 21 +++++---------------- 3 files changed, 11 insertions(+), 21 deletions(-) diff --git a/bin/martinize2 b/bin/martinize2 index 7b7b1961..bae13745 100755 --- a/bin/martinize2 +++ b/bin/martinize2 @@ -911,7 +911,10 @@ def entry(): write_repair=args.write_repair, write_canon=args.write_canon, ) - + system.meta["header"].extend(("This file was generated using the following command:", + " ".join(sys.argv), + VERSION, + )) LOGGER.info("Read input.", type="step") for molecule in system.molecules: LOGGER.debug("Read molecule {}.", molecule, type="step") @@ -1143,9 +1146,7 @@ def entry(): args.top_path, itp_paths=itp_paths, C6C12=False, - defines=defines, - command=sys.argv, - version=VERSION) + defines=defines) # Write a PDB file. vermouth.pdb.write_pdb(system, str(args.outpath), omit_charges=True) diff --git a/vermouth/dssp/dssp.py b/vermouth/dssp/dssp.py index 9b234ae0..24ec59cd 100644 --- a/vermouth/dssp/dssp.py +++ b/vermouth/dssp/dssp.py @@ -554,7 +554,7 @@ def gmx_system_header(system): ) ) - if (None not in ss_sequence) and (len(ss_sequence) > 0): + if None not in ss_sequence and ss_sequence: system.meta["header"].extend(("The following sequence of secondary structure ", "was used for the full system:", "".join(ss_sequence), diff --git a/vermouth/gmx/topology.py b/vermouth/gmx/topology.py index 0ba6c773..bde99e1d 100644 --- a/vermouth/gmx/topology.py +++ b/vermouth/gmx/topology.py @@ -118,9 +118,7 @@ def write_gmx_topology(system, itp_paths={"nonbond_params": "extra_nbparams.itp", "atomtypes": "extra_atomtypes.itp"}, C6C12=False, - defines=(), - command=[], - version=''): + defines=()): """ Writes a Gromacs .top file for the specified system. Gromacs topology files are defined by directives for example `[ atomtypes ]`. However, @@ -145,10 +143,6 @@ def write_gmx_topology(system, C6C12 form defines: tuple(str) define statments to include in the topology - command: list - command used to generate the system - version: str - version of vermouth used to generate system """ if not system.molecules: raise ValueError("No molecule in the system. Nothing to write.") @@ -189,15 +183,10 @@ def write_gmx_topology(system, # The command line can be longer than this limit and therefore # prevent grompp from reading the topology. gromacs_char_limit = 4000 # the limit is actually 4095, but I play safe - command_used = " ".join(command) - if len(command_used) > gromacs_char_limit: - command_used = command_used[:gromacs_char_limit] + " ..." - system.meta["header"].extend(("This file was generated using the following command:", - command_used, - version, - )) - - header = system.meta.get("header", []) + header = system.meta.get('header', []) + for line in header: + if len(line) > gromacs_char_limit: + header.append(line[:gromacs_char_limit] + " ...") for moltype, molecules in molecule_groups: molecule = next(molecules) From 188e04eb98661ad3e0ed5058e171b85d82d58c50 Mon Sep 17 00:00:00 2001 From: csbrasnett Date: Mon, 10 Feb 2025 17:19:31 +0100 Subject: [PATCH 14/15] correct topology.py and topology tests --- vermouth/gmx/topology.py | 7 +++++-- vermouth/tests/gmx/test_topology.py | 15 +++++++++------ 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/vermouth/gmx/topology.py b/vermouth/gmx/topology.py index bde99e1d..88ba5e49 100644 --- a/vermouth/gmx/topology.py +++ b/vermouth/gmx/topology.py @@ -183,10 +183,13 @@ def write_gmx_topology(system, # The command line can be longer than this limit and therefore # prevent grompp from reading the topology. gromacs_char_limit = 4000 # the limit is actually 4095, but I play safe - header = system.meta.get('header', []) - for line in header: + _header = system.meta.get('header', []) + header = [] + for line in _header: if len(line) > gromacs_char_limit: header.append(line[:gromacs_char_limit] + " ...") + if not header: + header = _header for moltype, molecules in molecule_groups: molecule = next(molecules) diff --git a/vermouth/tests/gmx/test_topology.py b/vermouth/tests/gmx/test_topology.py index 640e8717..49c27e7f 100644 --- a/vermouth/tests/gmx/test_topology.py +++ b/vermouth/tests/gmx/test_topology.py @@ -210,25 +210,28 @@ def test_toplevel_topology(tmp_path, dummy_molecule): @pytest.mark.parametrize('command, expected', ( - [ascii_letters, " ".join(ascii_letters)], - [ascii_letters*100, (" ".join(ascii_letters*100)[:4000] + " ...")] + [ascii_letters, {"length": len(ascii_letters), "string": ascii_letters}], + [(ascii_letters*100), {"length": 4004, "string": (ascii_letters*100)[:4000]+" ..."}] + )) def test_gromacs_cmd_len(dummy_molecule, tmp_path, command, expected): os.chdir(tmp_path) system = vermouth.System() system.add_molecule(dummy_molecule) dummy_molecule.meta['moltype'] = "molecule_0" + system.meta['header'].extend([command]) outpath = tmp_path / 'out.itp' write_gmx_topology(system, outpath, defines=('random', ), - C6C12=False, - command=command) + C6C12=False) DeferredFileWriter().write() with open(str(tmp_path / 'molecule_0.itp')) as infile: - expected_line = infile.readlines()[1].strip()[2:] + expected_line = infile.readlines()[0].strip()[2:] + + assert len(expected_line) == expected["length"] - assert expected_line == expected + assert expected_line == expected["string"] From b5b9638cb2738be9aef7e3dbb8c0d47d026826b7 Mon Sep 17 00:00:00 2001 From: csbrasnett Date: Mon, 10 Feb 2025 17:20:12 +0100 Subject: [PATCH 15/15] sanitise topology.py --- vermouth/gmx/topology.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vermouth/gmx/topology.py b/vermouth/gmx/topology.py index 88ba5e49..f73540b6 100644 --- a/vermouth/gmx/topology.py +++ b/vermouth/gmx/topology.py @@ -188,8 +188,8 @@ def write_gmx_topology(system, for line in _header: if len(line) > gromacs_char_limit: header.append(line[:gromacs_char_limit] + " ...") - if not header: - header = _header + else: + header.append(line) for moltype, molecules in molecule_groups: molecule = next(molecules)