diff --git a/package/AUTHORS b/package/AUTHORS index f8e82065ad..35e4029f77 100644 --- a/package/AUTHORS +++ b/package/AUTHORS @@ -235,6 +235,7 @@ Chronological list of authors - Jenna M. Swarthout Goddard 2024 - Aditya Keshari + - Philipp Stärk External code ------------- diff --git a/package/CHANGELOG b/package/CHANGELOG index 23a72464cd..139c19fd27 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -15,7 +15,7 @@ The rules for this file: ------------------------------------------------------------------------------- ??/??/?? IAlibay, HeetVekariya, marinegor, lilyminium, RMeli, - ljwoods2, aditya292002 + ljwoods2, aditya292002, pstaerk * 2.8.0 @@ -30,6 +30,7 @@ Fixes * Fix deploy action to use the correct version of the pypi upload action. Enhancements + * Added parsing of arbitrary columns of the LAMMPS dump parser. (Issue #3504) * Documented the r0 attribute in the `Contacts` class and added the `n_initial_contacts` attribute, with documentation. (Issue #2604, PR #4415) diff --git a/package/MDAnalysis/coordinates/LAMMPS.py b/package/MDAnalysis/coordinates/LAMMPS.py index 0cd3874997..0227ed742c 100644 --- a/package/MDAnalysis/coordinates/LAMMPS.py +++ b/package/MDAnalysis/coordinates/LAMMPS.py @@ -135,6 +135,7 @@ from ..topology.LAMMPSParser import DATAParser from ..exceptions import NoDataError from . import base +import warnings btype_sections = {'bond':'Bonds', 'angle':'Angles', 'dihedral':'Dihedrals', 'improper':'Impropers'} @@ -458,12 +459,50 @@ class DumpReader(base.ReaderBase): """Reads the default `LAMMPS dump format `__ - Supports coordinates in various LAMMPS coordinate conventions and both - orthogonal and triclinic simulation box dimensions (for more details see - `documentation `__). In - either case, MDAnalysis will always use ``(*A*, *B*, *C*, *alpha*, *beta*, - *gamma*)`` to represent the unit cell. Lengths *A*, *B*, *C* are in the - MDAnalysis length unit (Å), and angles are in degrees. + Supports coordinates in the LAMMPS "unscaled" (x,y,z), "scaled" (xs,ys,zs), + "unwrapped" (xu,yu,zu) and "scaled_unwrapped" (xsu,ysu,zsu) coordinate + conventions (see https://docs.lammps.org/dump.html for more details). + If `lammps_coordinate_convention='auto'` (default), + one will be guessed. Guessing checks whether the coordinates fit each + convention in the order "unscaled", "scaled", "unwrapped", + "scaled_unwrapped" and whichever set of coordinates is detected first will + be used. If coordinates are given in the scaled coordinate convention + (xs,ys,zs) or scaled unwrapped coordinate convention (xsu,ysu,zsu) they + will automatically be converted from their scaled/fractional representation + to their real values. + + Supports both orthogonal and triclinic simulation box dimensions (for more + details see https://docs.lammps.org/Howto_triclinic.html). In either case, + MDAnalysis will always use ``(*A*, *B*, *C*, *alpha*, *beta*, *gamma*)`` + to represent the unit cell. Lengths *A*, *B*, *C* are in the MDAnalysis + length unit (Å), and angles are in degrees. + + By using the keyword `additional_columns`, you can specify arbitrary data + to be read. The keyword expects a list of the names of the columns or + `True` to read all additional columns. The results are saved to + :attr:`Timestep.data`. For example, if your LAMMPS dump looks like this + + .. code-block:: + + ITEM: ATOMS id x y z q l + 1 2.84 8.17 -25 0.00258855 1.1 + 2 7.1 8.17 -25 6.91952e-05 1.2 + + Then you may parse the additional columns `q` and `l` via: + + .. code-block:: python + + u = mda.Universe('structure.data', 'traj.lammpsdump', + additional_columns=['q', 'l']) + + The additional data is then available for each time step via: + + .. code-block:: python + + for ts in u.trajectory: + charges = ts.data['q'] # Access additional data, sorted by the id + ls = ts.data['l'] + ... Parameters ---------- @@ -497,6 +536,9 @@ class DumpReader(base.ReaderBase): **kwargs Other keyword arguments used in :class:`~MDAnalysis.coordinates.base.ReaderBase` + .. versionchanged:: 2.7.0 + Reading of arbitrary, additional columns is now supported. + (Issue #3608) .. versionchanged:: 2.4.0 Now imports velocities and forces, translates the box to the origin, and optionally unwraps trajectories with image flags upon loading. @@ -510,6 +552,7 @@ class DumpReader(base.ReaderBase): format = 'LAMMPSDUMP' _conventions = ["auto", "unscaled", "scaled", "unwrapped", "scaled_unwrapped"] + _coordtype_column_names = { "unscaled": ["x", "y", "z"], "scaled": ["xs", "ys", "zs"], @@ -517,11 +560,15 @@ class DumpReader(base.ReaderBase): "scaled_unwrapped": ["xsu", "ysu", "zsu"] } + _parsable_columns = ["id", "vx", "vy", "vz", "fx", "fy", "fz"] + for key in _coordtype_column_names.keys(): + _parsable_columns += _coordtype_column_names[key] + @store_init_arguments - def __init__(self, filename, + def __init__(self, filename, lammps_coordinate_convention="auto", unwrap_images=False, - **kwargs): + additional_columns=None, **kwargs): super(DumpReader, self).__init__(filename, **kwargs) root, ext = os.path.splitext(self.filename) @@ -536,6 +583,16 @@ def __init__(self, filename, self._unwrap = unwrap_images + if (util.iterable(additional_columns) + or additional_columns is None + or additional_columns is True): + self._additional_columns = additional_columns + else: + raise ValueError(f"additional_columns={additional_columns} " + "is not a valid option. Please provide an " + "iterable containing the additional" + "column headers.") + self._cache = {} self._reopen() @@ -681,6 +738,25 @@ def _read_next_timestep(self): coord_cols.extend(image_cols) ids = "id" in attr_to_col_ix + + # Create the data arrays for additional attributes which will be saved + # under ts.data + if self._additional_columns is True: + # Parse every column that is not already parsed + # elsewhere (total \ parsable) + additional_keys = set(attrs).difference(self._parsable_columns) + elif self._additional_columns: + if not all([key in attrs for key in self._additional_columns]): + warnings.warn("Some of the additional columns are not present " + "in the file, they will be ignored") + additional_keys = \ + [key for key in self._additional_columns if key in attrs] + else: + additional_keys = [] + for key in additional_keys: + ts.data[key] = np.empty(self.n_atoms) + + # Parse all the atoms for i in range(self.n_atoms): fields = f.readline().split() if ids: @@ -701,12 +777,22 @@ def _read_next_timestep(self): if self._has_forces: ts.forces[i] = [fields[dim] for dim in force_cols] + # Collect additional cols + for attribute_key in additional_keys: + ts.data[attribute_key][i] = \ + fields[attr_to_col_ix[attribute_key]] + order = np.argsort(indices) ts.positions = ts.positions[order] if self._has_vels: ts.velocities = ts.velocities[order] if self._has_forces: ts.forces = ts.forces[order] + + # Also need to sort the additional keys + for attribute_key in additional_keys: + ts.data[attribute_key] = ts.data[attribute_key][order] + if (self.lammps_coordinate_convention.startswith("scaled")): # if coordinates are given in scaled format, undo that ts.positions = distances.transform_StoR(ts.positions, diff --git a/testsuite/MDAnalysisTests/coordinates/reference.py b/testsuite/MDAnalysisTests/coordinates/reference.py index 2d2ec036bb..8c523c639e 100644 --- a/testsuite/MDAnalysisTests/coordinates/reference.py +++ b/testsuite/MDAnalysisTests/coordinates/reference.py @@ -25,9 +25,10 @@ from MDAnalysisTests import datafiles from MDAnalysisTests.datafiles import (PDB_small, PDB, LAMMPSdata, LAMMPSdata2, LAMMPSdcd2, - LAMMPSdata_mini, PSF_TRICLINIC, - DCD_TRICLINIC, PSF_NAMD_TRICLINIC, - DCD_NAMD_TRICLINIC) + LAMMPSdata_mini, + LAMMPSdata_additional_columns, + PSF_TRICLINIC, DCD_TRICLINIC, + PSF_NAMD_TRICLINIC, DCD_NAMD_TRICLINIC) class RefAdKSmall(object): @@ -227,3 +228,9 @@ class RefLAMMPSDataMini(object): dtype=np.float32) dimensions = np.array([60., 50., 30., 90., 90., 90.], dtype=np.float32) + +class RefLAMMPSDataAdditionalColumns(object): + q = np.array([2.58855e-03, 6.91952e-05, 1.05548e-02, 4.20319e-03, + 9.19172e-03, 4.79777e-03, 6.36864e-04, 5.87125e-03, + -2.18125e-03, 6.88910e-03]) + p = np.array(5 * [1.1, 1.2]) diff --git a/testsuite/MDAnalysisTests/coordinates/test_lammps.py b/testsuite/MDAnalysisTests/coordinates/test_lammps.py index c8afb286fb..88b4b8c35e 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_lammps.py +++ b/testsuite/MDAnalysisTests/coordinates/test_lammps.py @@ -29,16 +29,18 @@ import MDAnalysis as mda from MDAnalysis import NoDataError -from numpy.testing import (assert_equal, assert_allclose, assert_allclose) +from numpy.testing import (assert_equal, assert_allclose) from MDAnalysisTests import make_Universe from MDAnalysisTests.coordinates.reference import ( RefLAMMPSData, RefLAMMPSDataMini, RefLAMMPSDataDCD, + RefLAMMPSDataAdditionalColumns ) from MDAnalysisTests.datafiles import ( LAMMPScnt, LAMMPShyd, LAMMPSdata, LAMMPSdata_mini, LAMMPSdata_triclinic, LAMMPSDUMP, LAMMPSDUMP_allcoords, LAMMPSDUMP_nocoords, LAMMPSDUMP_triclinic, - LAMMPSDUMP_image_vf, LAMMPS_image_vf + LAMMPSDUMP_image_vf, LAMMPS_image_vf, LAMMPSdata_additional_columns, + LAMMPSDUMP_additional_columns ) @@ -511,6 +513,46 @@ def u(self, tmpdir, request): yield mda.Universe(f, format='LAMMPSDUMP', lammps_coordinate_convention="auto") + @pytest.fixture() + def u_additional_columns_true(self): + f = LAMMPSDUMP_additional_columns + top = LAMMPSdata_additional_columns + return mda.Universe(top, f, format='LAMMPSDUMP', + lammps_coordinate_convention="auto", + additional_columns=True) + + @pytest.fixture() + def u_additional_columns_single(self): + f = LAMMPSDUMP_additional_columns + top = LAMMPSdata_additional_columns + return mda.Universe(top, f, format='LAMMPSDUMP', + lammps_coordinate_convention="auto", + additional_columns=['q']) + + @pytest.fixture() + def u_additional_columns_multiple(self): + f = LAMMPSDUMP_additional_columns + top = LAMMPSdata_additional_columns + return mda.Universe(top, f, format='LAMMPSDUMP', + lammps_coordinate_convention="auto", + additional_columns=['q', 'p']) + + @pytest.fixture() + def u_additional_columns_wrong_format(self): + f = LAMMPSDUMP_additional_columns + top = LAMMPSdata_additional_columns + return mda.Universe(top, f, format='LAMMPSDUMP', + lammps_coordinate_convention="auto", + additional_columns='q') + + @pytest.fixture() + def u_additional_columns_not_present(self): + f = LAMMPSDUMP_additional_columns + top = LAMMPSdata_additional_columns + return mda.Universe(top, f, format='LAMMPSDUMP', + lammps_coordinate_convention="auto", + additional_columns=['q', 'w']) + @pytest.fixture() def reference_positions(self): # manually copied from traj file @@ -592,6 +634,32 @@ def test_atom_reordering(self, u, reference_positions): assert_allclose(atom1.position, atom1_pos-bmin, atol=1e-5) assert_allclose(atom13.position, atom13_pos-bmin, atol=1e-5) + @pytest.mark.parametrize("system, fields", [ + ('u_additional_columns_true', ['q', 'p']), + ('u_additional_columns_single', ['q']), + ('u_additional_columns_multiple', ['q', 'p']), + ]) + def test_additional_columns(self, system, fields, request): + u = request.getfixturevalue(system) + for field in fields: + data = u.trajectory[0].data[field] + assert_allclose(data, + getattr(RefLAMMPSDataAdditionalColumns, field)) + + @pytest.mark.parametrize("system", [ + ('u_additional_columns_wrong_format'), + ]) + def test_wrong_format_additional_colums(self, system, request): + with pytest.raises(ValueError, + match="Please provide an iterable containing"): + request.getfixturevalue(system) + + @pytest.mark.parametrize("system", [ + ('u_additional_columns_not_present'), + ]) + def test_warning(self, system, request): + with pytest.warns(match="Some of the additional"): + request.getfixturevalue(system) @pytest.mark.parametrize("convention", ["unscaled", "unwrapped", "scaled_unwrapped"]) diff --git a/testsuite/MDAnalysisTests/data/lammps/additional_columns.data b/testsuite/MDAnalysisTests/data/lammps/additional_columns.data new file mode 100644 index 0000000000..80b9a57bb7 --- /dev/null +++ b/testsuite/MDAnalysisTests/data/lammps/additional_columns.data @@ -0,0 +1,29 @@ +LAMMPS data file via write_data, version 24 Mar 2022, timestep = 500 + +10 atoms +1 atom types + +0 42.6 xlo xhi +0 44.2712 ylo yhi +-25.1 25.1 zlo zhi + +Masses + +1 12.011 + +Pair Coeffs # lj/cut/coul/long/omp + +1 0.0663 3.5812 + +Atoms # full + +1 2 1 -0.00706800004577013 2.84 8.17 -25 0 0 0 +2 2 1 0.004078816788554217 7.1 8.17 -25 0 0 0 +3 2 1 -0.005824512619752745 2.13 6.94 -25 0 0 0 +4 2 1 0.002812345167059992 6.39 6.94 -25 0 0 0 +5 2 1 -0.004070019151543417 2.84 5.71 -25 0 0 0 +6 2 1 0.004796116641855679 7.1 5.71 -25 0 0 0 +7 2 1 -0.003217742434809291 2.13 4.48 -25 0 0 0 +8 2 1 0.0008273956785370801 6.39 4.48 -25 0 0 0 +9 2 1 -0.0003942558636157474 2.84 3.25 -25 0 0 0 +10 2 1 0.001288716009147968 7.1 3.25 -25 0 0 0 diff --git a/testsuite/MDAnalysisTests/data/lammps/additional_columns.lammpstrj b/testsuite/MDAnalysisTests/data/lammps/additional_columns.lammpstrj new file mode 100644 index 0000000000..79ad9a6fb7 --- /dev/null +++ b/testsuite/MDAnalysisTests/data/lammps/additional_columns.lammpstrj @@ -0,0 +1,19 @@ +ITEM: TIMESTEP +0 +ITEM: NUMBER OF ATOMS +10 +ITEM: BOX BOUNDS pp pp ff +0.0000000000000000e+00 4.2600000000000001e+01 +0.0000000000000000e+00 4.4271200000000000e+01 +-2.5100000000000001e+01 2.5100000000000001e+01 +ITEM: ATOMS id x y z q p +1 2.84 8.17 -25 0.00258855 1.1 +2 7.1 8.17 -25 6.91952e-05 1.2 +3 2.13 6.94 -25 0.0105548 1.1 +4 6.39 6.94 -25 0.00420319 1.2 +5 2.84 5.71 -25 0.00919172 1.1 +6 7.1 5.71 -25 0.00479777 1.2 +7 2.13 4.48 -25 0.000636864 1.1 +8 6.39 4.48 -25 0.00587125 1.2 +9 2.84 3.25 -25 -0.00218125 1.1 +10 7.1 3.25 -25 0.0068891 1.2 diff --git a/testsuite/MDAnalysisTests/datafiles.py b/testsuite/MDAnalysisTests/datafiles.py index 8ae48dfa41..3fb17695de 100644 --- a/testsuite/MDAnalysisTests/datafiles.py +++ b/testsuite/MDAnalysisTests/datafiles.py @@ -156,6 +156,7 @@ "LAMMPSdata_deletedatoms", # with deleted atoms "LAMMPSdata_triclinic", # lammpsdata file to test triclinic dimension parsing, albite with most atoms deleted "LAMMPSdata_PairIJ", # lammps datafile with a PairIJ Coeffs section + "LAMMPSdata_additional_columns", # structure for the additional column lammpstrj "LAMMPSDUMP", "LAMMPSDUMP_long", # lammpsdump file with a few zeros sprinkled in the first column first frame "LAMMPSDUMP_allcoords", # lammpsdump file with all coordinate conventions (x,xs,xu,xsu) present, from LAMMPS rdf example @@ -166,6 +167,7 @@ "LAMMPSDUMP_chain1", # Lammps dump file with chain reader "LAMMPSDUMP_chain2", # Lammps dump file with chain reader "LAMMPS_chain", # Lammps data file with chain reader + "LAMMPSDUMP_additional_columns", # lammpsdump file with additional data (an additional charge column) "unordered_res", # pdb file with resids non sequential "GMS_ASYMOPT", # GAMESS C1 optimization "GMS_SYMOPT", # GAMESS D4h optimization @@ -552,6 +554,8 @@ LAMMPSDUMP_chain2 = (_data_ref / "lammps/chain_dump_2.lammpstrj").as_posix() LAMMPS_chain = (_data_ref / "lammps/chain_initial.data").as_posix() LAMMPSdata_many_bonds = (_data_ref / "lammps/a_lot_of_bond_types.data").as_posix() +LAMMPSdata_additional_columns = (_data_ref / "lammps/additional_columns.data").as_posix() +LAMMPSDUMP_additional_columns = (_data_ref / "lammps/additional_columns.lammpstrj").as_posix() unordered_res = (_data_ref / "unordered_res.pdb").as_posix()