From 7a7dad7e444e1c73d3e7e598382cba48dfd6759d Mon Sep 17 00:00:00 2001 From: Jacob Wilkins <46597752+oerc0122@users.noreply.github.com> Date: Fri, 10 Jan 2025 13:42:13 +0000 Subject: [PATCH] Change md/geom parsing to frame function for easier use. (#176) - Fix issue with reading all frames of MD/Geom files - Add a function to parse a single frame of an MD calculation from a block enabling generator approaches. - Makes `Block.__bool__` verify it contains non-whitespace data. --- castep_outputs/parsers/md_geom_file_parser.py | 60 ++- castep_outputs/test/md.json | 397 ++++++++++++++++++ castep_outputs/test/md.yaml | 94 +++++ castep_outputs/test/test_md_geom_parser.py | 54 +-- castep_outputs/utilities/dumpers.py | 4 +- castep_outputs/utilities/filewrapper.py | 2 +- 6 files changed, 560 insertions(+), 51 deletions(-) diff --git a/castep_outputs/parsers/md_geom_file_parser.py b/castep_outputs/parsers/md_geom_file_parser.py index 03b3f73..1211458 100644 --- a/castep_outputs/parsers/md_geom_file_parser.py +++ b/castep_outputs/parsers/md_geom_file_parser.py @@ -7,6 +7,7 @@ from ..utilities.castep_res import ATOMIC_DATA_TAG, TAG_RE, get_numbers from ..utilities.constants import FST_D, TAG_ALIASES from ..utilities.datatypes import AtomIndex, ThreeByThreeMatrix, ThreeVector +from ..utilities.filewrapper import Block from ..utilities.utility import add_aliases, atreg_to_index, to_type @@ -67,47 +68,64 @@ class MDGeomTimestepInfo(TypedDict, total=False): S: ThreeByThreeMatrix -def parse_md_geom_file(md_geom_file: TextIO) -> list[MDGeomTimestepInfo]: +def parse_md_geom_frame(block: Block) -> MDGeomTimestepInfo: """ - Parse standard .md and .geom files. + Parse a single frame of a .md/.geom file. Parameters ---------- - md_geom_file - Open handle to file to parse. + block + Block containing frame of data. Returns ------- - list[MDGeomTimestepInfo] - Step-by-step Parsed info. + MDGeomTimestepInfo + Parsed frame of data. """ - while "END header" not in md_geom_file.readline(): - pass - - steps = [] curr: MDGeomTimestepInfo = defaultdict(list) curr["ions"] = {} - for line in md_geom_file: - if not line.strip(): # Next step - if curr and curr["ions"]: - add_aliases(curr, TAG_ALIASES) - for ion in curr["ions"].values(): - add_aliases(ion, TAG_ALIASES) - steps.append(curr) - curr = defaultdict(list) - curr["ions"] = {} + + for line in block: + if not line.strip(): + pass elif not TAG_RE.search(line): # Timestep curr["time"] = to_type(get_numbers(line)[0], float) elif match := ATOMIC_DATA_TAG.match(line): ion = atreg_to_index(match) - if ion not in curr["ions"]: - curr["ions"][ion] = {} + curr["ions"].setdefault(ion, {}) curr["ions"][ion][match.group("tag")] = to_type([match.group(d) for d in FST_D], float) elif match := TAG_RE.search(line): curr[match.group("tag")].append([*to_type(get_numbers(line), float)]) + add_aliases(curr, TAG_ALIASES) + for ion in curr["ions"].values(): + add_aliases(ion, TAG_ALIASES) + + return curr + +def parse_md_geom_file(md_geom_file: TextIO) -> list[MDGeomTimestepInfo]: + """ + Parse standard .md and .geom files. + + Parameters + ---------- + md_geom_file + Open handle to file to parse. + + Returns + ------- + list[MDGeomTimestepInfo] + Step-by-step Parsed info. + """ + while "END header" not in md_geom_file.readline(): + pass + md_geom_file.readline() + steps = [] + while block := Block.from_re("", md_geom_file, "", "^$", eof_possible=True): + steps.append(parse_md_geom_frame(block)) + return steps diff --git a/castep_outputs/test/md.json b/castep_outputs/test/md.json index 938ccef..8a6c394 100644 --- a/castep_outputs/test/md.json +++ b/castep_outputs/test/md.json @@ -395,5 +395,402 @@ -0.0002715826885868542 ] ] + }, + { + "ions": { + "Si_1": { + "R": [ + 0.012744294941511641, + 0.0031475895044957244, + 0.010483557256432526 + ], + "V": [ + 0.00015331190519307433, + 3.48946343947653e-05, + 0.00013547550516623848 + ], + "F": [ + -0.0044114263236281984, + -0.00469102307836962, + 0.007719718027258218 + ], + "position": [ + 0.012744294941511641, + 0.0031475895044957244, + 0.010483557256432526 + ], + "velocity": [ + 0.00015331190519307433, + 3.48946343947653e-05, + 0.00013547550516623848 + ], + "force": [ + -0.0044114263236281984, + -0.00469102307836962, + 0.007719718027258218 + ] + }, + "Si_2": { + "R": [ + 0.007223135537547241, + 5.17422118318903, + 5.339935205258146 + ], + "V": [ + 9.639286482142268e-05, + 0.00015557205280474068, + 0.00026755621083043906 + ], + "F": [ + 0.0090177288696785, + 0.006584225073473675, + -0.01969694320559347 + ], + "position": [ + 0.007223135537547241, + 5.17422118318903, + 5.339935205258146 + ], + "velocity": [ + 9.639286482142268e-05, + 0.00015557205280474068, + 0.00026755621083043906 + ], + "force": [ + 0.0090177288696785, + 0.006584225073473675, + -0.01969694320559347 + ] + }, + "Si_3": { + "R": [ + 5.183722016397011, + 0.009165366226882792, + 5.270173676392562 + ], + "V": [ + 0.00027141915149831943, + 0.00011945814360504374, + 4.2948865693523264e-05 + ], + "F": [ + 0.0051791328140053435, + 0.007982345714264035, + -0.021142145815895456 + ], + "position": [ + 5.183722016397011, + 0.009165366226882792, + 5.270173676392562 + ], + "velocity": [ + 0.00027141915149831943, + 0.00011945814360504374, + 4.2948865693523264e-05 + ], + "force": [ + 0.0051791328140053435, + 0.007982345714264035, + -0.021142145815895456 + ] + }, + "Si_4": { + "R": [ + 5.171620778486176, + 5.188712527635344, + 0.005401881771545275 + ], + "V": [ + 0.00011490162732095732, + 0.00032405725428895436, + 6.353055962862474e-05 + ], + "F": [ + -0.0038982262065240855, + -0.005516913298210962, + -0.00363186795998617 + ], + "position": [ + 5.171620778486176, + 5.188712527635344, + 0.005401881771545275 + ], + "velocity": [ + 0.00011490162732095732, + 0.00032405725428895436, + 6.353055962862474e-05 + ], + "force": [ + -0.0038982262065240855, + -0.005516913298210962, + -0.00363186795998617 + ] + }, + "Si_5": { + "R": [ + 7.865627890503028, + 2.5922910465500344, + 7.753633656230245 + ], + "V": [ + 0.00022380439611879922, + 0.00013708831797473156, + 0.00014010649582267966 + ], + "F": [ + -0.017468137533968822, + -0.002160057305721435, + 0.011759717884650078 + ], + "position": [ + 7.865627890503028, + 2.5922910465500344, + 7.753633656230245 + ], + "velocity": [ + 0.00022380439611879922, + 0.00013708831797473156, + 0.00014010649582267966 + ], + "force": [ + -0.017468137533968822, + -0.002160057305721435, + 0.011759717884650078 + ] + }, + "Si_6": { + "R": [ + 2.496946204169283, + 2.599153182939363, + 2.5951445151261545 + ], + "V": [ + 0.0002442973155652518, + 0.00022404164931934104, + 0.00018111056684752262 + ], + "F": [ + 0.009928394544057981, + 0.0008029716627710678, + 0.008607634246194224 + ], + "position": [ + 2.496946204169283, + 2.599153182939363, + 2.5951445151261545 + ], + "velocity": [ + 0.0002442973155652518, + 0.00022404164931934104, + 0.00018111056684752262 + ], + "force": [ + 0.009928394544057981, + 0.0008029716627710678, + 0.008607634246194224 + ] + }, + "Si_7": { + "R": [ + 2.602492317857248, + 7.870971278165349, + 7.756252407248658 + ], + "V": [ + 0.00026625495463489766, + 0.00029407254229973787, + 0.00017037584469949238 + ], + "F": [ + 0.002125445771926847, + -0.012057481153662996, + 0.009357271722404015 + ], + "position": [ + 2.602492317857248, + 7.870971278165349, + 7.756252407248658 + ], + "velocity": [ + 0.00026625495463489766, + 0.00029407254229973787, + 0.00017037584469949238 + ], + "force": [ + 0.002125445771926847, + -0.012057481153662996, + 0.009357271722404015 + ] + }, + "Si_8": { + "R": [ + 7.751145702037532, + 7.650794328951599, + 2.6099972908325 + ], + "V": [ + 9.937744963960346e-05, + 0.00014281825472689115, + 0.00036272354679770376 + ], + "F": [ + -0.0004729119355475677, + 0.009055932385456236, + 0.007026615100968557 + ], + "position": [ + 7.751145702037532, + 7.650794328951599, + 2.6099972908325 + ], + "velocity": [ + 9.937744963960346e-05, + 0.00014281825472689115, + 0.00036272354679770376 + ], + "force": [ + -0.0004729119355475677, + 0.009055932385456236, + 0.007026615100968557 + ] + } + }, + "time": 82.68274668837904, + "E": [ + [ + -31.43997547379767, + -31.374678859949793, + 0.024052714949383245 + ] + ], + "T": [ + [ + 0.002004392912448604 + ] + ], + "P": [ + [ + 8.7348209776675e-05 + ] + ], + "h": [ + [ + 10.324059500983507, + 0.0, + 0.0 + ], + [ + 0.0, + 10.324059500983507, + 0.0 + ], + [ + 0.0, + 0.0, + 10.324059500983507 + ] + ], + "hv": [ + [ + 0.00011795963543494103, + 0.0, + 0.0 + ], + [ + 0.0, + 0.00011795963543494103, + 0.0 + ], + [ + 0.0, + 0.0, + 0.00011795963543494103 + ] + ], + "S": [ + [ + 0.00010458765776711853, + 3.972062174895196e-05, + 8.193016017703576e-05 + ], + [ + 3.972062174895196e-05, + -0.00016019777020622887, + 0.0001503701669484797 + ], + [ + 8.193016017703576e-05, + 0.0001503701669484797, + -0.0002064345168909147 + ] + ], + "energy": [ + [ + -31.43997547379767, + -31.374678859949793, + 0.024052714949383245 + ] + ], + "temperature": [ + [ + 0.002004392912448604 + ] + ], + "pressure": [ + [ + 8.7348209776675e-05 + ] + ], + "lattice_vectors": [ + [ + 10.324059500983507, + 0.0, + 0.0 + ], + [ + 0.0, + 10.324059500983507, + 0.0 + ], + [ + 0.0, + 0.0, + 10.324059500983507 + ] + ], + "lattice_velocity": [ + [ + 0.00011795963543494103, + 0.0, + 0.0 + ], + [ + 0.0, + 0.00011795963543494103, + 0.0 + ], + [ + 0.0, + 0.0, + 0.00011795963543494103 + ] + ], + "stress": [ + [ + 0.00010458765776711853, + 3.972062174895196e-05, + 8.193016017703576e-05 + ], + [ + 3.972062174895196e-05, + -0.00016019777020622887, + 0.0001503701669484797 + ], + [ + 8.193016017703576e-05, + 0.0001503701669484797, + -0.0002064345168909147 + ] + ] } ] \ No newline at end of file diff --git a/castep_outputs/test/md.yaml b/castep_outputs/test/md.yaml index 465d40a..2c27439 100644 --- a/castep_outputs/test/md.yaml +++ b/castep_outputs/test/md.yaml @@ -92,3 +92,97 @@ temperature: - [0.002106468068283934] time: 0.0 +- E: + - [-31.43997547379767, -31.374678859949793, 0.024052714949383245] + P: + - [8.7348209776675e-05] + S: + - [0.00010458765776711853, 3.972062174895196e-05, 8.193016017703576e-05] + - [3.972062174895196e-05, -0.00016019777020622887, 0.0001503701669484797] + - [8.193016017703576e-05, 0.0001503701669484797, -0.0002064345168909147] + T: + - [0.002004392912448604] + energy: + - [-31.43997547379767, -31.374678859949793, 0.024052714949383245] + h: + - [10.324059500983507, 0.0, 0.0] + - [0.0, 10.324059500983507, 0.0] + - [0.0, 0.0, 10.324059500983507] + hv: + - [0.00011795963543494103, 0.0, 0.0] + - [0.0, 0.00011795963543494103, 0.0] + - [0.0, 0.0, 0.00011795963543494103] + ions: + ? !!python/tuple [Si, 1] + : F: [-0.0044114263236281984, -0.00469102307836962, 0.007719718027258218] + R: [0.012744294941511641, 0.0031475895044957244, 0.010483557256432526] + V: [0.00015331190519307433, 3.48946343947653e-05, 0.00013547550516623848] + force: [-0.0044114263236281984, -0.00469102307836962, 0.007719718027258218] + position: [0.012744294941511641, 0.0031475895044957244, 0.010483557256432526] + velocity: [0.00015331190519307433, 3.48946343947653e-05, 0.00013547550516623848] + ? !!python/tuple [Si, 2] + : F: [0.0090177288696785, 0.006584225073473675, -0.01969694320559347] + R: [0.007223135537547241, 5.17422118318903, 5.339935205258146] + V: [9.639286482142268e-05, 0.00015557205280474068, 0.00026755621083043906] + force: [0.0090177288696785, 0.006584225073473675, -0.01969694320559347] + position: [0.007223135537547241, 5.17422118318903, 5.339935205258146] + velocity: [9.639286482142268e-05, 0.00015557205280474068, 0.00026755621083043906] + ? !!python/tuple [Si, 3] + : F: [0.0051791328140053435, 0.007982345714264035, -0.021142145815895456] + R: [5.183722016397011, 0.009165366226882792, 5.270173676392562] + V: [0.00027141915149831943, 0.00011945814360504374, 4.2948865693523264e-05] + force: [0.0051791328140053435, 0.007982345714264035, -0.021142145815895456] + position: [5.183722016397011, 0.009165366226882792, 5.270173676392562] + velocity: [0.00027141915149831943, 0.00011945814360504374, 4.2948865693523264e-05] + ? !!python/tuple [Si, 4] + : F: [-0.0038982262065240855, -0.005516913298210962, -0.00363186795998617] + R: [5.171620778486176, 5.188712527635344, 0.005401881771545275] + V: [0.00011490162732095732, 0.00032405725428895436, 6.353055962862474e-05] + force: [-0.0038982262065240855, -0.005516913298210962, -0.00363186795998617] + position: [5.171620778486176, 5.188712527635344, 0.005401881771545275] + velocity: [0.00011490162732095732, 0.00032405725428895436, 6.353055962862474e-05] + ? !!python/tuple [Si, 5] + : F: [-0.017468137533968822, -0.002160057305721435, 0.011759717884650078] + R: [7.865627890503028, 2.5922910465500344, 7.753633656230245] + V: [0.00022380439611879922, 0.00013708831797473156, 0.00014010649582267966] + force: [-0.017468137533968822, -0.002160057305721435, 0.011759717884650078] + position: [7.865627890503028, 2.5922910465500344, 7.753633656230245] + velocity: [0.00022380439611879922, 0.00013708831797473156, 0.00014010649582267966] + ? !!python/tuple [Si, 6] + : F: [0.009928394544057981, 0.0008029716627710678, 0.008607634246194224] + R: [2.496946204169283, 2.599153182939363, 2.5951445151261545] + V: [0.0002442973155652518, 0.00022404164931934104, 0.00018111056684752262] + force: [0.009928394544057981, 0.0008029716627710678, 0.008607634246194224] + position: [2.496946204169283, 2.599153182939363, 2.5951445151261545] + velocity: [0.0002442973155652518, 0.00022404164931934104, 0.00018111056684752262] + ? !!python/tuple [Si, 7] + : F: [0.002125445771926847, -0.012057481153662996, 0.009357271722404015] + R: [2.602492317857248, 7.870971278165349, 7.756252407248658] + V: [0.00026625495463489766, 0.00029407254229973787, 0.00017037584469949238] + force: [0.002125445771926847, -0.012057481153662996, 0.009357271722404015] + position: [2.602492317857248, 7.870971278165349, 7.756252407248658] + velocity: [0.00026625495463489766, 0.00029407254229973787, 0.00017037584469949238] + ? !!python/tuple [Si, 8] + : F: [-0.0004729119355475677, 0.009055932385456236, 0.007026615100968557] + R: [7.751145702037532, 7.650794328951599, 2.6099972908325] + V: [9.937744963960346e-05, 0.00014281825472689115, 0.00036272354679770376] + force: [-0.0004729119355475677, 0.009055932385456236, 0.007026615100968557] + position: [7.751145702037532, 7.650794328951599, 2.6099972908325] + velocity: [9.937744963960346e-05, 0.00014281825472689115, 0.00036272354679770376] + lattice_vectors: + - [10.324059500983507, 0.0, 0.0] + - [0.0, 10.324059500983507, 0.0] + - [0.0, 0.0, 10.324059500983507] + lattice_velocity: + - [0.00011795963543494103, 0.0, 0.0] + - [0.0, 0.00011795963543494103, 0.0] + - [0.0, 0.0, 0.00011795963543494103] + pressure: + - [8.7348209776675e-05] + stress: + - [0.00010458765776711853, 3.972062174895196e-05, 8.193016017703576e-05] + - [3.972062174895196e-05, -0.00016019777020622887, 0.0001503701669484797] + - [8.193016017703576e-05, 0.0001503701669484797, -0.0002064345168909147] + temperature: + - [0.002004392912448604] + time: 82.68274668837904 diff --git a/castep_outputs/test/test_md_geom_parser.py b/castep_outputs/test/test_md_geom_parser.py index 6711627..ddbba5e 100644 --- a/castep_outputs/test/test_md_geom_parser.py +++ b/castep_outputs/test/test_md_geom_parser.py @@ -1,6 +1,5 @@ # pylint: skip-file import io -import pprint from unittest import TestCase, main from castep_outputs import parse_md_geom_file @@ -12,6 +11,7 @@ def test_parser(self): BEGIN header END header + 0.0000000000000000E+000 -3.1438056609022318E+001 -3.1376043990507860E+001 2.5277616819407205E-002 <-- E 2.1064680682839339E-003 <-- T @@ -35,11 +35,11 @@ def test_parser(self): """) test_dict = parse_md_geom_file(test_text) - self.assertEqual(test_dict, [{'E': [[-31.43805660902232, + self.assertEqual(test_dict, [{"E": [[-31.43805660902232, -31.37604399050786, 0.025277616819407205]], - 'P': [[0.00014675541909861883]], - 'S': [[5.394766743433992e-05, + "P": [[0.00014675541909861883]], + "S": [[5.394766743433992e-05, 3.556753376834879e-05, 8.160736723805646e-05], [3.556753376834879e-05, @@ -48,54 +48,54 @@ def test_parser(self): [8.160736723805646e-05, 0.00015275407397741958, -0.0002715826885868542]], - 'T': [[0.002106468068283934]], - 'energy': [[-31.43805660902232, + "T": [[0.002106468068283934]], + "energy": [[-31.43805660902232, -31.37604399050786, 0.025277616819407205]], - 'h': [[10.261212863294524, 0.0, 0.0], + "h": [[10.261212863294524, 0.0, 0.0], [0.0, 10.261212863294524, 0.0], [0.0, 0.0, 10.261212863294524]], - 'hv': [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], - 'ions': {('Si', 1): {'F': (-0.004748515054133074, + "hv": [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], + "ions": {("Si", 1): {"F": (-0.004748515054133074, -0.006425692354875657, 0.006272275443637005), - 'R': (0.0, 0.0, 0.0), - 'V': (0.0001646132642834471, + "R": (0.0, 0.0, 0.0), + "V": (0.0001646132642834471, 4.5089543337825654e-05, 0.00012682182569746878), - 'force': (-0.004748515054133074, + "force": (-0.004748515054133074, -0.006425692354875657, 0.006272275443637005), - 'position': (0.0, 0.0, 0.0), - 'velocity': (0.0001646132642834471, + "position": (0.0, 0.0, 0.0), + "velocity": (0.0001646132642834471, 4.5089543337825654e-05, 0.00012682182569746878)}, - ('Si', 2): {'F': (0.007374644351523948, + ("Si", 2): {"F": (0.007374644351523948, 0.006270380992687044, -0.019171200407951994), - 'R': (0.0, + "R": (0.0, 5.130606431647262, 5.28452462459668), - 'V': (8.480309957891598e-05, + "V": (8.480309957891598e-05, 0.0001483463437144886, 0.00030659125052563604), - 'force': (0.007374644351523948, + "force": (0.007374644351523948, 0.006270380992687044, -0.019171200407951994), - 'position': (0.0, + "position": (0.0, 5.130606431647262, 5.28452462459668), - 'velocity': (8.480309957891598e-05, + "velocity": (8.480309957891598e-05, 0.0001483463437144886, 0.00030659125052563604)}}, - 'lattice_vectors': [[10.261212863294524, 0.0, 0.0], + "lattice_vectors": [[10.261212863294524, 0.0, 0.0], [0.0, 10.261212863294524, 0.0], [0.0, 0.0, 10.261212863294524]], - 'lattice_velocity': [[0.0, 0.0, 0.0], + "lattice_velocity": [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], - 'pressure': [[0.00014675541909861883]], - 'stress': [[5.394766743433992e-05, + "pressure": [[0.00014675541909861883]], + "stress": [[5.394766743433992e-05, 3.556753376834879e-05, 8.160736723805646e-05], [3.556753376834879e-05, @@ -104,9 +104,9 @@ def test_parser(self): [8.160736723805646e-05, 0.00015275407397741958, -0.0002715826885868542]], - 'temperature': [[0.002106468068283934]], - 'time': 0.0}]) + "temperature": [[0.002106468068283934]], + "time": 0.0}]) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/castep_outputs/utilities/dumpers.py b/castep_outputs/utilities/dumpers.py index eb6a04c..cfeed4a 100644 --- a/castep_outputs/utilities/dumpers.py +++ b/castep_outputs/utilities/dumpers.py @@ -36,7 +36,7 @@ def json_dumper(data: Any, file: TextIO): def ruamel_dumper(data: Any, file: TextIO): """ - ruamel.yaml format dumper. + YAML (ruamel.yaml) format dumper. Parameters ---------- @@ -51,7 +51,7 @@ def ruamel_dumper(data: Any, file: TextIO): def yaml_dumper(data: Any, file: TextIO): """ - yaml format dumper. + YAML (pyyaml) format dumper. Parameters ---------- diff --git a/castep_outputs/utilities/filewrapper.py b/castep_outputs/utilities/filewrapper.py index d2496ec..4bdec37 100644 --- a/castep_outputs/utilities/filewrapper.py +++ b/castep_outputs/utilities/filewrapper.py @@ -308,7 +308,7 @@ def rewind(self): self._i -= 1 def __bool__(self): - return bool(self._data) + return any(map(str.strip, self._data)) def __str__(self): return "\n".join(self._data)