Skip to content

Commit

Permalink
Change md/geom parsing to frame function for easier use. (#176)
Browse files Browse the repository at this point in the history
- Fix issue with reading all frames of MD/Geom files
- Add a function to parse a single frame of an MD calculation from a block enabling generator approaches.
- Makes `Block.__bool__` verify it contains non-whitespace data.
  • Loading branch information
oerc0122 authored Jan 10, 2025
1 parent 9fe6e0c commit 7a7dad7
Show file tree
Hide file tree
Showing 6 changed files with 560 additions and 51 deletions.
60 changes: 39 additions & 21 deletions castep_outputs/parsers/md_geom_file_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from ..utilities.castep_res import ATOMIC_DATA_TAG, TAG_RE, get_numbers
from ..utilities.constants import FST_D, TAG_ALIASES
from ..utilities.datatypes import AtomIndex, ThreeByThreeMatrix, ThreeVector
from ..utilities.filewrapper import Block
from ..utilities.utility import add_aliases, atreg_to_index, to_type


Expand Down Expand Up @@ -67,47 +68,64 @@ class MDGeomTimestepInfo(TypedDict, total=False):
S: ThreeByThreeMatrix


def parse_md_geom_frame(block: Block) -> MDGeomTimestepInfo:
    """
    Parse a single frame of a .md/.geom file.

    Parameters
    ----------
    block
        Block containing frame of data.

    Returns
    -------
    MDGeomTimestepInfo
        Parsed frame of data.
    """
    # Untagged-by-ion quantities accumulate as lists of rows; per-ion data
    # lives in its own mapping keyed by the ion index.
    curr: MDGeomTimestepInfo = defaultdict(list)
    curr["ions"] = {}

    for line in block:
        if not line.strip():
            continue

        # Search once and reuse the result for both the "no tag" test and
        # the generic tagged-line branch.
        tag_match = TAG_RE.search(line)

        if not tag_match:  # Timestep
            curr["time"] = to_type(get_numbers(line)[0], float)

        elif atom_match := ATOMIC_DATA_TAG.match(line):
            ion = atreg_to_index(atom_match)
            curr["ions"].setdefault(ion, {})[atom_match.group("tag")] = to_type(
                [atom_match.group(d) for d in FST_D], float,
            )

        else:
            curr[tag_match.group("tag")].append([*to_type(get_numbers(line), float)])

    # Aliases are applied to the frame itself and to every ion's data.
    add_aliases(curr, TAG_ALIASES)
    for ion in curr["ions"].values():
        add_aliases(ion, TAG_ALIASES)

    return curr


def parse_md_geom_file(md_geom_file: TextIO) -> list[MDGeomTimestepInfo]:
    """
    Parse standard .md and .geom files.

    Parameters
    ----------
    md_geom_file
        Open handle to file to parse.

    Returns
    -------
    list[MDGeomTimestepInfo]
        Step-by-step Parsed info. Empty if the handle is exhausted
        before an "END header" line is found.
    """
    # Skip the header. ``readline`` returns "" at end of file, so stop there
    # rather than looping forever on a file with no "END header" line.
    while header_line := md_geom_file.readline():
        if "END header" in header_line:
            break
    else:
        return []

    # Discard the line following the header terminator
    # (presumably the blank separator before the first frame -- confirm).
    md_geom_file.readline()

    steps = []
    # Read one block per frame until a falsy block comes back.
    while block := Block.from_re("", md_geom_file, "", "^$", eof_possible=True):
        steps.append(parse_md_geom_frame(block))

    return steps


Expand Down
Loading

0 comments on commit 7a7dad7

Please sign in to comment.