From 95fccd47590a0e872185888a954af32a6ed65b8e Mon Sep 17 00:00:00 2001 From: wpbonelli Date: Fri, 6 Sep 2024 14:46:11 -0400 Subject: [PATCH] parser cleanup/organization, tuple structuring working (but still clunky) --- flopy4/lark/__init__.py | 83 -------------------------- flopy4/mf6/io/__init__.py | 4 ++ flopy4/mf6/io/parser.py | 110 +++++++++++++++++++++++++++++++++++ flopy4/mf6/io/transformer.py | 91 +++++++++++++++++++++++++++++ test/test_gwfoc.py | 81 ++++++++++++++------------ test/test_lark.py | 93 +++++------------------------ 6 files changed, 262 insertions(+), 200 deletions(-) delete mode 100644 flopy4/lark/__init__.py create mode 100644 flopy4/mf6/io/__init__.py create mode 100644 flopy4/mf6/io/parser.py create mode 100644 flopy4/mf6/io/transformer.py diff --git a/flopy4/lark/__init__.py b/flopy4/lark/__init__.py deleted file mode 100644 index 77492db..0000000 --- a/flopy4/lark/__init__.py +++ /dev/null @@ -1,83 +0,0 @@ -from lark import Lark - -MF6_GRAMMAR = r""" -// component -component: _NL* (block _NL+)* _NL* - -// blocks -block: _paramsblock | _listblock -_paramsblock: _BEGIN paramsblockname _NL params _END paramsblockname -_listblock: _BEGIN listblockname _NL list _END listblockname -paramsblockname: PARAMSBLOCKNAME -listblockname: LISTBLOCKNAME [_blockindex] -_blockindex: INT -_BEGIN: "begin"i -_END: "end"i - -// parameters (higher priority than lists -// since list of records will match also) -params.1: (param _NL)* -param: key | _pair -_pair: key value -key: PARAMNAME -?value: array - | list - | path - | string - | int - | float - -// string -word: WORD -?string: word+ - -// numbers -int: INT -float: FLOAT - -// file path -path: INOUT PATH -PATH: [_PATHSEP] (NON_SEPARATOR_STRING [_PATHSEP]) [NON_SEPARATOR_STRING] -_PATHSEP: "/" -INOUT: "filein"i|"fileout"i - -// array -array: constantarray | internalarray | externalarray -constantarray: "CONSTANT" float -internalarray: "INTERNAL" [factor] [iprn] (float* [_NL])* -externalarray: "OPEN/CLOSE" PATH [factor] ["binary"] [iprn] -factor: "FACTOR" NUMBER -iprn: "IPRN" INT - -// list adapted from https://github.com/lark-parser/lark/blob/master/examples/composition/csv.lark -list.-1: record* -// negative priority for records bc -// the pattern is so indiscriminate. -record.-1: _anything+ _NL -_anything: int | float | word -NON_SEPARATOR_STRING: /[a-zA-z.;\\\/]+/ - -// newline -_NL: /(\r?\n[\t ]*)+/ - -// TODO: -// a parameter key file can be generated -// with the rest of the plugin interface -// rather than known keys hardcoded here -// (and likewise for block names) -PARAMNAME: ("K"|"I"|"D"|"S"|"F"|"A") -PARAMSBLOCKNAME: ("OPTIONS"|"PACKAGEDATA") -LISTBLOCKNAME: "PERIOD" - -%import common.SH_COMMENT -> COMMENT -%import common.SIGNED_NUMBER -> NUMBER -%import common.SIGNED_INT -> INT -%import common.SIGNED_FLOAT -> FLOAT -%import common.WORD -%import common.WS_INLINE - -%ignore COMMENT -%ignore WS_INLINE -""" - -MF6_PARSER = Lark(MF6_GRAMMAR, start="component") diff --git a/flopy4/mf6/io/__init__.py b/flopy4/mf6/io/__init__.py new file mode 100644 index 0000000..2ac26a9 --- /dev/null +++ b/flopy4/mf6/io/__init__.py @@ -0,0 +1,4 @@ +__all__ = ["make_parser", "MF6Transformer"] + +from flopy4.mf6.io.parser import make_parser +from flopy4.mf6.io.transformer import MF6Transformer diff --git a/flopy4/mf6/io/parser.py b/flopy4/mf6/io/parser.py new file mode 100644 index 0000000..31c166b --- /dev/null +++ b/flopy4/mf6/io/parser.py @@ -0,0 +1,110 @@ +from os import linesep +from typing import Iterable + +from lark import Lark + +MF6_GRAMMAR = r""" +// component +component: _NL* (block _NL+)* _NL* + +// block +block: _paramblock | _listblock +_paramblock: _BEGIN paramblock _NL params _END paramblock +_listblock: _BEGIN listblock _NL list _END listblock +paramblock: PARAMBLOCK +listblock: LISTBLOCK [_blockindex] +_blockindex: INT +_BEGIN: "begin"i +_END: "end"i + +// parameter +params.1: (param _NL)* +param: key | _pair +_pair: key value +key: PARAM +?value: array + | list + | path + | string + | scalar +?scalar: int + | float + | word + +// string +word: WORD +?string: word+ +NON_SEPARATOR_STRING: /[a-zA-z.;\\\/]+/ + +// number +int: INT +float: FLOAT + +// file path +path: INOUT PATH +PATH: [_PATHSEP] (NON_SEPARATOR_STRING [_PATHSEP]) [NON_SEPARATOR_STRING] +_PATHSEP: "/" +INOUT: "filein"i|"fileout"i + +// array +array: constantarray | internalarray | externalarray +constantarray: "CONSTANT" float +internalarray: "INTERNAL" [factor] [iprn] (float* [_NL])* +externalarray: "OPEN/CLOSE" PATH [factor] ["binary"] [iprn] +factor: "FACTOR" NUMBER +iprn: "IPRN" INT + +// list adapted from https://github.com/lark-parser/lark/blob/master/examples/composition/csv.lark +// negative priority for records because the pattern is so indiscriminate +list.-1: record* +record.-1: _record+ _NL +_record: scalar + +// newline +_NL: /(\r?\n[\t ]*)+/ + +%import common.SH_COMMENT -> COMMENT +%import common.SIGNED_NUMBER -> NUMBER +%import common.SIGNED_INT -> INT +%import common.SIGNED_FLOAT -> FLOAT +%import common.WORD +%import common.WS_INLINE + +%ignore COMMENT +%ignore WS_INLINE +""" +""" +EBNF description for the MODFLOW 6 input language. +""" + + +def make_parser( + params: Iterable[str], + param_blocks: Iterable[str], + list_blocks: Iterable[str], +): + """ + Create a parser for the MODFLOW 6 input language with the given + parameter and block specification. + + Notes + ----- + We specify blocks containing parameters separately from blocks + that contain a list. These must be handled separately because + the pattern for list elements (records) casts a wider net than + the pattern for parameters, causing parameter blocks to parse + as lists otherwise. + + """ + params = "|".join(['"' + n + '"i' for n in params]) + param_blocks = "|".join(['"' + n + '"i' for n in param_blocks]) + list_blocks = "|".join(['"' + n + '"i' for n in list_blocks]) + grammar = linesep.join( + [ + MF6_GRAMMAR, + f"PARAM: ({params})", + f"PARAMBLOCK: ({param_blocks})", + f"LISTBLOCK: ({list_blocks})", + ] + ) + return Lark(grammar, start="component") diff --git a/flopy4/mf6/io/transformer.py b/flopy4/mf6/io/transformer.py new file mode 100644 index 0000000..51e9e38 --- /dev/null +++ b/flopy4/mf6/io/transformer.py @@ -0,0 +1,91 @@ +from pathlib import Path + +import numpy as np +from lark import Transformer + + +class MF6Transformer(Transformer): + """ + Transforms a parse tree for the MODFLOW 6 input language + into a nested dictionary AST suitable for structuring to + a strongly-typed input data model. + + Notes + ----- + Each function represents a node in the tree. Its argument + is a list of its children. Nodes are processed bottom-up, + so non-leaf functions can assume they will get a list of + primitives which are already in the right representation. + + See https://lark-parser.readthedocs.io/en/stable/visitors.html#transformer + for more info. + """ + + def key(self, k): + (k,) = k + return str(k).lower() + + def word(self, w): + (w,) = w + return str(w) + + def path(self, p): + _, p = p + return Path(p) + + def string(self, s): + return " ".join(s) + + def int(self, i): + (i,) = i + return int(i) + + def float(self, f): + (f,) = f + return float(f) + + def array(self, a): + (a,) = a + return a + + def constantarray(self, a): + # TODO factor out `ConstantArray` + # array-like class from `MFArray` + # with deferred shape and use it + pass + + def internalarray(self, a): + factor = a[0] + array = np.array(a[2:]) + if factor is not None: + array *= factor + return array + + def externalarray(self, a): + # TODO + pass + + record = tuple + list = list + + def param(self, p): + k = p[0] + v = True if len(p) == 1 else p[1] + return k, v + + params = dict + + def block(self, b): + return tuple(b[:2]) + + def paramblock(self, bn): + return str(bn[0]).lower() + + def listblock(self, bn): + name = str(bn[0]) + if len(bn) == 2: + index = int(bn[1]) + name = f"{name} {index}" + return name.lower() + + component = dict diff --git a/test/test_gwfoc.py b/test/test_gwfoc.py index 4d86f7c..42caf21 100644 --- a/test/test_gwfoc.py +++ b/test/test_gwfoc.py @@ -1,7 +1,7 @@ from pathlib import Path from typing import Dict, List, Literal, Optional, Union -import pytest +from cattrs import Converter from flopy4.attrs import context, is_frozen, param, params, to_path @@ -103,16 +103,30 @@ class Frequency: # choices in the union. There is likely a better way. -StepSelection = Union[All, First, Last, Steps, Frequency] -OutputAction = Literal["print", "save"] -OutputVariable = Literal["budget", "head"] +PrintSave = Literal["print", "save"] +RType = Literal["budget", "head"] +OCSetting = Union[All, First, Last, Steps, Frequency] @context class OutputControlData: - action: OutputAction = param() - variable: OutputVariable = param() - ocsetting: StepSelection = param() + printsave: PrintSave = param() + rtype: RType = param() + ocsetting: OCSetting = param() + + @classmethod + def from_tuple(cls, t): + t = list(t) + printsave = t.pop(0) + rtype = t.pop(0) + ocsetting = { + "all": All, + "first": First, + "last": Last, + "steps": Steps, + "frequency": Frequency, + }[t.pop(0).lower()](t) + return cls(printsave, rtype, ocsetting) Period = List[OutputControlData] @@ -131,6 +145,18 @@ class GwfOc: ) +# Converter + +converter = Converter() + + +def output_control_data_hook(value, type) -> OutputControlData: + return OutputControlData.from_tuple(value) + + +converter.register_structure_hook(OutputControlData, output_control_data_hook) + + # Tests @@ -141,7 +167,7 @@ def test_spec(): assert not is_frozen(OutputControlData) ocsetting = spec["ocsetting"] - assert ocsetting.type is StepSelection + assert ocsetting.type is OCSetting def test_options_to_dict(): @@ -152,29 +178,17 @@ def test_options_to_dict(): assert len(options.to_dict()) == 4 -def test_output_control_data_from_dict(): - # from dict - ocdata = OutputControlData.from_dict( - { - "action": "print", - "variable": "budget", - "ocsetting": {"steps": [1, 3, 5]}, - } - ) - assert ocdata.action == "print" - - -@pytest.mark.xfail(reason="todo") def test_output_control_data_from_tuple(): ocdata = OutputControlData.from_tuple( ("print", "budget", "steps", 1, 3, 5) ) - assert ocdata.action == "print" - assert ocdata.variable == "budget" + assert ocdata.printsave == "print" + assert ocdata.rtype == "budget" + assert ocdata.ocsetting == Steps([1, 3, 5]) def test_gwfoc_from_dict(): - gwfoc = GwfOc.from_dict( + gwfoc = converter.structure( { "options": { "budget_file": "some/file/path.cbc", @@ -188,25 +202,18 @@ def test_gwfoc_from_dict(): }, "periods": [ [ - { - "action": "print", - "variable": "budget", - "ocsetting": {"steps": [1, 3, 5]}, - }, - { - "action": "save", - "variable": "head", - "ocsetting": {"frequency": 2}, - }, + ("print", "budget", "steps", 1, 3, 5), + ("save", "head", "frequency", 2), ] ], - } + }, + GwfOc, ) assert gwfoc.options.budget_file == Path("some/file/path.cbc") assert gwfoc.options.print_format.width == 10 assert gwfoc.options.print_format.array_format == "scientific" period = gwfoc.periods[0] assert len(period) == 2 - assert period[0] == OutputControlData( - action="print", variable="budget", ocsetting=Steps([1, 3, 5]) + assert period[0] == OutputControlData.from_tuple( + ("print", "budget", "steps", 1, 3, 5) ) diff --git a/test/test_lark.py b/test/test_lark.py index 23c0dba..6dd3b95 100644 --- a/test/test_lark.py +++ b/test/test_lark.py @@ -2,11 +2,10 @@ from pathlib import Path import numpy as np -from lark import Transformer -from flopy4.lark import MF6_PARSER +from flopy4.mf6.io import MF6Transformer, make_parser -TEST_COMPONENT = """ +COMPONENT = """ BEGIN OPTIONS K I 1 @@ -30,90 +29,24 @@ """ +PARSER = make_parser( + params=["k", "i", "d", "s", "f", "a"], + param_blocks=["options", "packagedata"], + list_blocks=["period"], +) +TRANSFORMER = MF6Transformer() + + def test_parse(): - tree = MF6_PARSER.parse(TEST_COMPONENT) + tree = PARSER.parse(COMPONENT) # view the parse tree with e.g. # pytest test/test_lark.py::test_parse -s print(linesep + tree.pretty()) -class MF6Transformer(Transformer): - def key(self, k): - (k,) = k - return str(k).lower() - - def word(self, w): - (w,) = w - return str(w) - - def path(self, p): - _, p = p - return Path(p) - - def string(self, s): - return " ".join(s) - - def int(self, i): - (i,) = i - return int(i) - - def float(self, f): - (f,) = f - return float(f) - - def array(self, a): - (a,) = a - return a - - def constantarray(self, a): - # TODO factor out `ConstantArray` - # array-like class from `MFArray` - # with deferred shape and use it - pass - - def internalarray(self, a): - factor = a[0] - array = np.array(a[2:]) - if factor is not None: - array *= factor - return array - - def externalarray(self, a): - # TODO - pass - - record = tuple - list = list - - def param(self, p): - k = p[0] - v = True if len(p) == 1 else p[1] - return k, v - - params = dict - - def block(self, b): - return tuple(b[:2]) - - def paramsblockname(self, bn): - return str(bn[0]).lower() - - def listblockname(self, bn): - name = str(bn[0]) - if len(bn) == 2: - index = int(bn[1]) - name = f"{name} {index}" - return name.lower() - - component = dict - - -MF6_TRANSFORMER = MF6Transformer() - - def test_transform(): - tree = MF6_PARSER.parse(TEST_COMPONENT) - data = MF6_TRANSFORMER.transform(tree) + tree = PARSER.parse(COMPONENT) + data = TRANSFORMER.transform(tree) assert data["options"] == { "d": 1.0, "f": Path("some/path"),