From b205400b507584a8df35b8d80bf426e24710d2d0 Mon Sep 17 00:00:00 2001 From: wpbonelli Date: Thu, 5 Sep 2024 22:47:43 -0400 Subject: [PATCH] lark transformer working --- flopy4/lark/__init__.py | 79 ++++++++++++++++----------- pyproject.toml | 3 +- test/test_lark.py | 115 ++++++++++++++++++++++++++++++++++------ 3 files changed, 149 insertions(+), 48 deletions(-) diff --git a/flopy4/lark/__init__.py b/flopy4/lark/__init__.py index 8f81a2c..77492db 100644 --- a/flopy4/lark/__init__.py +++ b/flopy4/lark/__init__.py @@ -1,26 +1,39 @@ from lark import Lark MF6_GRAMMAR = r""" -?start: _NL* _item* -_item: (block | COMMENT) _NL+ +// component +component: _NL* (block _NL+)* _NL* -// block -block: _begin _NL params _end -_begin: _BEGIN name [index] -_end: _END name -name: WORD -index: INT +// blocks +block: _paramsblock | _listblock +_paramsblock: _BEGIN paramsblockname _NL params _END paramsblockname +_listblock: _BEGIN listblockname _NL list _END listblockname +paramsblockname: PARAMSBLOCKNAME +listblockname: LISTBLOCKNAME [_blockindex] +_blockindex: INT _BEGIN: "begin"i _END: "end"i -// parameter -params: (param _NL)* -param: _key [_value] -_key: KEYS -_value: NUMBER | path | string | array | list +// parameters (higher priority than lists +// since list of records will match also) +params.1: (param _NL)* +param: key | _pair +_pair: key value +key: PARAMNAME +?value: array + | list + | path + | string + | int + | float // string -string: WORD+ +word: WORD +?string: word+ + +// numbers +int: INT +float: FLOAT // file path path: INOUT PATH @@ -30,39 +43,41 @@ // array array: constantarray | internalarray | externalarray -constantarray: "CONSTANT" NUMBER -internalarray: "INTERNAL" [factor] [iprn] (NUMBER* [_NL])* -externalarray: "OPEN/CLOSE" WORD [factor] ["binary"] [iprn] +constantarray: "CONSTANT" float +internalarray: "INTERNAL" [factor] [iprn] (float* [_NL])* +externalarray: "OPEN/CLOSE" PATH [factor] ["binary"] [iprn] factor: "FACTOR" NUMBER iprn: "IPRN" INT -// list (adapted from https://github.com/lark-parser/lark/blob/master/examples/composition/csv.lark) -list: header _NL row* -header: "#" " "? (WORD _SEPARATOR?)+ -row: (_anything _SEPARATOR?)+ _NL -_anything: INT | WORD | NON_SEPARATOR_STRING | FLOAT | SIGNED_FLOAT +// list adapted from https://github.com/lark-parser/lark/blob/master/examples/composition/csv.lark +list.-1: record* +// negative priority for records bc +// the pattern is so indiscriminate. +record.-1: _anything+ _NL +_anything: int | float | word NON_SEPARATOR_STRING: /[a-zA-z.;\\\/]+/ -_SEPARATOR: /[ ]+/ - | "\t" - | "," // newline _NL: /(\r?\n[\t ]*)+/ -// parameter keys file can be generated +// TODO: +// a parameter key file can be generated // with the rest of the plugin interface -// and maybe placed in a separate file -KEYS: "K"|"I"|"D"|"S"|"F"|"A" +// rather than known keys hardcoded here +// (and likewise for block names) +PARAMNAME: ("K"|"I"|"D"|"S"|"F"|"A") +PARAMSBLOCKNAME: ("OPTIONS"|"PACKAGEDATA") +LISTBLOCKNAME: "PERIOD" %import common.SH_COMMENT -> COMMENT %import common.SIGNED_NUMBER -> NUMBER -%import common.SIGNED_FLOAT -%import common.INT -%import common.FLOAT +%import common.SIGNED_INT -> INT +%import common.SIGNED_FLOAT -> FLOAT %import common.WORD %import common.WS_INLINE +%ignore COMMENT %ignore WS_INLINE """ -MF6_PARSER = Lark(MF6_GRAMMAR, start="start") +MF6_PARSER = Lark(MF6_GRAMMAR, start="component") diff --git a/pyproject.toml b/pyproject.toml index 16c1602..2260648 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -105,5 +105,6 @@ select = [ ] ignore = [ "F821", # undefined name TODO FIXME - "E722" # do not use bare `except` + "E722", # do not use bare `except` + "E741", # ambiguous variable name ] diff --git a/test/test_lark.py b/test/test_lark.py index 2ac5b52..23c0dba 100644 --- a/test/test_lark.py +++ b/test/test_lark.py @@ -1,11 +1,12 @@ -from pprint import pprint +from os import linesep +from pathlib import Path -import pytest +import numpy as np from lark import Transformer from flopy4.lark import MF6_PARSER -TEST_PKG = """ +TEST_COMPONENT = """ BEGIN OPTIONS K I 1 @@ -14,29 +15,113 @@ F FILEIN some/path END OPTIONS -BEGIN PACKAGEDATA 1 +BEGIN PACKAGEDATA A INTERNAL 1.0 2.0 3.0 END PACKAGEDATA + +BEGIN PERIOD 1 + FIRST + FREQUENCY 2 +END PERIOD 1 + +BEGIN PERIOD 2 + STEPS 1 2 3 +END PERIOD 2 """ -def test_parse_mf6(): - tree = MF6_PARSER.parse(TEST_PKG) - # this is working, check it with: - # pytest test/test_lark.py::test_parse_mf6 -s - print(tree.pretty()) +def test_parse(): + tree = MF6_PARSER.parse(TEST_COMPONENT) + # view the parse tree with e.g. + # pytest test/test_lark.py::test_parse -s + print(linesep + tree.pretty()) class MF6Transformer(Transformer): - # TODO - pass + def key(self, k): + (k,) = k + return str(k).lower() + + def word(self, w): + (w,) = w + return str(w) + + def path(self, p): + _, p = p + return Path(p) + + def string(self, s): + return " ".join(s) + + def int(self, i): + (i,) = i + return int(i) + + def float(self, f): + (f,) = f + return float(f) + + def array(self, a): + (a,) = a + return a + + def constantarray(self, a): + # TODO factor out `ConstantArray` + # array-like class from `MFArray` + # with deferred shape and use it + pass + + def internalarray(self, a): + factor = a[0] + array = np.array(a[2:]) + if factor is not None: + array *= factor + return array + + def externalarray(self, a): + # TODO + pass + + record = tuple + list = list + + def param(self, p): + k = p[0] + v = True if len(p) == 1 else p[1] + return k, v + + params = dict + + def block(self, b): + return tuple(b[:2]) + + def paramsblockname(self, bn): + return str(bn[0]).lower() + + def listblockname(self, bn): + name = str(bn[0]) + if len(bn) == 2: + index = int(bn[1]) + name = f"{name} {index}" + return name.lower() + + component = dict MF6_TRANSFORMER = MF6Transformer() -@pytest.mark.xfail -def test_transform_mf6(): - tree = MF6_PARSER.parse(TEST_PKG) +def test_transform(): + tree = MF6_PARSER.parse(TEST_COMPONENT) data = MF6_TRANSFORMER.transform(tree) - pprint(data) + assert data["options"] == { + "d": 1.0, + "f": Path("some/path"), + "i": 1, + "k": True, + "s": "hello world", + } + assert np.array_equal(data["packagedata"]["a"], np.array([1.0, 2.0, 3.0])) + assert data["period 1"][0] == ("FIRST",) + assert data["period 1"][1] == ("FREQUENCY", 2) + assert data["period 2"][0] == ("STEPS", 1, 2, 3)