From edbb455b00cd9fdbefdfebded2170af33453691b Mon Sep 17 00:00:00 2001 From: wpbonelli Date: Sun, 1 Sep 2024 22:00:38 -0400 Subject: [PATCH 1/5] grammar, rough cut, prob broken --- flopy4/dfn.lark | 9 +++++++++ flopy4/mf6.lark | 22 ++++++++++++++++++++++ flopy4/param.py | 2 -- flopy4/parsers.py | 9 +++++++++ pyproject.toml | 7 ++++--- 5 files changed, 44 insertions(+), 5 deletions(-) create mode 100644 flopy4/dfn.lark create mode 100644 flopy4/mf6.lark create mode 100644 flopy4/parsers.py diff --git a/flopy4/dfn.lark b/flopy4/dfn.lark new file mode 100644 index 0000000..7b16a3f --- /dev/null +++ b/flopy4/dfn.lark @@ -0,0 +1,9 @@ +value: string | NUMBER +string: WORD* +keyval: [string] value +param: (keyval NEWLINE)+ NEWLINE + +%import common.SIGNED_NUMBER -> NUMBER +%import common.NEWLINE +%import common.WORD +%ignore common.WS \ No newline at end of file diff --git a/flopy4/mf6.lark b/flopy4/mf6.lark new file mode 100644 index 0000000..32bd899 --- /dev/null +++ b/flopy4/mf6.lark @@ -0,0 +1,22 @@ +scalar: string | NUMBER +value: scalar | array | table | list +keyval: [string] value +repeat: [string] scalar+ +string: WORD* +record: keyval* NEWLINE +raggedrecord: keyval* (keyval | repeat) NEWLINE +table: record* +list: raggedrecord* +array: (constantarray | internalarray | externalarray) +constantarray: "constant" value +internalarray: "internal" [factor] [iprn] NEWLINE repeat (NEWLINE repeat)* NEWLINE +externalarray: "open/close" value [factor] ["binary"] [iprn] +factor: ["factor" value] +iprn: ["iprn" value] +block: "begin" WORD ?INT NEWLINE (keyval NEWLINE)* "end" WORD + +%import common.SIGNED_NUMBER -> NUMBER +%import common.INT +%import common.NEWLINE +%import common.WORD +%ignore common.WS \ No newline at end of file diff --git a/flopy4/param.py b/flopy4/param.py index 64a1dcb..23bfba7 100644 --- a/flopy4/param.py +++ b/flopy4/param.py @@ -29,8 +29,6 @@ class MFParamSpec: repeating: bool = False tagged: bool = True reader: MFReader = MFReader.urword - # todo 
change to variadic tuple of str and resolve - # actual shape at load time from simulation context shape: Optional[Tuple[int]] = None default_value: Optional[Any] = None diff --git a/flopy4/parsers.py b/flopy4/parsers.py new file mode 100644 index 0000000..ee5f43f --- /dev/null +++ b/flopy4/parsers.py @@ -0,0 +1,9 @@ +from pathlib import Path + +from lark import Lark + +DFN_GRAMMAR_FILE = Path(__file__).parent / "dfn.lark" +MF6_GRAMMAR_FILE = Path(__file__).parent / "mf6.lark" + +DFN_PARSER = Lark(open(DFN_GRAMMAR_FILE).read(), start="value") +MF6_PARSER = Lark(open(MF6_GRAMMAR_FILE).read(), start="value") diff --git a/pyproject.toml b/pyproject.toml index bfda02d..04b22b0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,10 +36,11 @@ classifiers = [ ] requires-python = ">=3.9" dependencies = [ - "attrs", # todo: lower bound? - "cattrs", # todo: lower bound? - "Jinja2>=3.0", + "attrs", # todo: bounds? + "cattrs", # todo: bounds? "flopy>=3.7.0", + "Jinja2>=3.0", + "lark", # todo: bounds? 
"numpy>=1.20.3", "pandas>=2.0.0", "toml>=0.10", From 5c5ca202fa0e7a6f0bd907cfb8d120a5ff0c9686 Mon Sep 17 00:00:00 2001 From: wpbonelli Date: Wed, 4 Sep 2024 20:22:17 -0400 Subject: [PATCH 2/5] fix mf6 grammer, minimal test --- flopy4/lark/__init__.py | 6 ++++ flopy4/{ => lark}/dfn.lark | 1 + flopy4/lark/keys.lark | 1 + flopy4/lark/mf6.lark | 61 ++++++++++++++++++++++++++++++++++++++ flopy4/mf6.lark | 22 -------------- pyproject.toml | 1 + test/test_lark.py | 42 ++++++++++++++++++++++++++ 7 files changed, 112 insertions(+), 22 deletions(-) create mode 100644 flopy4/lark/__init__.py rename flopy4/{ => lark}/dfn.lark (86%) create mode 100644 flopy4/lark/keys.lark create mode 100644 flopy4/lark/mf6.lark delete mode 100644 flopy4/mf6.lark create mode 100644 test/test_lark.py diff --git a/flopy4/lark/__init__.py b/flopy4/lark/__init__.py new file mode 100644 index 0000000..41bba6e --- /dev/null +++ b/flopy4/lark/__init__.py @@ -0,0 +1,6 @@ +from pathlib import Path + +from lark import Lark + +MF6_GRAMMAR_PATH = Path(__file__).parent / "mf6.lark" +MF6_PARSER = Lark.open(MF6_GRAMMAR_PATH, rel_to=__file__, start="start") diff --git a/flopy4/dfn.lark b/flopy4/lark/dfn.lark similarity index 86% rename from flopy4/dfn.lark rename to flopy4/lark/dfn.lark index 7b16a3f..f01d79b 100644 --- a/flopy4/dfn.lark +++ b/flopy4/lark/dfn.lark @@ -1,3 +1,4 @@ +// TODO: test, probably broken value: string | NUMBER string: WORD* keyval: [string] value diff --git a/flopy4/lark/keys.lark b/flopy4/lark/keys.lark new file mode 100644 index 0000000..86a6b4e --- /dev/null +++ b/flopy4/lark/keys.lark @@ -0,0 +1 @@ +KEYS: "K"|"I"|"D"|"S"|"F"|"A" \ No newline at end of file diff --git a/flopy4/lark/mf6.lark b/flopy4/lark/mf6.lark new file mode 100644 index 0000000..09c140b --- /dev/null +++ b/flopy4/lark/mf6.lark @@ -0,0 +1,61 @@ +?start: _NL* _item* +_item: (block | COMMENT) _NL+ + +// block +block: _begin _NL params _end +_begin: _BEGIN name [index] +_end: _END name +name: WORD +index: INT +_BEGIN: 
"begin"i +_END: "end"i + +// parameter +params: (param _NL)* +param: _key [_value] +_key: KEYS +_value: NUMBER | path | string | array | list + +// string +string: WORD+ + +// file path +path: INOUT PATH +PATH: [_PATHSEP] (NON_SEPARATOR_STRING [_PATHSEP]) [NON_SEPARATOR_STRING] +_PATHSEP: "/" +INOUT: "filein"i|"fileout"i + +// array +array: constantarray | internalarray | externalarray +constantarray: "CONSTANT" NUMBER +internalarray: "INTERNAL" [factor] [iprn] (NUMBER* [_NL])* +externalarray: "OPEN/CLOSE" WORD [factor] ["binary"] [iprn] +factor: "FACTOR" NUMBER +iprn: "IPRN" INT + +// list (adapted from https://github.com/lark-parser/lark/blob/master/examples/composition/csv.lark) +list: header _NL row* +header: "#" " "? (WORD _SEPARATOR?)+ +row: (_anything _SEPARATOR?)+ _NL +_anything: INT | WORD | NON_SEPARATOR_STRING | FLOAT | SIGNED_FLOAT +NON_SEPARATOR_STRING: /[a-zA-z.;\\\/]+/ +_SEPARATOR: /[ ]+/ + | "\t" + | "," + +// newline +_NL: /(\r?\n[\t ]*)+/ + +// parameter keys file can be generated +// with the rest of the plugin interface +%import .keys.KEYS -> KEYS + +%import common.SH_COMMENT -> COMMENT +%import common.SIGNED_NUMBER -> NUMBER +%import common.SIGNED_FLOAT +%import common.INT +%import common.FLOAT +%import common.WORD +%import common.WS_INLINE + +%ignore WS_INLINE \ No newline at end of file diff --git a/flopy4/mf6.lark b/flopy4/mf6.lark deleted file mode 100644 index 32bd899..0000000 --- a/flopy4/mf6.lark +++ /dev/null @@ -1,22 +0,0 @@ -scalar: string | NUMBER -value: scalar | array | table | list -keyval: [string] value -repeat: [string] scalar+ -string: WORD* -record: keyval* NEWLINE -raggedrecord: keyval* (keyval | repeat) NEWLINE -table: record* -list: raggedrecord* -array: (constantarray | internalarray | externalarray) -constantarray: "constant" value -internalarray: "internal" [factor] [iprn] NEWLINE repeat (NEWLINE repeat)* NEWLINE -externalarray: "open/close" value [factor] ["binary"] [iprn] -factor: ["factor" value] -iprn: ["iprn" 
value] -block: "begin" WORD ?INT NEWLINE (keyval NEWLINE)* "end" WORD - -%import common.SIGNED_NUMBER -> NUMBER -%import common.INT -%import common.NEWLINE -%import common.WORD -%ignore common.WS \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 04b22b0..16c1602 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -55,6 +55,7 @@ test = [ "flopy4[lint]", "coverage", "GitPython", + "interegular", "jupyter", "jupytext", "modflow-devtools", diff --git a/test/test_lark.py b/test/test_lark.py new file mode 100644 index 0000000..2ac5b52 --- /dev/null +++ b/test/test_lark.py @@ -0,0 +1,42 @@ +from pprint import pprint + +import pytest +from lark import Transformer + +from flopy4.lark import MF6_PARSER + +TEST_PKG = """ +BEGIN OPTIONS + K + I 1 + D 1.0 + S hello world + F FILEIN some/path +END OPTIONS + +BEGIN PACKAGEDATA 1 + A INTERNAL 1.0 2.0 3.0 +END PACKAGEDATA +""" + + +def test_parse_mf6(): + tree = MF6_PARSER.parse(TEST_PKG) + # this is working, check it with: + # pytest test/test_lark.py::test_parse_mf6 -s + print(tree.pretty()) + + +class MF6Transformer(Transformer): + # TODO + pass + + +MF6_TRANSFORMER = MF6Transformer() + + +@pytest.mark.xfail +def test_transform_mf6(): + tree = MF6_PARSER.parse(TEST_PKG) + data = MF6_TRANSFORMER.transform(tree) + pprint(data) From 21d427b8fd333f010da753c05b1feb7bd315fe0b Mon Sep 17 00:00:00 2001 From: wpbonelli Date: Wed, 4 Sep 2024 20:25:07 -0400 Subject: [PATCH 3/5] remove old file --- flopy4/parsers.py | 9 --------- 1 file changed, 9 deletions(-) delete mode 100644 flopy4/parsers.py diff --git a/flopy4/parsers.py b/flopy4/parsers.py deleted file mode 100644 index ee5f43f..0000000 --- a/flopy4/parsers.py +++ /dev/null @@ -1,9 +0,0 @@ -from pathlib import Path - -from lark import Lark - -DFN_GRAMMAR_FILE = Path(__file__).parent / "dfn.lark" -MF6_GRAMMAR_FILE = Path(__file__).parent / "mf6.lark" - -DFN_PARSER = Lark(open(DFN_GRAMMAR_FILE).read(), start="value") -MF6_PARSER = 
Lark(open(MF6_GRAMMAR_FILE).read(), start="value") From a6adfd1e29cc44098e47618e6859153f3bacf438 Mon Sep 17 00:00:00 2001 From: wpbonelli Date: Wed, 4 Sep 2024 20:40:04 -0400 Subject: [PATCH 4/5] inline grammar --- flopy4/lark/__init__.py | 70 ++++++++++++++++++++++++++++++++++++++--- flopy4/lark/dfn.lark | 10 ------ 2 files changed, 66 insertions(+), 14 deletions(-) delete mode 100644 flopy4/lark/dfn.lark diff --git a/flopy4/lark/__init__.py b/flopy4/lark/__init__.py index 41bba6e..8f81a2c 100644 --- a/flopy4/lark/__init__.py +++ b/flopy4/lark/__init__.py @@ -1,6 +1,68 @@ -from pathlib import Path - from lark import Lark -MF6_GRAMMAR_PATH = Path(__file__).parent / "mf6.lark" -MF6_PARSER = Lark.open(MF6_GRAMMAR_PATH, rel_to=__file__, start="start") +MF6_GRAMMAR = r""" +?start: _NL* _item* +_item: (block | COMMENT) _NL+ + +// block +block: _begin _NL params _end +_begin: _BEGIN name [index] +_end: _END name +name: WORD +index: INT +_BEGIN: "begin"i +_END: "end"i + +// parameter +params: (param _NL)* +param: _key [_value] +_key: KEYS +_value: NUMBER | path | string | array | list + +// string +string: WORD+ + +// file path +path: INOUT PATH +PATH: [_PATHSEP] (NON_SEPARATOR_STRING [_PATHSEP]) [NON_SEPARATOR_STRING] +_PATHSEP: "/" +INOUT: "filein"i|"fileout"i + +// array +array: constantarray | internalarray | externalarray +constantarray: "CONSTANT" NUMBER +internalarray: "INTERNAL" [factor] [iprn] (NUMBER* [_NL])* +externalarray: "OPEN/CLOSE" WORD [factor] ["binary"] [iprn] +factor: "FACTOR" NUMBER +iprn: "IPRN" INT + +// list (adapted from https://github.com/lark-parser/lark/blob/master/examples/composition/csv.lark) +list: header _NL row* +header: "#" " "? 
(WORD _SEPARATOR?)+ +row: (_anything _SEPARATOR?)+ _NL +_anything: INT | WORD | NON_SEPARATOR_STRING | FLOAT | SIGNED_FLOAT +NON_SEPARATOR_STRING: /[a-zA-Z_.;\\\/]+/ +_SEPARATOR: /[ ]+/ + | "\t" + | "," + +// newline +_NL: /(\r?\n[\t ]*)+/ + +// parameter keys file can be generated +// with the rest of the plugin interface +// and maybe placed in a separate file +KEYS: "K"|"I"|"D"|"S"|"F"|"A" + +%import common.SH_COMMENT -> COMMENT +%import common.SIGNED_NUMBER -> NUMBER +%import common.SIGNED_FLOAT +%import common.INT +%import common.FLOAT +%import common.WORD +%import common.WS_INLINE + +%ignore WS_INLINE +""" + +MF6_PARSER = Lark(MF6_GRAMMAR, start="start") diff --git a/flopy4/lark/dfn.lark b/flopy4/lark/dfn.lark deleted file mode 100644 index f01d79b..0000000 --- a/flopy4/lark/dfn.lark +++ /dev/null @@ -1,10 +0,0 @@ -// TODO: test, probably broken -value: string | NUMBER -string: WORD* -keyval: [string] value -param: (keyval NEWLINE)+ NEWLINE - -%import common.SIGNED_NUMBER -> NUMBER -%import common.NEWLINE -%import common.WORD -%ignore common.WS \ No newline at end of file From 03f1401f144d5dc1d435b231ff0a9eb6c121a5db Mon Sep 17 00:00:00 2001 From: wpbonelli Date: Wed, 4 Sep 2024 20:41:41 -0400 Subject: [PATCH 5/5] remove .lark files for now --- flopy4/lark/keys.lark | 1 - flopy4/lark/mf6.lark | 61 ------------------------------------------- 2 files changed, 62 deletions(-) delete mode 100644 flopy4/lark/keys.lark delete mode 100644 flopy4/lark/mf6.lark diff --git a/flopy4/lark/keys.lark b/flopy4/lark/keys.lark deleted file mode 100644 index 86a6b4e..0000000 --- a/flopy4/lark/keys.lark +++ /dev/null @@ -1 +0,0 @@ -KEYS: "K"|"I"|"D"|"S"|"F"|"A" \ No newline at end of file diff --git a/flopy4/lark/mf6.lark b/flopy4/lark/mf6.lark deleted file mode 100644 index 09c140b --- /dev/null +++ /dev/null @@ -1,61 +0,0 @@ -?start: _NL* _item* -_item: (block | COMMENT) _NL+ - -// block -block: _begin _NL params _end -_begin: _BEGIN name [index] -_end: 
_END name -name: WORD -index: INT -_BEGIN: "begin"i -_END: "end"i - -// parameter -params: (param _NL)* -param: _key [_value] -_key: KEYS -_value: NUMBER | path | string | array | list - -// string -string: WORD+ - -// file path -path: INOUT PATH -PATH: [_PATHSEP] (NON_SEPARATOR_STRING [_PATHSEP]) [NON_SEPARATOR_STRING] -_PATHSEP: "/" -INOUT: "filein"i|"fileout"i - -// array -array: constantarray | internalarray | externalarray -constantarray: "CONSTANT" NUMBER -internalarray: "INTERNAL" [factor] [iprn] (NUMBER* [_NL])* -externalarray: "OPEN/CLOSE" WORD [factor] ["binary"] [iprn] -factor: "FACTOR" NUMBER -iprn: "IPRN" INT - -// list (adapted from https://github.com/lark-parser/lark/blob/master/examples/composition/csv.lark) -list: header _NL row* -header: "#" " "? (WORD _SEPARATOR?)+ -row: (_anything _SEPARATOR?)+ _NL -_anything: INT | WORD | NON_SEPARATOR_STRING | FLOAT | SIGNED_FLOAT -NON_SEPARATOR_STRING: /[a-zA-z.;\\\/]+/ -_SEPARATOR: /[ ]+/ - | "\t" - | "," - -// newline -_NL: /(\r?\n[\t ]*)+/ - -// parameter keys file can be generated -// with the rest of the plugin interface -%import .keys.KEYS -> KEYS - -%import common.SH_COMMENT -> COMMENT -%import common.SIGNED_NUMBER -> NUMBER -%import common.SIGNED_FLOAT -%import common.INT -%import common.FLOAT -%import common.WORD -%import common.WS_INLINE - -%ignore WS_INLINE \ No newline at end of file