Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

lark transformer working #30

Merged
merged 1 commit into from
Sep 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 47 additions & 32 deletions flopy4/lark/__init__.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,39 @@
from lark import Lark

MF6_GRAMMAR = r"""
?start: _NL* _item*
_item: (block | COMMENT) _NL+
// component
component: _NL* (block _NL+)* _NL*

// block
block: _begin _NL params _end
_begin: _BEGIN name [index]
_end: _END name
name: WORD
index: INT
// blocks
block: _paramsblock | _listblock
_paramsblock: _BEGIN paramsblockname _NL params _END paramsblockname
_listblock: _BEGIN listblockname _NL list _END listblockname
paramsblockname: PARAMSBLOCKNAME
listblockname: LISTBLOCKNAME [_blockindex]
_blockindex: INT
_BEGIN: "begin"i
_END: "end"i

// parameter
params: (param _NL)*
param: _key [_value]
_key: KEYS
_value: NUMBER | path | string | array | list
// parameters (higher priority than lists
// since list of records will match also)
params.1: (param _NL)*
param: key | _pair
_pair: key value
key: PARAMNAME
?value: array
| list
| path
| string
| int
| float

// string
string: WORD+
word: WORD
?string: word+

// numbers
int: INT
float: FLOAT

// file path
path: INOUT PATH
Expand All @@ -30,39 +43,41 @@

// array
array: constantarray | internalarray | externalarray
constantarray: "CONSTANT" NUMBER
internalarray: "INTERNAL" [factor] [iprn] (NUMBER* [_NL])*
externalarray: "OPEN/CLOSE" WORD [factor] ["binary"] [iprn]
constantarray: "CONSTANT" float
internalarray: "INTERNAL" [factor] [iprn] (float* [_NL])*
externalarray: "OPEN/CLOSE" PATH [factor] ["binary"] [iprn]
factor: "FACTOR" NUMBER
iprn: "IPRN" INT

// list (adapted from https://github.com/lark-parser/lark/blob/master/examples/composition/csv.lark)
list: header _NL row*
header: "#" " "? (WORD _SEPARATOR?)+
row: (_anything _SEPARATOR?)+ _NL
_anything: INT | WORD | NON_SEPARATOR_STRING | FLOAT | SIGNED_FLOAT
// list adapted from https://github.com/lark-parser/lark/blob/master/examples/composition/csv.lark
list.-1: record*
// negative priority for records bc
// the pattern is so indiscriminate.
record.-1: _anything+ _NL
_anything: int | float | word
NON_SEPARATOR_STRING: /[a-zA-z.;\\\/]+/
_SEPARATOR: /[ ]+/
| "\t"
| ","

// newline
_NL: /(\r?\n[\t ]*)+/

// parameter keys file can be generated
// TODO:
// a parameter key file can be generated
// with the rest of the plugin interface
// and maybe placed in a separate file
KEYS: "K"|"I"|"D"|"S"|"F"|"A"
// rather than known keys hardcoded here
// (and likewise for block names)
PARAMNAME: ("K"|"I"|"D"|"S"|"F"|"A")
PARAMSBLOCKNAME: ("OPTIONS"|"PACKAGEDATA")
LISTBLOCKNAME: "PERIOD"

%import common.SH_COMMENT -> COMMENT
%import common.SIGNED_NUMBER -> NUMBER
%import common.SIGNED_FLOAT
%import common.INT
%import common.FLOAT
%import common.SIGNED_INT -> INT
%import common.SIGNED_FLOAT -> FLOAT
%import common.WORD
%import common.WS_INLINE

%ignore COMMENT
%ignore WS_INLINE
"""

MF6_PARSER = Lark(MF6_GRAMMAR, start="start")
MF6_PARSER = Lark(MF6_GRAMMAR, start="component")
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -105,5 +105,6 @@ select = [
]
ignore = [
"F821", # undefined name TODO FIXME
"E722" # do not use bare `except`
"E722", # do not use bare `except`
"E741", # ambiguous variable name
]
115 changes: 100 additions & 15 deletions test/test_lark.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
from pprint import pprint
from os import linesep
from pathlib import Path

import pytest
import numpy as np
from lark import Transformer

from flopy4.lark import MF6_PARSER

TEST_PKG = """
TEST_COMPONENT = """
BEGIN OPTIONS
K
I 1
Expand All @@ -14,29 +15,113 @@
F FILEIN some/path
END OPTIONS

BEGIN PACKAGEDATA 1
BEGIN PACKAGEDATA
A INTERNAL 1.0 2.0 3.0
END PACKAGEDATA

BEGIN PERIOD 1
FIRST
FREQUENCY 2
END PERIOD 1

BEGIN PERIOD 2
STEPS 1 2 3
END PERIOD 2
"""


def test_parse_mf6():
    """Parse the sample package text and print the resulting tree.

    The output is only visible when pytest runs without capture, e.g.
    ``pytest test/test_lark.py::test_parse_mf6 -s``.
    """
    rendered = MF6_PARSER.parse(TEST_PKG).pretty()
    print(rendered)
def test_parse():
    """Parse the sample component and dump its parse tree to stdout.

    View the tree by disabling output capture, e.g.
    ``pytest test/test_lark.py::test_parse -s``.
    """
    rendered = MF6_PARSER.parse(TEST_COMPONENT).pretty()
    # Leading line separator keeps the tree off pytest's own status line.
    print(linesep + rendered)


class MF6Transformer(Transformer):
    """Convert an MF6 parse tree into plain Python data structures.

    Every method or class attribute is named after a grammar rule. Lark
    calls it with the list of (already transformed) children of that
    rule and substitutes the return value into the tree. Names such as
    ``int``, ``float`` and ``list`` therefore mirror rule names and
    deliberately coincide with builtins; inside method bodies those
    names still resolve to the builtins.
    """

    def key(self, k):
        """Return the parameter name as a lowercase string."""
        (k,) = k
        return str(k).lower()

    def word(self, w):
        """Return a single word token as a plain string."""
        (w,) = w
        return str(w)

    def path(self, p):
        """Return the file path as a `Path`, discarding the first child."""
        _, p = p
        return Path(p)

    def string(self, s):
        """Join the words of a multi-word string with single spaces."""
        return " ".join(s)

    def int(self, i):
        """Convert an integer token to `int`."""
        (i,) = i
        return int(i)

    def float(self, f):
        """Convert a floating point token to `float`."""
        (f,) = f
        return float(f)

    def factor(self, f):
        """Convert the multiplier of a ``FACTOR`` clause to `float`.

        Without this callback the ``factor`` node would reach
        `internalarray` as an untransformed tree, which cannot be
        multiplied into a numpy array.
        """
        (f,) = f
        return float(f)

    def array(self, a):
        """Unwrap the single concrete array variant."""
        (a,) = a
        return a

    def constantarray(self, a):
        """Not implemented yet; currently yields ``None``."""
        # TODO factor out `ConstantArray`
        # array-like class from `MFArray`
        # with deferred shape and use it
        pass

    def internalarray(self, a):
        """Build a numpy array from inline values, scaled by the factor.

        The first two children are the optional factor and print-code
        slots (``None`` when absent); the remaining children are the
        array values.
        """
        factor = a[0]
        array = np.array(a[2:])
        if factor is not None:
            array *= factor
        return array

    def externalarray(self, a):
        """Not implemented yet; currently yields ``None``."""
        # TODO
        pass

    # A record is the tuple of its values; a list is the list of records.
    record = tuple
    list = list

    def param(self, p):
        """Return a ``(key, value)`` pair; a bare key maps to ``True``."""
        k = p[0]
        v = True if len(p) == 1 else p[1]
        return k, v

    # A params section is the mapping built from its (key, value) pairs.
    params = dict

    def block(self, b):
        """Return ``(name, content)``, dropping the name repeated at the end."""
        return tuple(b[:2])

    def paramsblockname(self, bn):
        """Return the block name as a lowercase string."""
        return str(bn[0]).lower()

    def listblockname(self, bn):
        """Return the lowercase block name, with its index when one is given."""
        name = str(bn[0])
        if len(bn) == 2:
            index = int(bn[1])
            name = f"{name} {index}"
        return name.lower()

    # A component is the mapping built from its (name, content) blocks.
    component = dict


# Module-level transformer instance used by the transform test below.
MF6_TRANSFORMER = MF6Transformer()


@pytest.mark.xfail
def test_transform_mf6():
    """Parse the sample package text; marked as an expected failure."""
    MF6_PARSER.parse(TEST_PKG)
def test_transform():
    """Transform the parsed sample component and check each block's data."""
    data = MF6_TRANSFORMER.transform(MF6_PARSER.parse(TEST_COMPONENT))
    pprint(data)

    # options block: bare key -> True, then int, float, string and path
    expected_options = {
        "k": True,
        "i": 1,
        "d": 1.0,
        "s": "hello world",
        "f": Path("some/path"),
    }
    assert data["options"] == expected_options

    # packagedata block: inline array values
    expected_a = np.array([1.0, 2.0, 3.0])
    assert np.array_equal(data["packagedata"]["a"], expected_a)

    # period blocks: lists of record tuples
    assert data["period 1"][0] == ("FIRST",)
    assert data["period 1"][1] == ("FREQUENCY", 2)
    assert data["period 2"][0] == ("STEPS", 1, 2, 3)
Loading