Skip to content

Commit

Permalink
Support flatbuffer-based tree exports/imports
Browse files Browse the repository at this point in the history
  • Loading branch information
renatahodovan committed May 24, 2024
1 parent 85d6237 commit ab1bb72
Show file tree
Hide file tree
Showing 10 changed files with 319 additions and 8 deletions.
2 changes: 1 addition & 1 deletion .pylintrc
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[MASTER]

# Files or directories to be skipped. They should be base names, not paths.
ignore=g4, grammars
ignore=g4, grammars, FBRule_generated.py

# A comma-separated list of package or module names from where C extensions may
# be loaded. Extensions are loaded into the active Python interpreter and may
Expand Down
5 changes: 3 additions & 2 deletions grammarinator/cli.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2023 Renata Hodovan, Akos Kiss.
# Copyright (c) 2020-2024 Renata Hodovan, Akos Kiss.
#
# Licensed under the BSD 3-Clause License
# <LICENSE.rst or https://opensource.org/licenses/BSD-3-Clause>.
Expand All @@ -10,7 +10,7 @@

from inators.imp import import_object

from .tool import JsonTreeCodec, PickleTreeCodec
from .tool import FlatBuffersTreeCodec, JsonTreeCodec, PickleTreeCodec

logger = logging.getLogger('grammarinator')

Expand Down Expand Up @@ -46,6 +46,7 @@ def add_encoding_errors_argument(parser):
# Supported tree serialization formats, keyed by format name (the tests pass
# these names via ``--tree-format``). Each entry gives the file extension used
# for trees of that format and the codec class that encodes/decodes them.
tree_formats = {
'pickle': {'extension': 'grtp', 'codec_class': PickleTreeCodec},
'json': {'extension': 'grtj', 'codec_class': JsonTreeCodec},
'flatbuffers': {'extension': 'grtf', 'codec_class': FlatBuffersTreeCodec},
}


Expand Down
4 changes: 2 additions & 2 deletions grammarinator/tool/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2023 Renata Hodovan, Akos Kiss.
# Copyright (c) 2023-2024 Renata Hodovan, Akos Kiss.
#
# Licensed under the BSD 3-Clause License
# <LICENSE.rst or https://opensource.org/licenses/BSD-3-Clause>.
Expand All @@ -9,4 +9,4 @@
from .generator import DefaultGeneratorFactory, GeneratorFactory, GeneratorTool
from .parser import ParserTool
from .processor import ProcessorTool
from .tree_codec import AnnotatedTreeCodec, JsonTreeCodec, PickleTreeCodec, TreeCodec
from .tree_codec import AnnotatedTreeCodec, FlatBuffersTreeCodec, JsonTreeCodec, PickleTreeCodec, TreeCodec
178 changes: 178 additions & 0 deletions grammarinator/tool/fbs/FBRule_generated.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
# automatically generated by the FlatBuffers compiler, do not modify

# namespace: fbs

import flatbuffers
from flatbuffers.compat import import_numpy
np = import_numpy()

class FBRuleType(object):
    """Integer tags identifying the kind of node stored in an ``FBRule`` table."""

    # The tags are consecutive integers starting at 0, in schema order.
    (
        UnlexerRuleType,
        UnparserRuleType,
        UnparserRuleQuantifierType,
        UnparserRuleQuantifiedType,
        UnparserRuleAlternativeType,
    ) = range(5)


class FBRuleSize(object):
    """Read accessor for an ``FBRuleSize`` struct: two int32 values, ``depth`` and ``tokens``."""

    __slots__ = ['_tab']

    @classmethod
    def SizeOf(cls):
        """Return the size of the struct in bytes."""
        return 8

    def Init(self, buf, pos):
        """Bind this accessor to the struct located at position ``pos`` of ``buf``."""
        self._tab = flatbuffers.table.Table(buf, pos)

    def Depth(self):
        """Return the int32 ``depth`` value, read at byte offset 0 of the struct."""
        field_offset = flatbuffers.number_types.UOffsetTFlags.py_type(0)
        return self._tab.Get(flatbuffers.number_types.Int32Flags, self._tab.Pos + field_offset)

    def Tokens(self):
        """Return the int32 ``tokens`` value, read at byte offset 4 of the struct."""
        field_offset = flatbuffers.number_types.UOffsetTFlags.py_type(4)
        return self._tab.Get(flatbuffers.number_types.Int32Flags, self._tab.Pos + field_offset)

def CreateFBRuleSize(builder, depth, tokens):
    """
    Write an ``FBRuleSize`` struct with the given values through ``builder``.

    :param builder: Builder object the struct is written to.
    :param depth: Value of the ``depth`` field.
    :param tokens: Value of the ``tokens`` field.
    :return: Whatever ``builder.Offset()`` reports after the struct is written.
    """
    builder.Prep(4, 8)
    # Values are prepended, hence the reverse field order: ``tokens`` goes
    # first so that ``depth`` ends up in front of it.
    for value in (tokens, depth):
        builder.PrependInt32(value)
    return builder.Offset()


class FBRule(object):
    """
    Read accessor for an ``FBRule`` table stored in a FlatBuffers buffer.

    Every getter first looks up the position of its field with
    ``Table.Offset``; a result of 0 is treated as "field not present" and a
    default (0 for numbers, ``None`` for strings, structs and children) is
    returned instead.
    """

    __slots__ = ['_tab']

    @classmethod
    def GetRootAs(cls, buf, offset=0):
        """Return an accessor for the root ``FBRule`` table of ``buf``."""
        root_pos = flatbuffers.encode.Get(flatbuffers.packer.uoffset, buf, offset)
        root = FBRule()
        root.Init(buf, root_pos + offset)
        return root

    @classmethod
    def GetRootAsFBRule(cls, buf, offset=0):
        """This method is deprecated. Please switch to GetRootAs."""
        return cls.GetRootAs(buf, offset)

    def Init(self, buf, pos):
        """Bind this accessor to the table located at position ``pos`` of ``buf``."""
        self._tab = flatbuffers.table.Table(buf, pos)

    def Type(self):
        """Return the int8 ``type`` tag, or 0 if the field is not present."""
        field = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(4))
        if field == 0:
            return 0
        return self._tab.Get(flatbuffers.number_types.Int8Flags, field + self._tab.Pos)

    def Name(self):
        """Return the ``name`` string as read by ``Table.String``, or ``None`` if not present."""
        field = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(6))
        if field == 0:
            return None
        return self._tab.String(field + self._tab.Pos)

    def Children(self, j):
        """Return an accessor for the ``j``-th child table, or ``None`` if there is no children vector."""
        field = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(8))
        if field == 0:
            return None
        # Each vector element is a 4-byte offset pointing to the child table.
        element = self._tab.Vector(field) + flatbuffers.number_types.UOffsetTFlags.py_type(j) * 4
        child_pos = self._tab.Indirect(element)
        child = FBRule()
        child.Init(self._tab.Bytes, child_pos)
        return child

    def ChildrenLength(self):
        """Return the number of children, or 0 if there is no children vector."""
        field = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(8))
        if field == 0:
            return 0
        return self._tab.VectorLen(field)

    def ChildrenIsNone(self):
        """Return ``True`` if the table has no children vector at all."""
        field = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(8))
        return field == 0

    def Src(self):
        """Return the ``src`` string as read by ``Table.String``, or ``None`` if not present."""
        field = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(10))
        if field == 0:
            return None
        return self._tab.String(field + self._tab.Pos)

    def Size(self):
        """Return an ``FBRuleSize`` accessor for the ``size`` struct, or ``None`` if not present."""
        field = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(12))
        if field == 0:
            return None
        # The struct is read directly at the field's position inside the table.
        size = FBRuleSize()
        size.Init(self._tab.Bytes, field + self._tab.Pos)
        return size

    def Idx(self):
        """Return the int32 ``idx`` field, or 0 if not present."""
        field = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(14))
        if field == 0:
            return 0
        return self._tab.Get(flatbuffers.number_types.Int32Flags, field + self._tab.Pos)

    def Start(self):
        """Return the int32 ``start`` field, or 0 if not present."""
        field = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(16))
        if field == 0:
            return 0
        return self._tab.Get(flatbuffers.number_types.Int32Flags, field + self._tab.Pos)

    def Stop(self):
        """Return the int32 ``stop`` field, or 0 if not present."""
        field = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(18))
        if field == 0:
            return 0
        return self._tab.Get(flatbuffers.number_types.Int32Flags, field + self._tab.Pos)

    def AltIdx(self):
        """Return the int32 ``alt_idx`` field, or 0 if not present."""
        field = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(20))
        if field == 0:
            return 0
        return self._tab.Get(flatbuffers.number_types.Int32Flags, field + self._tab.Pos)

def FBRuleStart(builder):
    """Start a new ``FBRule`` table on ``builder`` with room for its nine fields."""
    field_count = 9
    builder.StartObject(field_count)

def FBRuleAddType(builder, type):
    """Store the node ``type`` tag as an int8 in field slot 0 of the table being built."""
    slot, default = 0, 0
    builder.PrependInt8Slot(slot, type, default)

def FBRuleAddName(builder, name):
    """Store ``name`` (the offset of a string created on ``builder``) in field slot 1."""
    name_offset = flatbuffers.number_types.UOffsetTFlags.py_type(name)
    builder.PrependUOffsetTRelativeSlot(1, name_offset, 0)

def FBRuleAddChildren(builder, children):
    """Store ``children`` (the offset of a finished children vector) in field slot 2."""
    vector_offset = flatbuffers.number_types.UOffsetTFlags.py_type(children)
    builder.PrependUOffsetTRelativeSlot(2, vector_offset, 0)

def FBRuleStartChildrenVector(builder, numElems):
    """Start a children vector of ``numElems`` elements, each 4 bytes in size with 4-byte alignment."""
    elem_size = alignment = 4
    return builder.StartVector(elem_size, numElems, alignment)

def FBRuleAddSrc(builder, src):
    """Store ``src`` (the offset of a string created on ``builder``) in field slot 3."""
    src_offset = flatbuffers.number_types.UOffsetTFlags.py_type(src)
    builder.PrependUOffsetTRelativeSlot(3, src_offset, 0)

def FBRuleAddSize(builder, size):
    """Store ``size`` (the offset of a just-written ``FBRuleSize`` struct) in field slot 4."""
    struct_offset = flatbuffers.number_types.UOffsetTFlags.py_type(size)
    builder.PrependStructSlot(4, struct_offset, 0)

def FBRuleAddIdx(builder, idx):
    """Store ``idx`` as an int32 in field slot 5 of the table being built."""
    slot, default = 5, 0
    builder.PrependInt32Slot(slot, idx, default)

def FBRuleAddStart(builder, start):
    """Store ``start`` as an int32 in field slot 6 of the table being built."""
    slot, default = 6, 0
    builder.PrependInt32Slot(slot, start, default)

def FBRuleAddStop(builder, stop):
    """Store ``stop`` as an int32 in field slot 7 of the table being built."""
    slot, default = 7, 0
    builder.PrependInt32Slot(slot, stop, default)

def FBRuleAddAltIdx(builder, altIdx):
    """Store ``altIdx`` as an int32 in field slot 8 of the table being built."""
    slot, default = 8, 0
    builder.PrependInt32Slot(slot, altIdx, default)

def FBRuleEnd(builder):
    """Finish the ``FBRule`` table being built and return what ``builder.EndObject()`` returns."""
    table_offset = builder.EndObject()
    return table_offset



8 changes: 8 additions & 0 deletions grammarinator/tool/fbs/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Copyright (c) 2024 Renata Hodovan, Akos Kiss.
#
# Licensed under the BSD 3-Clause License
# <LICENSE.rst or https://opensource.org/licenses/BSD-3-Clause>.
# This file may not be copied, modified, or distributed except
# according to those terms.

from .FBRule_generated import CreateFBRuleSize, FBRule, FBRuleAddAltIdx, FBRuleAddChildren, FBRuleAddIdx, FBRuleAddName, FBRuleAddSize, FBRuleAddSrc, FBRuleAddStart, FBRuleAddStop, FBRuleAddType, FBRuleEnd, FBRuleSize, FBRuleStart, FBRuleStartChildrenVector, FBRuleType
47 changes: 47 additions & 0 deletions grammarinator/tool/resources/fbs/FBRule.fbs
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
/*
* Copyright (c) 2024 Renata Hodovan, Akos Kiss.
*
* Licensed under the BSD 3-Clause License
* <LICENSE.rst or https://opensource.org/licenses/BSD-3-Clause>.
* This file may not be copied, modified, or distributed except
* according to those terms.
*/

/*
* This is the flatbuffer schema of the tree format of Grammarinator.
* It was used as input for the flatc compiler to generate code for
* building and reading flatbuffer tree representations
* (grammarinator/tool/fbs/FBRule_generated.py).
*
* The used flatc command:
* flatc --python --gen-onefile -o grammarinator/tool/fbs/ FBRule.fbs
*/

namespace grammarinator.tool.fbs;

// Tag telling which kind of tree node an FBRule table represents.
// The same names and values appear as constants of the FBRuleType class in
// the generated Python module.
enum FBRuleType: byte {
UnlexerRuleType = 0,
UnparserRuleType = 1,
UnparserRuleQuantifierType = 2,
UnparserRuleQuantifiedType = 3,
UnparserRuleAlternativeType = 4
}

// Size information of a node: its depth and its number of tokens.
// The Python codec fills it from the `size` attribute of unlexer rule nodes.
struct FBRuleSize {
depth: int;
tokens: int;
}

// A single node of the tree. Which fields are meaningful depends on `type`;
// the notes below describe how the Python FlatBuffersTreeCodec uses them.
table FBRule {
// Kind of the node.
type: FBRuleType;
// Name of the rule; written for every node (empty string if the node has no name).
name: string;
// Child nodes, in order; written for every node kind except unlexer rules.
children: [FBRule];
// Text of the token; written for unlexer rules only.
src: string;
// Size of the token; written for unlexer rules only.
size: FBRuleSize;
// Index of the quantifier or of the alternation; written for quantifier and alternative nodes.
idx: int;
// Lower bound of the quantifier; written for quantifier nodes only.
start: int;
// Upper bound of the quantifier; written for quantifier nodes only.
stop: int;
// Index of the chosen alternative; written for alternative nodes only.
alt_idx: int;
}

root_type FBRule;
73 changes: 73 additions & 0 deletions grammarinator/tool/tree_codec.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,12 @@

import json
import pickle
import struct

import flatbuffers

from ..runtime import RuleSize, UnlexerRule, UnparserRule, UnparserRuleAlternative, UnparserRuleQuantified, UnparserRuleQuantifier
from .fbs import CreateFBRuleSize, FBRule, FBRuleAddAltIdx, FBRuleAddChildren, FBRuleAddIdx, FBRuleAddName, FBRuleAddSize, FBRuleAddSrc, FBRuleAddStart, FBRuleAddStop, FBRuleAddType, FBRuleEnd, FBRuleStart, FBRuleStartChildrenVector, FBRuleType


class TreeCodec:
Expand Down Expand Up @@ -152,3 +156,72 @@ def _dict_to_rule(dct):
return json.loads(data.decode(encoding=self._encoding), object_hook=_dict_to_rule)
except json.JSONDecodeError:
return None


class FlatBuffersTreeCodec(TreeCodec):
    """
    FlatBuffers-based tree codec.

    A tree is stored as nested ``FBRule`` tables. The name of a node is always
    written (a missing name is stored as the empty string and is read back as
    such).
    """

    # Value written into the ``stop`` field of a quantifier that has no upper
    # bound. A real bound is never negative, so the marker cannot collide with
    # one. Buffers written before this marker existed (they used 1000) are
    # still decoded exactly as before, i.e., with ``stop == 1000``.
    _UNBOUNDED_STOP = -1

    def encode(self, root):
        """
        Serialize a tree into a FlatBuffers buffer.

        :param root: Root node of the tree to be encoded.
        :return: The encoded tree as ``bytes``.
        :raises TypeError: If the tree contains a node whose class is none of
            the supported rule classes.
        """
        builder = flatbuffers.Builder()

        def build_rule(rule):
            # Strings, vectors and child tables must be finished before the
            # table that refers to them is started, hence the order below.
            name_offset = builder.CreateString(rule.name or '')

            if isinstance(rule, UnlexerRule):
                src_offset = builder.CreateString(rule.src)
                FBRuleStart(builder)
                FBRuleAddType(builder, FBRuleType.UnlexerRuleType)
                FBRuleAddName(builder, name_offset)
                FBRuleAddSrc(builder, src_offset)
                # The size is a struct, which is written in place while the table is open.
                FBRuleAddSize(builder, CreateFBRuleSize(builder, rule.size.depth, rule.size.tokens))
                return FBRuleEnd(builder)

            if isinstance(rule, UnparserRule):
                type_tag = FBRuleType.UnparserRuleType
            elif isinstance(rule, UnparserRuleQuantifier):
                type_tag = FBRuleType.UnparserRuleQuantifierType
            elif isinstance(rule, UnparserRuleQuantified):
                type_tag = FBRuleType.UnparserRuleQuantifiedType
            elif isinstance(rule, UnparserRuleAlternative):
                type_tag = FBRuleType.UnparserRuleAlternativeType
            else:
                # Without a type tag the node would read back as type 0
                # (an unlexer rule) and decoding would fail on its missing src.
                raise TypeError(f'Unexpected rule class {type(rule).__name__}')

            child_offsets = [build_rule(child) for child in rule.children]
            FBRuleStartChildrenVector(builder, len(child_offsets))
            # Elements are prepended, so they are added in reverse to keep the original order.
            for child_offset in reversed(child_offsets):
                builder.PrependUOffsetTRelative(child_offset)
            children_offset = builder.EndVector()

            FBRuleStart(builder)
            FBRuleAddName(builder, name_offset)
            FBRuleAddChildren(builder, children_offset)
            FBRuleAddType(builder, type_tag)
            if type_tag == FBRuleType.UnparserRuleQuantifierType:
                FBRuleAddIdx(builder, rule.idx)
                FBRuleAddStart(builder, rule.start)
                # The stop field is an int32, so infinity cannot be stored directly.
                # Both the string and the float spelling of infinity are accepted.
                unbounded = rule.stop == 'inf' or rule.stop == float('inf')
                FBRuleAddStop(builder, self._UNBOUNDED_STOP if unbounded else rule.stop)
            elif type_tag == FBRuleType.UnparserRuleAlternativeType:
                FBRuleAddAltIdx(builder, rule.alt_idx)
                FBRuleAddIdx(builder, rule.idx)
            return FBRuleEnd(builder)

        builder.Finish(build_rule(root))
        return bytes(builder.Output())

    def decode(self, data, encoding='utf-8'):
        """
        Rebuild a tree from a FlatBuffers buffer.

        :param data: The encoded tree as a bytes-like object.
        :param encoding: Encoding used to decode the stored name and src strings.
        :return: Root node of the decoded tree, or ``None`` if the buffer
            cannot be unpacked (``struct.error``).
        :raises ValueError: If a node with an unknown type tag is found.
        """
        def read_rule(fbrule):
            rule_type = fbrule.Type()
            if rule_type == FBRuleType.UnlexerRuleType:
                fbsize = fbrule.Size()
                return UnlexerRule(name=fbrule.Name().decode(encoding),
                                   src=fbrule.Src().decode(encoding),
                                   size=RuleSize(depth=fbsize.Depth(), tokens=fbsize.Tokens()))

            children = [read_rule(fbrule.Children(i)) for i in range(fbrule.ChildrenLength())]
            if rule_type == FBRuleType.UnparserRuleType:
                return UnparserRule(name=fbrule.Name().decode(encoding), children=children)
            if rule_type == FBRuleType.UnparserRuleQuantifierType:
                stop = fbrule.Stop()
                if stop == self._UNBOUNDED_STOP:
                    # NOTE(review): assumes the runtime represents an unbounded
                    # quantifier with float infinity -- confirm against the runtime.
                    stop = float('inf')
                return UnparserRuleQuantifier(idx=fbrule.Idx(), start=fbrule.Start(), stop=stop, children=children)
            if rule_type == FBRuleType.UnparserRuleQuantifiedType:
                return UnparserRuleQuantified(children=children)
            if rule_type == FBRuleType.UnparserRuleAlternativeType:
                return UnparserRuleAlternative(alt_idx=fbrule.AltIdx(), idx=fbrule.Idx(), children=children)
            # Raised explicitly instead of asserting: assertions are removed under -O.
            raise ValueError(f'Unexpected type {rule_type}')

        try:
            return read_rule(FBRule.GetRootAs(bytearray(data)))
        except struct.error:
            return None
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ install_requires =
antlerinator>=1!3.0.0
antlr4-python3-runtime==4.13.1
autopep8
flatbuffers
inators
jinja2
regex
Expand Down
7 changes: 5 additions & 2 deletions tests/grammars/LifeCycle.g4
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2023 Renata Hodovan, Akos Kiss.
* Copyright (c) 2023-2024 Renata Hodovan, Akos Kiss.
*
* Licensed under the BSD 3-Clause License
* <LICENSE.rst or https://opensource.org/licenses/BSD-3-Clause>.
Expand Down Expand Up @@ -34,7 +34,10 @@
// TEST-GENERATE: {grammar}Generator.{grammar}Generator -j 1 -r start -n 3 --population {tmpdir}/population/j/ --tree-format json -o {tmpdir}/{grammar}JB%d.txt --keep-trees --no-generate --no-recombine
// TEST-GENERATE: {grammar}Generator.{grammar}Generator -j 1 -r start -n 3 --population {tmpdir}/population/j/ --tree-format json -o {tmpdir}/{grammar}JC%d.txt --keep-trees --no-generate --no-mutate
// TEST-GENERATE: {grammar}Generator.{grammar}Generator -j 2 -r start -n 6 --population {tmpdir}/population/j/ --tree-format json -o {tmpdir}/{grammar}JD%d.txt --no-generate

// TEST-PARSE: {grammar}.g4 -j 1 -i {tmpdir}/LifeCycleA0.txt {tmpdir}/LifeCycleA1.txt {tmpdir}/LifeCycleA2.txt -r start --hidden WS -o {tmpdir}/population/f/ --tree-format flatbuffers
// TEST-GENERATE: {grammar}Generator.{grammar}Generator -j 1 -r start -n 3 --population {tmpdir}/population/f/ --tree-format flatbuffers -o {tmpdir}/{grammar}FB%d.txt --keep-trees --no-generate --no-recombine
// TEST-GENERATE: {grammar}Generator.{grammar}Generator -j 1 -r start -n 3 --population {tmpdir}/population/f/ --tree-format flatbuffers -o {tmpdir}/{grammar}FC%d.txt --keep-trees --no-generate --no-mutate
// TEST-GENERATE: {grammar}Generator.{grammar}Generator -j 2 -r start -n 6 --population {tmpdir}/population/f/ --tree-format flatbuffers -o {tmpdir}/{grammar}FD%d.txt --no-generate
grammar LifeCycle;

start : TEST testType ;
Expand Down
Loading

0 comments on commit ab1bb72

Please sign in to comment.