Skip to content

Commit

Permalink
Merge pull request #194 from AsherGlick/proto_field_types
Browse files Browse the repository at this point in the history
Parsing proto field types out of the .proto file
  • Loading branch information
AsherGlick authored Nov 3, 2023
2 parents 1c8526d + 7a1c5e5 commit be480f2
Show file tree
Hide file tree
Showing 4 changed files with 163 additions and 3 deletions.
23 changes: 22 additions & 1 deletion xml_converter/generators/code_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from jinja2 import Template, FileSystemLoader, Environment
from jinja_helpers import UnindentBlocks
from schema import string_t, array_t, enum_t, union_t, union_partial_t, pattern_dictionary_t, object_t, boolean_t, DefType
from protobuf_types import get_proto_field_type


SchemaType = Dict[str, Any]
Expand Down Expand Up @@ -96,6 +97,7 @@ class FieldRow:
xml_attribute: str
alternate_xml_attributes: List[str]
binary_field: str
binary_field_type: str
data_type: str
usable_on_html: str
example: str
Expand Down Expand Up @@ -727,11 +729,19 @@ def generate_auto_docs(self, metadata: Dict[str, SchemaType], content: Dict[str,
)
# self.get_examples(fieldval['type'], fieldval['applies_to'], fieldval['xml_fieldsval'][0])

proto_field_type: str = ""
for marker_type in fieldval["applies_to"]:
proto_field_type = get_proto_field_type(marker_type, fieldval["protobuf_field"])
# TODO: catch discrepancies when the same proto field has differing
# types across different messages. This will be caught in the cpp
# code regardless.

field_rows.append(FieldRow(
name=fieldval["name"],
xml_attribute=fieldval["xml_fields"][0],
alternate_xml_attributes=fieldval["xml_fields"][1:],
binary_field=fieldval["protobuf_field"],
binary_field_type=proto_field_type,
data_type=fieldval["type"],
usable_on_html="<br>".join(fieldval["applies_to"]),
example=example,
Expand All @@ -743,11 +753,22 @@ def generate_auto_docs(self, metadata: Dict[str, SchemaType], content: Dict[str,

if fieldval['type'] == "CompoundValue":
for component_field in fieldval["components"]:

binary_field_name = fieldval["protobuf_field"] + "." + component_field["protobuf_field"]

component_field_type: str = ""
for marker_type in fieldval["applies_to"]:
component_field_type = get_proto_field_type(marker_type, binary_field_name)
# TODO: catch discrepancies when the same proto field has differing
# types across different messages. This will be caught in the cpp
# code regardless.

field_rows.append(FieldRow(
name=component_field["name"],
xml_attribute=component_field["xml_fields"][0],
alternate_xml_attributes=component_field["xml_fields"][1:],
binary_field=fieldval["protobuf_field"] + "." + component_field["protobuf_field"],
binary_field=binary_field_name,
binary_field_type=component_field_type,
data_type=component_field["type"],
usable_on_html="<br>".join(fieldval["applies_to"]),
example=self.build_example(
Expand Down
138 changes: 138 additions & 0 deletions xml_converter/generators/protobuf_types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
from lark import Lark, Transformer
from lark.lexer import Token
from typing import List

################################################################################
# This module parses a proto definition file with the goal of identifying the
# proto field types based on the proto field name. It uses a lark grammar to
# parse the proto file. The grammar is a stringy definition which makes typing
# harder. Most type checks are ignored in this file, but hopefully we will be
# able to add them back in over time once we can figure them out.
################################################################################

# Module-level Lark parser (LALR) for the subset of proto3 syntax this module
# understands. What the grammar below accepts:
#   - an optional `syntax = "proto3";` directive (only the literal "proto3"),
#   - an optional `package a.b.c;` directive,
#   - any number of top-level `message` and `enum` declarations,
#   - message bodies made of fields and enums only,
#   - fields of the form `[repeated] <type> <name> = <int>;` where the type is
#     a single CNAME (no dotted type names),
#   - whitespace and `//` line comments, which are ignored.
# There are no rules for nested messages, `oneof`, `map<...>`, options,
# imports, or `/* */` block comments, so input using them will not parse.
#
# The grammar is a regular (non-raw) Python string, which is why backslashes
# are doubled: Lark receives `"\"proto3\""` and the regexp `/\/\/[^\n]*/`.
parser = Lark(
grammar="""?start: syntax_directive? package_directive? (declaration)*
syntax_directive: "syntax" "=" "\\"proto3\\"" ";"
package_directive: "package" dotted_identifier ";"
declaration: message | enum
message: "message" CNAME "{" message_body "}"
message_body: (field | enum)*
enum: "enum" CNAME "{" enum_body "}"
enum_body: (enum_value)*
enum_value: CNAME "=" SIGNED_INT ";"
field: (repeated_type | type) CNAME "=" SIGNED_INT ";"
repeated_type: "repeated" type
type: CNAME
dotted_identifier: CNAME ("." CNAME)*
LINE_COMMENT: /\\/\\/[^\\n]*/
%import common.CNAME
%import common.WS
%import common.SIGNED_INT
%ignore WS
%ignore LINE_COMMENT""",
start='start',
parser='lalr'
)


# Define transformer
class ProtoDictTransformer(Transformer):  # type: ignore
    """Collapse the parse tree of a .proto file into nested dictionaries.

    Each method is the Lark callback for the grammar rule (or terminal) of the
    same name and receives the already-transformed children of that rule.

    The final result, produced by `start`, maps each message name to a
    dictionary of `{field_name: field_type}`. Repeated fields have their type
    wrapped as `"REPEATED[<type>]"`. The package directive, when present, is
    stored under the `"__package__"` key. Enums are dropped entirely.
    """

    def start(self, items: List):  # type: ignore
        """Merge every top-level dictionary into a single result dictionary."""
        messages = {}
        for item in items:
            if isinstance(item, dict):
                messages.update(item)
            elif item is None:
                # Ignored constructs (syntax directive, enums) become None.
                pass
            else:
                # Anything else is unexpected; surface it for debugging.
                print(item,)
        return messages

    def package_directive(self, items):  # type: ignore
        """Store the package name parts under the `__package__` key."""
        return {"__package__": items[0]}

    def dotted_identifier(self, items):  # type: ignore
        """Return the list of name parts that make up a dotted identifier."""
        return items

    # Ignore the syntax directive
    def syntax_directive(self, items) -> None:  # type: ignore
        return None

    # Ignore enums
    def enum(self, items) -> None:  # type: ignore
        return None

    def declaration(self, items):  # type: ignore
        """Unwrap the single message or enum held by a declaration."""
        if len(items) == 0:
            return "ERROR"
        if len(items) > 1:
            print("Got more then one declaration, the grammar may be bugged", items)
        return items[0]

    def message(self, items):  # type: ignore
        """Return `{message_name: {field_name: field_type}}`."""
        name, body = items
        return {name: body}

    def message_body(self, items):  # type: ignore
        """Merge the per-field dictionaries of a message into one dictionary.

        The grammar allows enums inside a message body and `enum` transforms
        them to None, so None children are skipped here; calling `.items()`
        on them would raise AttributeError.
        """
        return {
            k: v
            for d in items
            if d is not None
            for k, v in d.items()
        }

    def field(self, items):  # type: ignore
        """Return `{field_name: type_name}` for a single field.

        A well-formed field yields exactly three children: the type (plain or
        repeated), the field name, and the field index. The index is not
        needed for type lookups and is discarded.
        """
        if len(items) == 3:
            type_name, field_name, index = items
            return {field_name: type_name}
        print("unknown field syntax, the grammer may be bugged")
        return {}

    def CNAME(self, item: Token):  # type: ignore
        """Convert a CNAME token into its plain string value."""
        return item.value

    def type(self, items: List):  # type: ignore
        """Unwrap the single CNAME string that names a field type."""
        if len(items) == 0:
            return "ERROR"
        if len(items) > 1:
            print("Got more then one type, the grammar may be bugged", items)
        return items[0]

    # repeated_type has type tokens which get processed by type() into strings
    def repeated_type(self, items: List[str]) -> str:
        """Wrap the inner type name as `REPEATED[<type>]`."""
        if len(items) == 0:
            return "ERROR"
        if len(items) > 1:
            print("Got more then one repeated type, the grammar may be bugged", items)
        return "REPEATED[" + items[0] + "]"


def proto_to_dict(proto_str):  # type: ignore
    """Parse .proto source text and return the transformed result.

    The text is parsed with the module-level `parser`, and the resulting tree
    is handed to a fresh `ProtoDictTransformer`, whose output is returned
    unchanged.
    """
    parsed_tree = parser.parse(proto_str)
    return ProtoDictTransformer().transform(parsed_tree)


################################################################################
# Gets all of the field types of the proto.
################################################################################
# NOTE: the path is relative to the current working directory, not to this
# file, so importing this module only works when the process is started from
# a directory where "../proto/waypoint.proto" resolves.
# The encoding is given explicitly so the result does not depend on the
# platform's locale default.
with open("../proto/waypoint.proto", encoding="utf-8") as f:
    proto_field_types = proto_to_dict(f.read())  # type: ignore


def get_proto_field_type(message: str, field: str) -> str:
    """Return the proto type of `field` within the proto message `message`.

    `field` may be a dotted path such as "outer.inner". Each path segment is
    looked up in the message named by the type of the previous segment,
    starting from `message`, and the type of the final segment is returned.

    Raises:
        KeyError: if a message or field along the path is not present in the
            parsed proto definitions.
    """
    current_type = message

    # A separate loop variable is used so the `field` argument stays intact
    # for the error message below.
    for field_name in field.split("."):
        try:
            current_type = proto_field_types[current_type][field_name]
        except KeyError:
            raise KeyError(
                f"Cannot resolve proto field {field!r} of message {message!r}: "
                f"lookup of {field_name!r} in {current_type!r} failed"
            ) from None

    return current_type
3 changes: 2 additions & 1 deletion xml_converter/generators/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,5 @@ PyYAML==5.1
tomli==2.0.1
types-Markdown==3.4.0
types-PyYAML==6.0.10
typing_extensions==4.3.0
typing_extensions==4.3.0
lark==1.1.8
2 changes: 1 addition & 1 deletion xml_converter/generators/web_templates/infotable.html
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ <h2 id="{{field_row.binary_field}}">{{field_row.name}}</h2>
<tr>
<td><a href="#{{field_row.binary_field}}">{% if field_row.is_sub_field %}&#8627; {% endif %}{{field_row.name}}</a></td>
<td>{{field_row.xml_attribute}}{% for alternate in field_row.alternate_xml_attributes %}, {{alternate}}{% endfor %}</td>
<td>{{field_row.binary_field}}</td>
<td>{{field_row.binary_field}} ({{field_row.binary_field_type}})</td>
<td>{{field_row.data_type}}</td>
<td>{{field_row.usable_on_html}}</td>
</tr>
Expand Down

0 comments on commit be480f2

Please sign in to comment.