Skip to content

Commit

Permalink
Parsing proto field types out of the .proto file
Browse files Browse the repository at this point in the history
Ultimately this became required in order to properly create the generated files. We actually already needed this but were using a hack where we assumed the message name was the same as the name defined in the frontmatter. This is the first step in order to get around that hack, and then also allow for the generator code to know what the proto field type is when creating generated code.
  • Loading branch information
AsherGlick committed Oct 31, 2023
1 parent 3351912 commit 7a1c5e5
Show file tree
Hide file tree
Showing 4 changed files with 163 additions and 3 deletions.
23 changes: 22 additions & 1 deletion xml_converter/generators/code_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from jinja2 import Template, FileSystemLoader, Environment
from jinja_helpers import UnindentBlocks
from schema import string_t, array_t, enum_t, union_t, union_partial_t, pattern_dictionary_t, object_t, boolean_t, DefType
from protobuf_types import get_proto_field_type


SchemaType = Dict[str, Any]
Expand Down Expand Up @@ -96,6 +97,7 @@ class FieldRow:
xml_attribute: str
alternate_xml_attributes: List[str]
binary_field: str
binary_field_type: str
data_type: str
usable_on_html: str
example: str
Expand Down Expand Up @@ -727,11 +729,19 @@ def generate_auto_docs(self, metadata: Dict[str, SchemaType], content: Dict[str,
)
# self.get_examples(fieldval['type'], fieldval['applies_to'], fieldval['xml_fieldsval'][0])

proto_field_type: str = ""
for marker_type in fieldval["applies_to"]:
proto_field_type = get_proto_field_type(marker_type, fieldval["protobuf_field"])
# TODO: catch discrepancies when the same proto field has differing
# types across different messages. This will be caught in the cpp
# code regardless.

field_rows.append(FieldRow(
name=fieldval["name"],
xml_attribute=fieldval["xml_fields"][0],
alternate_xml_attributes=fieldval["xml_fields"][1:],
binary_field=fieldval["protobuf_field"],
binary_field_type=proto_field_type,
data_type=fieldval["type"],
usable_on_html="<br>".join(fieldval["applies_to"]),
example=example,
Expand All @@ -743,11 +753,22 @@ def generate_auto_docs(self, metadata: Dict[str, SchemaType], content: Dict[str,

if fieldval['type'] == "CompoundValue":
for component_field in fieldval["components"]:

binary_field_name = fieldval["protobuf_field"] + "." + component_field["protobuf_field"]

component_field_type: str = ""
for marker_type in fieldval["applies_to"]:
component_field_type = get_proto_field_type(marker_type, binary_field_name)
# TODO: catch discrepancies when the same proto field has differing
# types across different messages. This will be caught in the cpp
# code regardless.

field_rows.append(FieldRow(
name=component_field["name"],
xml_attribute=component_field["xml_fields"][0],
alternate_xml_attributes=component_field["xml_fields"][1:],
binary_field=fieldval["protobuf_field"] + "." + component_field["protobuf_field"],
binary_field=binary_field_name,
binary_field_type=component_field_type,
data_type=component_field["type"],
usable_on_html="<br>".join(fieldval["applies_to"]),
example=self.build_example(
Expand Down
138 changes: 138 additions & 0 deletions xml_converter/generators/protobuf_types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
from typing import Any, Dict, List, Optional

from lark import Lark, Transformer
from lark.lexer import Token

################################################################################
# This module parses a proto definition file with the goal of identifying the
# proto field types based on the proto field name. It uses a lark grammar to
# parse the proto file. The grammar is a stringy definition which makes typing
# harder. Most type checks are ignored in this file, but hopefully we will be
# able to add them back in over time once we can figure them out.
################################################################################

# Module-level Lark parser (LALR) for the subset of the proto language that
# this module understands. The grammar below accepts:
#   - an optional `syntax = "proto3";` directive (only "proto3" is matched),
#   - an optional `package a.b.c;` directive,
#   - any number of top-level `message` and `enum` declarations,
#   - message bodies made up of fields and enums only (there is no rule for
#     nested messages),
#   - fields of the form `[repeated] <type> <name> = <int>;` where the type is
#     a single identifier (there is no rule for dotted or map types).
# Whitespace and `//` line comments are ignored; there is no rule for
# `/* ... */` block comments.
parser = Lark(
grammar="""?start: syntax_directive? package_directive? (declaration)*
syntax_directive: "syntax" "=" "\\"proto3\\"" ";"
package_directive: "package" dotted_identifier ";"
declaration: message | enum
message: "message" CNAME "{" message_body "}"
message_body: (field | enum)*
enum: "enum" CNAME "{" enum_body "}"
enum_body: (enum_value)*
enum_value: CNAME "=" SIGNED_INT ";"
field: (repeated_type | type) CNAME "=" SIGNED_INT ";"
repeated_type: "repeated" type
type: CNAME
dotted_identifier: CNAME ("." CNAME)*
LINE_COMMENT: /\\/\\/[^\\n]*/
%import common.CNAME
%import common.WS
%import common.SIGNED_INT
%ignore WS
%ignore LINE_COMMENT""",
# Entry rule of the grammar above.
start='start',
# Use Lark's LALR(1) parsing algorithm.
parser='lalr'
)


# Define transformer
class ProtoDictTransformer(Transformer):  # type: ignore
    """Collapse the parse tree of a .proto file into nested dictionaries.

    Every method is named after a grammar rule (or terminal) and receives the
    already-transformed children of that rule. The final result maps each
    message name to a ``{field_name: field_type}`` dictionary. Repeated fields
    are reported as ``"REPEATED[<type>]"``, enum declarations are discarded,
    and the package directive is stored under the ``"__package__"`` key.
    """

    def start(self, items: List):  # type: ignore
        """Merge all top-level dictionaries into a single result dictionary."""
        messages = {}
        for item in items:
            if isinstance(item, dict):
                messages.update(item)
            elif item is not None:
                # None comes from ignored constructs (syntax directive, enums).
                # Anything else is unexpected, so surface it for debugging.
                print(item,)
        return messages

    def package_directive(self, items):  # type: ignore
        """Store the package name parts under the ``"__package__"`` key."""
        return {"__package__": items[0]}

    def dotted_identifier(self, items):  # type: ignore
        """Return the identifier parts as a list, one entry per segment."""
        return items

    # Ignore the syntax directive
    def syntax_directive(self, items) -> None:  # type: ignore
        return None

    # Ignore enums
    def enum(self, items) -> None:  # type: ignore
        return None

    def declaration(self, items):  # type: ignore
        """Unwrap the single message (dict) or enum (None) of a declaration."""
        if len(items) == 0:
            return "ERROR"
        if len(items) > 1:
            print("Got more then one declaration, the grammar may be bugged", items)
        return items[0]

    def message(self, items):  # type: ignore
        """Return ``{message_name: {field_name: field_type}}``."""
        name, body = items
        return {name: body}

    def message_body(self, items):  # type: ignore
        """Merge the per-field dictionaries of one message body.

        The grammar allows enums inside a message body and ``enum()`` turns
        those into ``None``, so ``None`` entries are skipped instead of being
        treated as field dictionaries.
        """
        fields = {}
        for item in items:
            if item is None:
                continue
            fields.update(item)
        return fields

    def field(self, items):  # type: ignore
        """Return ``{field_name: field_type}`` for a single field."""
        # A field is always (type, name, index). A repeated type has already
        # been collapsed into a single string by repeated_type().
        if len(items) == 3:
            type_name, field_name, _index = items
            return {field_name: type_name}
        print("unknown field syntax, the grammer may be bugged")
        return {}

    def CNAME(self, item: Token):  # type: ignore
        """Convert an identifier token into its plain string value."""
        return item.value

    # This method must be named `type` to match the grammar rule of that name.
    def type(self, items: List):  # type: ignore
        """Unwrap the single identifier that names a field type."""
        if len(items) == 0:
            return "ERROR"
        if len(items) > 1:
            print("Got more then one type, the grammar may be bugged", items)
        return items[0]

    # repeated_type has type tokens which get processed by type() into strings
    def repeated_type(self, items: List[str]) -> str:
        """Wrap the element type of a repeated field as ``REPEATED[<type>]``."""
        if len(items) == 0:
            return "ERROR"
        if len(items) > 1:
            print("Got more then one repeated type, the grammar may be bugged", items)
        return "REPEATED[" + items[0] + "]"


def proto_to_dict(proto_str):  # type: ignore
    """Parse the text of a .proto file and return its transformed form.

    The text is parsed with the module-level ``parser`` and the resulting
    tree is then run through a fresh ``ProtoDictTransformer``.
    """
    parse_tree = parser.parse(proto_str)
    return ProtoDictTransformer().transform(parse_tree)


################################################################################
# Gets all of the field types of the proto.
################################################################################
# The proto definition is parsed once, at import time. The encoding is given
# explicitly so the result does not depend on the locale's default encoding.
# NOTE(review): the path is relative to the current working directory,
# presumably xml_converter/generators -- confirm against how this is invoked.
with open("../proto/waypoint.proto", encoding="utf-8") as f:
    proto_field_types = proto_to_dict(f.read())  # type: ignore


def get_proto_field_type(
    message: str,
    field: str,
    field_types: Optional[Dict[str, Any]] = None,
) -> str:
    """Return the proto type of ``field`` on the proto message ``message``.

    ``field`` may be a dotted path such as ``"outer.inner"``. Each segment is
    looked up on the message type produced by the previous segment, so the
    type of the final segment is returned.

    Args:
        message: Name of the proto message the lookup starts from.
        field: Field name, or dotted path of nested field names.
        field_types: Mapping of message name to ``{field_name: field_type}``.
            Defaults to the module-level ``proto_field_types``.

    Raises:
        KeyError: If a message or field along the path is not known. The
            error names the message and field that failed to resolve.
    """
    if field_types is None:
        field_types = proto_field_types

    current_type = message
    for part in field.split("."):
        try:
            message_fields = field_types[current_type]
        except KeyError:
            raise KeyError(
                f"unknown proto message {current_type!r} while resolving "
                f"field {field!r} of {message!r}"
            ) from None

        try:
            current_type = message_fields[part]
        except KeyError:
            raise KeyError(
                f"proto message {current_type!r} has no field {part!r} "
                f"(resolving {field!r} of {message!r})"
            ) from None

    return current_type
3 changes: 2 additions & 1 deletion xml_converter/generators/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,5 @@ PyYAML==5.1
tomli==2.0.1
types-Markdown==3.4.0
types-PyYAML==6.0.10
typing_extensions==4.3.0
typing_extensions==4.3.0
lark==1.1.8
2 changes: 1 addition & 1 deletion xml_converter/generators/web_templates/infotable.html
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ <h2 id="{{field_row.binary_field}}">{{field_row.name}}</h2>
<tr>
<td><a href="#{{field_row.binary_field}}">{% if field_row.is_sub_field %}&#8627; {% endif %}{{field_row.name}}</a></td>
<td>{{field_row.xml_attribute}}{% for alternate in field_row.alternate_xml_attributes %}, {{alternate}}{% endfor %}</td>
<td>{{field_row.binary_field}}</td>
<td>{{field_row.binary_field}} ({{field_row.binary_field_type}})</td>
<td>{{field_row.data_type}}</td>
<td>{{field_row.usable_on_html}}</td>
</tr>
Expand Down

0 comments on commit 7a1c5e5

Please sign in to comment.