Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Parsing proto field types out of the .proto file #194

Merged
merged 1 commit into from
Nov 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 22 additions & 1 deletion xml_converter/generators/code_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from jinja2 import Template, FileSystemLoader, Environment
from jinja_helpers import UnindentBlocks
from schema import string_t, array_t, enum_t, union_t, union_partial_t, pattern_dictionary_t, object_t, boolean_t, DefType
from protobuf_types import get_proto_field_type


SchemaType = Dict[str, Any]
Expand Down Expand Up @@ -96,6 +97,7 @@ class FieldRow:
xml_attribute: str
alternate_xml_attributes: List[str]
binary_field: str
binary_field_type: str
data_type: str
usable_on_html: str
example: str
Expand Down Expand Up @@ -727,11 +729,19 @@ def generate_auto_docs(self, metadata: Dict[str, SchemaType], content: Dict[str,
)
# self.get_examples(fieldval['type'], fieldval['applies_to'], fieldval['xml_fieldsval'][0])

proto_field_type: str = ""
for marker_type in fieldval["applies_to"]:
proto_field_type = get_proto_field_type(marker_type, fieldval["protobuf_field"])
# TODO: catch discrepancies where the same proto field has differing
# types across different messages. This will be caught in the cpp code
# regardless.

field_rows.append(FieldRow(
name=fieldval["name"],
xml_attribute=fieldval["xml_fields"][0],
alternate_xml_attributes=fieldval["xml_fields"][1:],
binary_field=fieldval["protobuf_field"],
binary_field_type=proto_field_type,
data_type=fieldval["type"],
usable_on_html="<br>".join(fieldval["applies_to"]),
example=example,
Expand All @@ -743,11 +753,22 @@ def generate_auto_docs(self, metadata: Dict[str, SchemaType], content: Dict[str,

if fieldval['type'] == "CompoundValue":
for component_field in fieldval["components"]:

binary_field_name = fieldval["protobuf_field"] + "." + component_field["protobuf_field"]

component_field_type: str = ""
for marker_type in fieldval["applies_to"]:
component_field_type = get_proto_field_type(marker_type, binary_field_name)
# TODO: catch discrepancies where the same proto field has differing
# types across different messages. This will be caught in the cpp code
# regardless.

field_rows.append(FieldRow(
name=component_field["name"],
xml_attribute=component_field["xml_fields"][0],
alternate_xml_attributes=component_field["xml_fields"][1:],
binary_field=fieldval["protobuf_field"] + "." + component_field["protobuf_field"],
binary_field=binary_field_name,
binary_field_type=component_field_type,
data_type=component_field["type"],
usable_on_html="<br>".join(fieldval["applies_to"]),
example=self.build_example(
Expand Down
138 changes: 138 additions & 0 deletions xml_converter/generators/protobuf_types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
from lark import Lark, Transformer
from lark.lexer import Token
from typing import List

################################################################################
# This module parses a proto definition file with the goal of identifying the
# proto field types based on the proto field name. It uses a lark grammar to
# parse the proto file. The grammar is a stringy definition which makes typing
# harder. Most type checks are ignored in this file, but hopefully we will be
# able to add them back in over time once we can figure them out.
################################################################################

# Lark LALR parser for the subset of proto3 syntax this module understands:
# an optional `syntax = "proto3";` line, an optional `package` line, and any
# number of top-level `message` / `enum` declarations. A message body may
# contain fields (optionally prefixed with `repeated`) and enums. Whitespace
# and `//` line comments are ignored.
# NOTE(review): the grammar has no rules for imports, options, block comments,
# nested messages, or dotted type names -- presumably the proto file parsed
# below uses none of them; confirm before pointing this at other proto files.
parser = Lark(
grammar="""?start: syntax_directive? package_directive? (declaration)*

syntax_directive: "syntax" "=" "\\"proto3\\"" ";"
package_directive: "package" dotted_identifier ";"

declaration: message | enum

message: "message" CNAME "{" message_body "}"
message_body: (field | enum)*

enum: "enum" CNAME "{" enum_body "}"
enum_body: (enum_value)*

enum_value: CNAME "=" SIGNED_INT ";"

field: (repeated_type | type) CNAME "=" SIGNED_INT ";"

repeated_type: "repeated" type

type: CNAME

dotted_identifier: CNAME ("." CNAME)*

LINE_COMMENT: /\\/\\/[^\\n]*/

%import common.CNAME
%import common.WS
%import common.SIGNED_INT
%ignore WS
%ignore LINE_COMMENT""",
start='start',
parser='lalr'
)


# Define transformer
class ProtoDictTransformer(Transformer):  # type: ignore
    """Collapse the parsed proto tree into plain dictionaries.

    Each method is the callback for the grammar rule (or terminal) of the same
    name. The transformed result maps every message name to a dictionary of
    ``{field_name: type_name}``; repeated fields get the type name
    ``"REPEATED[<type>]"``. The package directive, when present, is stored
    under the ``"__package__"`` key as the list of its identifier parts.
    Enums and the syntax directive are dropped (transformed to ``None``).
    """

    def start(self, items: List):  # type: ignore
        """Merge every top-level dictionary into a single result dictionary."""
        messages = {}
        for item in items:
            if isinstance(item, dict):
                messages.update(item)
            elif item is None:
                # Ignored constructs (syntax directive, enums) become None.
                pass
            else:
                # Anything else is unexpected; surface it for debugging.
                print(item,)
        return messages

    def package_directive(self, items):  # type: ignore
        """Store the package's identifier parts under ``"__package__"``."""
        return {"__package__": items[0]}

    def dotted_identifier(self, items):  # type: ignore
        """Return the identifier parts unchanged, as a list."""
        return items

    # Ignore the syntax directive
    def syntax_directive(self, items) -> None:  # type: ignore
        return None

    # Ignore enums
    def enum(self, items) -> None:  # type: ignore
        return None

    def declaration(self, items):  # type: ignore
        """Unwrap the single message (dict) or enum (None) in a declaration."""
        if len(items) == 0:
            return "ERROR"
        if len(items) > 1:
            print("Got more then one declaration, the grammar may be bugged", items)
        return items[0]

    def message(self, items):  # type: ignore
        """Return ``{message_name: {field_name: type_name}}``."""
        name, body = items
        return {name: body}

    def message_body(self, items):  # type: ignore
        """Merge the per-field dictionaries of one message into one dictionary.

        The grammar allows enums inside a message body and ``enum()`` turns
        them into ``None``, so non-dictionary children are skipped here rather
        than crashing on ``None.items()``.
        """
        return {
            k: v
            for d in items
            if isinstance(d, dict)
            for k, v in d.items()
        }

    def field(self, items):  # type: ignore
        """Return ``{field_name: type_name}`` for a single field."""
        # A field is always (type or repeated type, name, field number).
        if len(items) == 3:
            type_name, field_name, index = items
            return {field_name: type_name}
        print("unknown field syntax, the grammer may be bugged")
        return {}

    def CNAME(self, item: Token):  # type: ignore
        """Convert a CNAME token to its plain string value."""
        return item.value

    # This method must be called `type` to match the grammar rule name.
    def type(self, items: List):  # type: ignore
        """Return the type name held by a ``type`` rule."""
        if len(items) == 0:
            return "ERROR"
        if len(items) > 1:
            print("Got more then one type, the grammar may be bugged", items)
        return items[0]

    # repeated_type has type tokens which get processed by type() into strings
    def repeated_type(self, items: List[str]) -> str:
        """Wrap the element type name as ``"REPEATED[<type>]"``."""
        if len(items) == 0:
            return "ERROR"
        if len(items) > 1:
            print("Got more then one repeated type, the grammar may be bugged", items)
        return "REPEATED[" + items[0] + "]"


def proto_to_dict(proto_str):  # type: ignore
    """Parse proto source text and return ProtoDictTransformer's output for it."""
    parsed_tree = parser.parse(proto_str)
    return ProtoDictTransformer().transform(parsed_tree)


################################################################################
# Gets all of the field types of the proto.
################################################################################
# Parsed once at import time. The path is relative to the current working
# directory, so this module must be imported from a directory where
# "../proto/waypoint.proto" resolves. The encoding is given explicitly so the
# read does not depend on the platform's locale default.
with open("../proto/waypoint.proto", encoding="utf-8") as f:
    proto_field_types = proto_to_dict(f.read())  # type: ignore


def get_proto_field_type(message: str, field: str) -> str:
    """Return the proto type name of ``field`` within ``message``.

    ``field`` may be a dotted path such as ``"outer.inner"``. Each component
    is looked up in the message type reached so far, starting from
    ``message``, using the module-level ``proto_field_types`` table.

    Raises:
        KeyError: if a message type or a field name along the path is not
            present in ``proto_field_types``.
    """
    field_type = message

    # Walk the dotted path; the type found for one component is the message
    # in which the next component is looked up.
    for field_name in field.split("."):
        try:
            field_type = proto_field_types[field_type][field_name]
        except KeyError:
            raise KeyError(
                f"Cannot resolve proto field {field!r} on message {message!r}: "
                f"could not look up {field_name!r} in {field_type!r}"
            ) from None

    return field_type
3 changes: 2 additions & 1 deletion xml_converter/generators/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,5 @@ PyYAML==5.1
tomli==2.0.1
types-Markdown==3.4.0
types-PyYAML==6.0.10
typing_extensions==4.3.0
typing_extensions==4.3.0
lark==1.1.8
2 changes: 1 addition & 1 deletion xml_converter/generators/web_templates/infotable.html
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ <h2 id="{{field_row.binary_field}}">{{field_row.name}}</h2>
<tr>
<td><a href="#{{field_row.binary_field}}">{% if field_row.is_sub_field %}&#8627; {% endif %}{{field_row.name}}</a></td>
<td>{{field_row.xml_attribute}}{% for alternate in field_row.alternate_xml_attributes %}, {{alternate}}{% endfor %}</td>
<td>{{field_row.binary_field}}</td>
<td>{{field_row.binary_field}} ({{field_row.binary_field_type}})</td>
<td>{{field_row.data_type}}</td>
<td>{{field_row.usable_on_html}}</td>
</tr>
Expand Down