Parsing proto field types out of the .proto file

This became necessary in order to properly create the generated files. We already needed this information, but were relying on a hack that assumed each proto message name was the same as the name defined in the frontmatter. This is the first step toward removing that hack, and it also lets the generator code know what each proto field's type is when creating generated code.
AsherGlick · Oct 31, 2023 · 7a1c5e5 · 7a1c5e5
1 parent 3351912
commit 7a1c5e5
Show file tree

Hide file tree

Showing 4 changed files with 163 additions and 3 deletions.
diff --git a/xml_converter/generators/code_generator.py b/xml_converter/generators/code_generator.py
@@ -8,6 +8,7 @@
 from jinja2 import Template, FileSystemLoader, Environment
 from jinja_helpers import UnindentBlocks
 from schema import string_t, array_t, enum_t, union_t, union_partial_t, pattern_dictionary_t, object_t, boolean_t, DefType
+from protobuf_types import get_proto_field_type
 
 
 SchemaType = Dict[str, Any]
@@ -96,6 +97,7 @@ class FieldRow:
     xml_attribute: str
     alternate_xml_attributes: List[str]
     binary_field: str
+    binary_field_type: str
     data_type: str
     usable_on_html: str
     example: str
@@ -727,11 +729,19 @@ def generate_auto_docs(self, metadata: Dict[str, SchemaType], content: Dict[str,
                 )
                 # self.get_examples(fieldval['type'], fieldval['applies_to'], fieldval['xml_fieldsval'][0])
 
+            proto_field_type: str = ""
+            for marker_type in fieldval["applies_to"]:
+                proto_field_type = get_proto_field_type(marker_type, fieldval["protobuf_field"])
+                # TODO: catch discrepancies where the same proto field has
+                # differing types across different messages. This will be
+                # caught in the cpp code regardless.
+
             field_rows.append(FieldRow(
                 name=fieldval["name"],
                 xml_attribute=fieldval["xml_fields"][0],
                 alternate_xml_attributes=fieldval["xml_fields"][1:],
                 binary_field=fieldval["protobuf_field"],
+                binary_field_type=proto_field_type,
                 data_type=fieldval["type"],
                 usable_on_html="<br>".join(fieldval["applies_to"]),
                 example=example,
@@ -743,11 +753,22 @@ def generate_auto_docs(self, metadata: Dict[str, SchemaType], content: Dict[str,
 
             if fieldval['type'] == "CompoundValue":
                 for component_field in fieldval["components"]:
+
+                    binary_field_name = fieldval["protobuf_field"] + "." + component_field["protobuf_field"]
+
+                    component_field_type: str = ""
+                    for marker_type in fieldval["applies_to"]:
+                        component_field_type = get_proto_field_type(marker_type, binary_field_name)
+                        # TODO: catch discrepancies where the same proto field has
+                        # differing types across different messages. This will be
+                        # caught in the cpp code regardless.
+
                     field_rows.append(FieldRow(
                         name=component_field["name"],
                         xml_attribute=component_field["xml_fields"][0],
                         alternate_xml_attributes=component_field["xml_fields"][1:],
-                        binary_field=fieldval["protobuf_field"] + "." + component_field["protobuf_field"],
+                        binary_field=binary_field_name,
+                        binary_field_type=component_field_type,
                         data_type=component_field["type"],
                         usable_on_html="<br>".join(fieldval["applies_to"]),
                         example=self.build_example(

diff --git a/xml_converter/generators/protobuf_types.py b/xml_converter/generators/protobuf_types.py
@@ -0,0 +1,138 @@
+from lark import Lark, Transformer
+from lark.lexer import Token
+from typing import List
+
+################################################################################
+# This module parses a proto definition file with the goal of identifying the
+# proto field types based on the proto field name. It uses a lark grammar to
+# parse the proto file. The grammar is a stringy definition which makes typing
+# harder. Most type checks are ignored in this file, but hopefully we will be
+# able to add them back in over time once we can figure them out.
+################################################################################
+
+# The lark parser for the subset of proto3 syntax used by the proto file:
+# an optional syntax directive, an optional package directive, and any number
+# of message or enum declarations. Line comments and whitespace are ignored.
+#
+# The `start` rule is intentionally not marked with `?`. A `?` rule is inlined
+# by lark when it has exactly one child, which would skip the transformer's
+# `start()` callback for single-declaration files and let a non-dict value
+# (such as None for a lone enum) escape as the parse result.
+parser = Lark(
+    grammar="""start: syntax_directive? package_directive? (declaration)*
+
+        syntax_directive: "syntax" "=" "\\"proto3\\"" ";"
+        package_directive: "package" dotted_identifier ";"
+
+        declaration: message | enum
+
+        message: "message" CNAME "{" message_body "}"
+        message_body: (field | enum)*
+
+        enum: "enum" CNAME "{" enum_body "}"
+        enum_body: (enum_value)*
+
+        enum_value: CNAME "=" SIGNED_INT ";"
+
+        field: (repeated_type | type) CNAME "=" SIGNED_INT ";"
+
+        repeated_type: "repeated" type
+
+        type: CNAME
+
+        dotted_identifier: CNAME ("." CNAME)*
+
+        LINE_COMMENT: /\\/\\/[^\\n]*/
+
+        %import common.CNAME
+        %import common.WS
+        %import common.SIGNED_INT
+        %ignore WS
+        %ignore LINE_COMMENT""",
+    start='start',
+    parser='lalr'
+)
+
+
+# Define transformer
+# Collapses the lark parse tree of a proto file into nested dictionaries of
+# the form {message_name: {field_name: field_type}}. Repeated fields are
+# recorded as "REPEATED[<type>]", enums and the syntax directive are dropped,
+# and the package directive is stored under the "__package__" key.
+class ProtoDictTransformer(Transformer):  # type: ignore
+    # Merge the dictionaries produced by each top level node into one. Nodes
+    # that are intentionally ignored arrive here as None.
+    def start(self, items: List):  # type: ignore
+        messages = {}
+        for item in items:
+            if isinstance(item, dict):
+                messages.update(item)
+            elif item is None:
+                pass
+            else:
+                # Anything else is unexpected, print it so it can be debugged.
+                print(item)
+        return messages
+
+    def package_directive(self, items):  # type: ignore
+        return {"__package__": items[0]}
+
+    # Returns the list of name parts that make up the dotted identifier.
+    def dotted_identifier(self, items):  # type: ignore
+        return items
+
+    # Ignore the syntax directive
+    def syntax_directive(self, items) -> None:  # type: ignore
+        return None
+
+    # Ignore enums
+    def enum(self, items) -> None:  # type: ignore
+        return None
+
+    # A declaration wraps exactly one message or enum, unwrap it.
+    def declaration(self, items):  # type: ignore
+        if len(items) == 0:
+            return "ERROR"
+        if len(items) > 1:
+            print("Got more then one declaration, the grammar may be bugged", items)
+        return items[0]
+
+    def message(self, items):  # type: ignore
+        name, body = items
+        return {name: body}
+
+    # Merge the single-entry dictionaries of every field in the message. The
+    # grammar allows enums inside of a message body and enum() transforms
+    # those into None, so None entries are skipped instead of crashing.
+    def message_body(self, items):  # type: ignore
+        return {k: v for d in items if d is not None for k, v in d.items()}
+
+    # A field is made up of its (possibly repeated) type, its name, and its
+    # index. Only the name and the type are kept.
+    def field(self, items):  # type: ignore
+        if len(items) == 3:
+            type_name, field_name, index = items
+            return {field_name: type_name}
+        print("unknown field syntax, the grammer may be bugged")
+        return {}
+
+    # Convert CNAME tokens into their plain string value.
+    def CNAME(self, item: Token):  # type: ignore
+        return item.value
+
+    def type(self, items: List):  # type: ignore
+        if len(items) == 0:
+            return "ERROR"
+        if len(items) > 1:
+            print("Got more then one type, the grammar may be bugged", items)
+        return items[0]
+
+    # repeated_type has type tokens which get processed by type() into strings
+    def repeated_type(self, items: List[str]) -> str:
+        if len(items) == 0:
+            return "ERROR"
+        if len(items) > 1:
+            print("Got more then one repeated type, the grammar may be bugged", items)
+        return "REPEATED[" + items[0] + "]"
+
+
+def proto_to_dict(proto_str):  # type: ignore
+    # Parse the proto source text into a lark tree and hand that tree to the
+    # transformer, returning whatever the transformer produces.
+    return ProtoDictTransformer().transform(parser.parse(proto_str))
+
+
+################################################################################
+# Gets all of the field types of the proto.
+################################################################################
+# The proto file is read explicitly as UTF-8 so that parsing does not depend
+# on the default encoding of the machine running the generator. The path is
+# relative to the current working directory.
+with open("../proto/waypoint.proto", encoding="utf-8") as f:
+    proto_field_types = proto_to_dict(f.read())  # type: ignore
+
+
+def get_proto_field_type(message: str, field: str) -> str:
+    # Resolves the proto type of `field` within the proto message `message`.
+    #
+    # `field` may be a dotted path such as "outer.inner". Each part is looked
+    # up in the message named by the type of the previous part, starting at
+    # `message`. The type found for the final part is returned.
+    #
+    # Raises a KeyError that names the part which failed to resolve if the
+    # message or any part of the field path is not in the parsed proto file.
+    field_type = message
+
+    for field_part in field.split("."):
+        try:
+            field_type = proto_field_types[field_type][field_part]
+        except KeyError as e:
+            raise KeyError(
+                f"Unknown proto field '{field}' in message '{message}': "
+                f"could not resolve '{field_part}' in '{field_type}'"
+            ) from e
+
+    return field_type
diff --git a/xml_converter/generators/requirements.txt b/xml_converter/generators/requirements.txt
@@ -15,4 +15,5 @@ PyYAML==5.1
 tomli==2.0.1
 types-Markdown==3.4.0
 types-PyYAML==6.0.10
-typing_extensions==4.3.0
+typing_extensions==4.3.0
+lark==1.1.8
diff --git a/xml_converter/generators/web_templates/infotable.html b/xml_converter/generators/web_templates/infotable.html
@@ -34,7 +34,7 @@ <h2 id="{{field_row.binary_field}}">{{field_row.name}}</h2>
             <tr>
                 <td><a href="#{{field_row.binary_field}}">{% if field_row.is_sub_field %}&#8627; {% endif %}{{field_row.name}}</a></td>
                 <td>{{field_row.xml_attribute}}{% for alternate in field_row.alternate_xml_attributes %}, {{alternate}}{% endfor %}</td>
-                <td>{{field_row.binary_field}}</td>
+                <td>{{field_row.binary_field}} ({{field_row.binary_field_type}})</td>
                 <td>{{field_row.data_type}}</td>
                 <td>{{field_row.usable_on_html}}</td>
             </tr>