AsherGlick · AsherGlick · Nov 3, 2023 · Oct 31, 2023
diff --git a/xml_converter/generators/code_generator.py b/xml_converter/generators/code_generator.py
@@ -8,6 +8,7 @@
 from jinja2 import Template, FileSystemLoader, Environment
 from jinja_helpers import UnindentBlocks
 from schema import string_t, array_t, enum_t, union_t, union_partial_t, pattern_dictionary_t, object_t, boolean_t, DefType
+from protobuf_types import get_proto_field_type
 
 
 SchemaType = Dict[str, Any]
@@ -96,6 +97,7 @@ class FieldRow:
     xml_attribute: str
     alternate_xml_attributes: List[str]
     binary_field: str
+    binary_field_type: str
     data_type: str
     usable_on_html: str
     example: str
@@ -727,11 +729,19 @@ def generate_auto_docs(self, metadata: Dict[str, SchemaType], content: Dict[str,
                 )
                 # self.get_examples(fieldval['type'], fieldval['applies_to'], fieldval['xml_fieldsval'][0])
 
+            proto_field_type: str = ""
+            for marker_type in fieldval["applies_to"]:
+                proto_field_type = get_proto_field_type(marker_type, fieldval["protobuf_field"])
+                # TODO: catch discrepancies where the proto field has differing
+                # types across messages; for now the last marker type's result
+                # is kept. This will be caught in the cpp code regardless.
+
             field_rows.append(FieldRow(
                 name=fieldval["name"],
                 xml_attribute=fieldval["xml_fields"][0],
                 alternate_xml_attributes=fieldval["xml_fields"][1:],
                 binary_field=fieldval["protobuf_field"],
+                binary_field_type=proto_field_type,
                 data_type=fieldval["type"],
                 usable_on_html="<br>".join(fieldval["applies_to"]),
                 example=example,
@@ -743,11 +753,22 @@ def generate_auto_docs(self, metadata: Dict[str, SchemaType], content: Dict[str,
 
             if fieldval['type'] == "CompoundValue":
                 for component_field in fieldval["components"]:
+
+                    binary_field_name = fieldval["protobuf_field"] + "." + component_field["protobuf_field"]
+
+                    component_field_type: str = ""
+                    for marker_type in fieldval["applies_to"]:
+                        component_field_type = get_proto_field_type(marker_type, binary_field_name)
+                        # TODO: catch discrepancies where the proto field has differing
+                        # types across messages; for now the last marker type's result
+                        # is kept. This will be caught in the cpp code regardless.
+
                     field_rows.append(FieldRow(
                         name=component_field["name"],
                         xml_attribute=component_field["xml_fields"][0],
                         alternate_xml_attributes=component_field["xml_fields"][1:],
-                        binary_field=fieldval["protobuf_field"] + "." + component_field["protobuf_field"],
+                        binary_field=binary_field_name,
+                        binary_field_type=component_field_type,
                         data_type=component_field["type"],
                         usable_on_html="<br>".join(fieldval["applies_to"]),
                         example=self.build_example(

diff --git a/xml_converter/generators/protobuf_types.py b/xml_converter/generators/protobuf_types.py
@@ -0,0 +1,138 @@
+from lark import Lark, Transformer
+from lark.lexer import Token
+from typing import List
+
+################################################################################
+# This module parses a proto definition file with the goal of identifying the
+# proto field types based on the proto field name. It uses a lark grammar to
+# parse the proto file. The grammar is defined as a string, which makes static
+# typing harder. Most type checks are ignored in this file, but hopefully we
+# will be able to add them back in over time once we can figure them out.
+################################################################################
+
+parser = Lark(
+    grammar="""?start: syntax_directive? package_directive? (declaration)*
+
+        syntax_directive: "syntax" "=" "\\"proto3\\"" ";"
+        package_directive: "package" dotted_identifier ";"
+
+        declaration: message | enum
+
+        message: "message" CNAME "{" message_body "}"
+        message_body: (field | enum)*
+
+        enum: "enum" CNAME "{" enum_body "}"
+        enum_body: (enum_value)*
+
+        enum_value: CNAME "=" SIGNED_INT ";"
+
+        field: (repeated_type | type) CNAME "=" SIGNED_INT ";"
+
+        repeated_type: "repeated" type
+
+        type: CNAME
+
+        dotted_identifier: CNAME ("." CNAME)*
+
+        LINE_COMMENT: /\\/\\/[^\\n]*/
+
+        %import common.CNAME
+        %import common.WS
+        %import common.SIGNED_INT
+        %ignore WS
+        %ignore LINE_COMMENT""",
+    start='start',
+    parser='lalr'
+)
+
+
+# Define transformer
+class ProtoDictTransformer(Transformer):  # type: ignore
+    def start(self, items: List):  # type: ignore
+        messages = {}
+        for item in items:
+            if type(item) is dict:
+                messages.update(item)
+            elif item is None:
+                pass
+            else:
+                print(item,)
+        return messages
+
+    def package_directive(self, items):  # type: ignore
+        return {"__package__": items[0]}
+
+    def dotted_identifier(self, items):  # type: ignore
+        return items
+
+    # Ignore the syntax directive
+    def syntax_directive(self, items) -> None:  # type: ignore
+        return None
+
+    # Ignore enums
+    def enum(self, items) -> None:  # type: ignore
+        return None
+
+    def declaration(self, items):  # type: ignore
+        if len(items) == 0:
+            return "ERROR"
+        if len(items) > 1:
+            print("Got more then one declaration, the grammar may be bugged", items)
+        return items[0]
+
+    def message(self, items):  # type: ignore
+        name, body = items
+        return {name: body}
+
+    def message_body(self, items):  # type: ignore
+        return {k: v for d in items for k, v in d.items()}
+
+    def field(self, items):  # type: ignore
+        # Expect exactly three children: type (or repeated type), name, index
+        if len(items) == 3:
+            type_name, field_name, index = items
+            return {field_name: type_name}
+        print("unknown field syntax, the grammer may be bugged")
+        return {}
+
+    def CNAME(self, item: Token):  # type: ignore
+        return item.value
+
+    def type(self, items: List):  # type: ignore
+        if len(items) == 0:
+            return "ERROR"
+        if len(items) > 1:
+            print("Got more then one type, the grammar may be bugged", items)
+        return items[0]
+
+    # repeated_type has type tokens which get processed by type() into strings
+    def repeated_type(self, items: List[str]) -> str:
+        if len(items) == 0:
+            return "ERROR"
+        if len(items) > 1:
+            print("Got more then one repeated type, the grammar may be bugged", items)
+        return "REPEATED[" + items[0] + "]"
+
+
+def proto_to_dict(proto_str):  # type: ignore
+    tree = parser.parse(proto_str)
+    transformer = ProtoDictTransformer()
+    return transformer.transform(tree)
+
+
+################################################################################
+# Gets all of the field types of the proto.
+################################################################################
+with open("../proto/waypoint.proto") as f:
+    proto_field_types = proto_to_dict(f.read())  # type: ignore
+
+
+def get_proto_field_type(message: str, field: str) -> str:
+
+    field_type = message
+    multipart_field = field.split(".")
+
+    for field in multipart_field:
+        field_type = proto_field_types[field_type][field]
+
+    return field_type
diff --git a/xml_converter/generators/requirements.txt b/xml_converter/generators/requirements.txt
@@ -15,4 +15,5 @@ PyYAML==5.1
 tomli==2.0.1
 types-Markdown==3.4.0
 types-PyYAML==6.0.10
-typing_extensions==4.3.0
+typing_extensions==4.3.0
+lark==1.1.8
diff --git a/xml_converter/generators/web_templates/infotable.html b/xml_converter/generators/web_templates/infotable.html
@@ -34,7 +34,7 @@ <h2 id="{{field_row.binary_field}}">{{field_row.name}}</h2>
             <tr>
                 <td><a href="#{{field_row.binary_field}}">{% if field_row.is_sub_field %}&#8627; {% endif %}{{field_row.name}}</a></td>
                 <td>{{field_row.xml_attribute}}{% for alternate in field_row.alternate_xml_attributes %}, {{alternate}}{% endfor %}</td>
-                <td>{{field_row.binary_field}}</td>
+                <td>{{field_row.binary_field}} ({{field_row.binary_field_type}})</td>
                 <td>{{field_row.data_type}}</td>
                 <td>{{field_row.usable_on_html}}</td>
             </tr>