From 7a1c5e5070c3f916a9fb57b1e6a85898f3ae54db Mon Sep 17 00:00:00 2001
From: Asher Glick <asher.glick@gmail.com>
Date: Tue, 31 Oct 2023 16:51:07 -0500
Subject: [PATCH] Parsing proto field types out of the .proto file

Ultimately this became required in order to properly create the generated files. We actually already needed this, but were using a hack where we assumed the message name was the same as the name defined in the frontmatter. This is the first step toward removing that hack, and it also allows the generator code to know what the proto field type is when creating generated code.
---
 xml_converter/generators/code_generator.py    |  23 ++-
 xml_converter/generators/protobuf_types.py    | 138 ++++++++++++++++++
 xml_converter/generators/requirements.txt     |   3 +-
 .../generators/web_templates/infotable.html   |   2 +-
 4 files changed, 163 insertions(+), 3 deletions(-)
 create mode 100644 xml_converter/generators/protobuf_types.py
diff --git a/xml_converter/generators/code_generator.py b/xml_converter/generators/code_generator.py
index 1e3cc815..ade3c76e 100644
--- a/xml_converter/generators/code_generator.py
+++ b/xml_converter/generators/code_generator.py
@@ -8,6 +8,7 @@
 from jinja2 import Template, FileSystemLoader, Environment
 from jinja_helpers import UnindentBlocks
 from schema import string_t, array_t, enum_t, union_t, union_partial_t, pattern_dictionary_t, object_t, boolean_t, DefType
+from protobuf_types import get_proto_field_type
 
 
 SchemaType = Dict[str, Any]
@@ -96,6 +97,7 @@ class FieldRow:
     xml_attribute: str
     alternate_xml_attributes: List[str]
     binary_field: str
+    binary_field_type: str
     data_type: str
     usable_on_html: str
     example: str
@@ -727,11 +729,19 @@ def generate_auto_docs(self, metadata: Dict[str, SchemaType], content: Dict[str,
                 )
                 # self.get_examples(fieldval['type'], fieldval['applies_to'], fieldval['xml_fieldsval'][0])
 
+            proto_field_type: str = ""
+            for marker_type in fieldval["applies_to"]:
+                proto_field_type = get_proto_field_type(marker_type, fieldval["protobuf_field"])
+                # TODO: catch discrepancies where the same proto field has
+                # differing types across different messages. This will be
+                # caught in the cpp code regardless.
+
             field_rows.append(FieldRow(
                 name=fieldval["name"],
                 xml_attribute=fieldval["xml_fields"][0],
                 alternate_xml_attributes=fieldval["xml_fields"][1:],
                 binary_field=fieldval["protobuf_field"],
+                binary_field_type=proto_field_type,
                 data_type=fieldval["type"],
                 usable_on_html="<br>".join(fieldval["applies_to"]),
                 example=example,
@@ -743,11 +753,22 @@ def generate_auto_docs(self, metadata: Dict[str, SchemaType], content: Dict[str,
 
             if fieldval['type'] == "CompoundValue":
                 for component_field in fieldval["components"]:
+
+                    binary_field_name = fieldval["protobuf_field"] + "." + component_field["protobuf_field"]
+
+                    component_field_type: str = ""
+                    for marker_type in fieldval["applies_to"]:
+                        component_field_type = get_proto_field_type(marker_type, binary_field_name)
+                        # TODO: catch discrepancies where the same proto field
+                        # has differing types across different messages. This
+                        # will be caught in the cpp code regardless.
+
                     field_rows.append(FieldRow(
                         name=component_field["name"],
                         xml_attribute=component_field["xml_fields"][0],
                         alternate_xml_attributes=component_field["xml_fields"][1:],
-                        binary_field=fieldval["protobuf_field"] + "." + component_field["protobuf_field"],
+                        binary_field=binary_field_name,
+                        binary_field_type=component_field_type,
                         data_type=component_field["type"],
                         usable_on_html="<br>".join(fieldval["applies_to"]),
                         example=self.build_example(
diff --git a/xml_converter/generators/protobuf_types.py b/xml_converter/generators/protobuf_types.py
new file mode 100644
index 00000000..69ecdce0
--- /dev/null
+++ b/xml_converter/generators/protobuf_types.py
@@ -0,0 +1,138 @@
+from lark import Lark, Transformer
+from lark.lexer import Token
+from typing import List
+
+################################################################################
+# This module parses a proto definition file with the goal of identifying the
+# proto field types based on the proto field name. It uses a lark grammar to
+# parse the proto file. The grammar is a stringy definition which makes typing
+# harder. Most type checks are ignored in this file, but hopefully we will be
+# able to add them back in over time once we can figure them out.
+################################################################################
+
+parser = Lark(
+    grammar="""?start: syntax_directive? package_directive? (declaration)*
+
+        syntax_directive: "syntax" "=" "\\"proto3\\"" ";"
+        package_directive: "package" dotted_identifier ";"
+
+        declaration: message | enum
+
+        message: "message" CNAME "{" message_body "}"
+        message_body: (field | enum)*
+
+        enum: "enum" CNAME "{" enum_body "}"
+        enum_body: (enum_value)*
+
+        enum_value: CNAME "=" SIGNED_INT ";"
+
+        field: (repeated_type | type) CNAME "=" SIGNED_INT ";"
+
+        repeated_type: "repeated" type
+
+        type: CNAME
+
+        dotted_identifier: CNAME ("." CNAME)*
+
+        LINE_COMMENT: /\\/\\/[^\\n]*/
+
+        %import common.CNAME
+        %import common.WS
+        %import common.SIGNED_INT
+        %ignore WS
+        %ignore LINE_COMMENT""",
+    start='start',
+    parser='lalr'
+)
+
+
+# Transformer that turns the parse tree into {message name: {field name: type}}
+class ProtoDictTransformer(Transformer):  # type: ignore
+    def start(self, items: List):  # type: ignore
+        messages = {}
+        for item in items:
+            if type(item) is dict:
+                messages.update(item)
+            elif item is None:
+                pass
+            else:
+                print(item,)
+        return messages
+
+    def package_directive(self, items):  # type: ignore
+        return {"__package__": items[0]}
+
+    def dotted_identifier(self, items):  # type: ignore
+        return items
+
+    # Ignore the syntax directive
+    def syntax_directive(self, items) -> None:  # type: ignore
+        return None
+
+    # Ignore enums
+    def enum(self, items) -> None:  # type: ignore
+        return None
+
+    def declaration(self, items):  # type: ignore
+        if len(items) == 0:
+            return "ERROR"
+        if len(items) > 1:
+            print("Got more then one declaration, the grammar may be bugged", items)
+        return items[0]
+
+    def message(self, items):  # type: ignore
+        name, body = items
+        return {name: body}
+
+    def message_body(self, items):  # type: ignore
+        return {k: v for d in items for k, v in d.items()}
+
+    def field(self, items):  # type: ignore
+        # A well-formed field has exactly three children: type, name, index
+        if len(items) == 3:
+            type_name, field_name, index = items
+            return {field_name: type_name}
+        print("unknown field syntax, the grammer may be bugged")
+        return {}
+
+    def CNAME(self, item: Token):  # type: ignore
+        return item.value
+
+    def type(self, items: List):  # type: ignore
+        if len(items) == 0:
+            return "ERROR"
+        if len(items) > 1:
+            print("Got more then one type, the grammar may be bugged", items)
+        return items[0]
+
+    # repeated_type has type tokens which get processed by type() into strings
+    def repeated_type(self, items: List[str]) -> str:
+        if len(items) == 0:
+            return "ERROR"
+        if len(items) > 1:
+            print("Got more then one repeated type, the grammar may be bugged", items)
+        return "REPEATED[" + items[0] + "]"
+
+
+def proto_to_dict(proto_str):  # type: ignore
+    tree = parser.parse(proto_str)
+    transformer = ProtoDictTransformer()
+    return transformer.transform(tree)
+
+
+################################################################################
+# Parses the proto file once at import time to get all of its field types.
+################################################################################
+with open("../proto/waypoint.proto") as f:
+    proto_field_types = proto_to_dict(f.read())  # type: ignore
+
+
+def get_proto_field_type(message: str, field: str) -> str:
+
+    field_type = message
+    multipart_field = field.split(".")
+
+    for field in multipart_field:
+        field_type = proto_field_types[field_type][field]
+
+    return field_type
diff --git a/xml_converter/generators/requirements.txt b/xml_converter/generators/requirements.txt
index 3a17b260..da2e2c05 100644
--- a/xml_converter/generators/requirements.txt
+++ b/xml_converter/generators/requirements.txt
@@ -15,4 +15,5 @@ PyYAML==5.1
 tomli==2.0.1
 types-Markdown==3.4.0
 types-PyYAML==6.0.10
-typing_extensions==4.3.0
\ No newline at end of file
+typing_extensions==4.3.0
+lark==1.1.8
\ No newline at end of file
diff --git a/xml_converter/generators/web_templates/infotable.html b/xml_converter/generators/web_templates/infotable.html
index 6bc9e25d..635e2f28 100644
--- a/xml_converter/generators/web_templates/infotable.html
+++ b/xml_converter/generators/web_templates/infotable.html
@@ -34,7 +34,7 @@ <h2 id="{{field_row.binary_field}}">{{field_row.name}}</h2>
             <tr>
                 <td><a href="#{{field_row.binary_field}}">{% if field_row.is_sub_field %}&#8627; {% endif %}{{field_row.name}}</a></td>
                 <td>{{field_row.xml_attribute}}{% for alternate in field_row.alternate_xml_attributes %}, {{alternate}}{% endfor %}</td>
-                <td>{{field_row.binary_field}}</td>
+                <td>{{field_row.binary_field}} ({{field_row.binary_field_type}})</td>
                 <td>{{field_row.data_type}}</td>
                 <td>{{field_row.usable_on_html}}</td>
             </tr>