diff --git a/parser/BUILD b/parser/BUILD index d2815af47..6c0f63bcf 100644 --- a/parser/BUILD +++ b/parser/BUILD @@ -51,6 +51,7 @@ cc_library( "//parser/internal:cel_cc_parser", "@antlr4_runtimes//:cpp", "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/base:nullability", "@com_google_absl//absl/container:btree", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/functional:overload", diff --git a/parser/options.h b/parser/options.h index 892a406fd..ffecc9692 100644 --- a/parser/options.h +++ b/parser/options.h @@ -59,6 +59,31 @@ struct ParserOptions final { // // Limited to field specifiers in select and message creation. bool enable_quoted_identifiers = false; + + // Enables support for the cel.annotate macro. + // + // Annotations are normally injected by higher level CEL tools to provide + // additional metadata about how to interpret or analyze the expression. This + // macro is intended for adding annotations in the source expression, using + // the same internal mechanisms as annotations added by tools. + // + // The macro takes two arguments: + // + // 1. The expression to annotate. + // 2. A list of annotations to apply to the expression. + // + // Example: + // cel.annotate(foo.bar in baz, + // [cel.Annotation{name: "com.example.Explain", + // inspect_only: true, + // value: "check if foo.bar is in baz"}] + // ) + // + // A shorthand is permitted when the annotation has no value: + // cel.annotate(foo.bar in baz, "com.example.MyAnnotation") + // + // Annotations are packed into a cel.@annotated call wrapping the AST. 
+ bool enable_annotations = false; }; } // namespace cel diff --git a/parser/parser.cc b/parser/parser.cc index 1437f6613..7c6fadad1 100644 --- a/parser/parser.cc +++ b/parser/parser.cc @@ -32,6 +32,7 @@ #include "cel/expr/syntax.pb.h" #include "absl/base/macros.h" +#include "absl/base/nullability.h" #include "absl/base/optimization.h" #include "absl/container/btree_map.h" #include "absl/container/flat_hash_map.h" @@ -601,23 +602,151 @@ Expr ExpressionBalancer::BalancedTree(int lo, int hi) { return factory_.NewCall(ops_[mid], function_, std::move(arguments)); } +// Lightweight overlay for a registry. +// Adds stateful macros that are relevant per Parse call. +class AugmentedMacroRegistry { + public: + explicit AugmentedMacroRegistry(const cel::MacroRegistry& registry) + : base_(registry) {} + + cel::MacroRegistry& overlay() { return overlay_; } + + absl::optional FindMacro(absl::string_view name, size_t arg_count, + bool receiver_style) const; + + private: + const cel::MacroRegistry& base_; + cel::MacroRegistry overlay_; +}; + +absl::optional AugmentedMacroRegistry::FindMacro( + absl::string_view name, size_t arg_count, bool receiver_style) const { + auto result = overlay_.FindMacro(name, arg_count, receiver_style); + if (result.has_value()) { + return result; + } + + return base_.FindMacro(name, arg_count, receiver_style); +} + +bool IsSupportedAnnotation(const Expr& e) { + if (e.has_const_expr() && e.const_expr().has_string_value()) { + return true; + } else if (e.has_struct_expr() && + e.struct_expr().name() == "cel.Annotation") { + for (const auto& field : e.struct_expr().fields()) { + if (field.name() != "name" && field.name() != "inspect_only" && + field.name() != "value") { + return false; + } + } + return true; + } + return false; +} + +class AnnotationCollector { + private: + struct AnnotationRep { + Expr expr; + }; + + struct MacroImpl { + absl::Nonnull parent; + + // Record a single annotation. 
Returns a non-empty optional if + // an error is encountered. + absl::optional RecordAnnotation(cel::MacroExprFactory& mef, + int64_t id, Expr e) const; + + // MacroExpander for "cel.annotate" + absl::optional operator()(cel::MacroExprFactory& mef, Expr& target, + absl::Span args) const; + }; + + void Add(int64_t annotated_expr, Expr value); + + public: + const absl::btree_map>& annotations() { + return annotations_; + } + + absl::btree_map> consume_annotations() { + using std::swap; + absl::btree_map> result; + swap(result, annotations_); + return result; + } + + Macro MakeAnnotationImpl() { + auto impl = Macro::Receiver("annotate", 2, MacroImpl{this}); + ABSL_CHECK_OK(impl.status()); + return std::move(impl).value(); + } + + private: + absl::btree_map> annotations_; +}; + +absl::optional AnnotationCollector::MacroImpl::RecordAnnotation( + cel::MacroExprFactory& mef, int64_t id, Expr e) const { + if (IsSupportedAnnotation(e)) { + parent->Add(id, std::move(e)); + return absl::nullopt; + } + + return mef.ReportErrorAt( + e, + "cel.annotate argument is not a cel.Annotation{} or string expression"); +} + +absl::optional AnnotationCollector::MacroImpl::operator()( + cel::MacroExprFactory& mef, Expr& target, absl::Span args) const { + if (!target.has_ident_expr() || target.ident_expr().name() != "cel") { + return absl::nullopt; + } + + if (args.size() != 2) { + return mef.ReportErrorAt( + target, "wrong number of arguments for cel.annotate macro"); + } + + // arg0 (the annotated expression) is the expansion result. The remainder are + // annotations to record. 
+ int64_t id = args[0].id(); + + absl::optional result; + if (args[1].has_list_expr()) { + auto list = args[1].release_list_expr(); + for (auto& e : list.mutable_elements()) { + result = RecordAnnotation(mef, id, e.release_expr()); + if (result) { + break; + } + } + } else { + result = RecordAnnotation(mef, id, std::move(args[1])); + } + + if (result) { + return result; + } + + return std::move(args[0]); +} + +void AnnotationCollector::Add(int64_t annotated_expr, Expr value) { + annotations_[annotated_expr].push_back({std::move(value)}); +} + class ParserVisitor final : public CelBaseVisitor, public antlr4::BaseErrorListener { public: ParserVisitor(const cel::Source& source, int max_recursion_depth, absl::string_view accu_var, - const cel::MacroRegistry& macro_registry, - bool add_macro_calls = false, - bool enable_optional_syntax = false, - bool enable_quoted_identifiers = false) - : source_(source), - factory_(source_, accu_var), - macro_registry_(macro_registry), - recursion_depth_(0), - max_recursion_depth_(max_recursion_depth), - add_macro_calls_(add_macro_calls), - enable_optional_syntax_(enable_optional_syntax), - enable_quoted_identifiers_(enable_quoted_identifiers) {} + const cel::MacroRegistry& macro_registry, bool add_macro_calls, + bool enable_optional_syntax, bool enable_quoted_identifiers, + bool enable_annotations); ~ParserVisitor() override = default; @@ -675,6 +804,8 @@ class ParserVisitor final : public CelBaseVisitor, std::string ErrorMessage(); + Expr PackAnnotations(Expr ast); + private: template Expr GlobalCallOrMacro(int64_t expr_id, absl::string_view function, @@ -702,14 +833,38 @@ class ParserVisitor final : public CelBaseVisitor, private: const cel::Source& source_; cel::ParserMacroExprFactory factory_; - const cel::MacroRegistry& macro_registry_; + AugmentedMacroRegistry macro_registry_; + AnnotationCollector annotations_; int recursion_depth_; const int max_recursion_depth_; const bool add_macro_calls_; const bool 
enable_optional_syntax_; const bool enable_quoted_identifiers_; + const bool enable_annotations_; }; +ParserVisitor::ParserVisitor(const cel::Source& source, int max_recursion_depth, + absl::string_view accu_var, + const cel::MacroRegistry& macro_registry, + bool add_macro_calls, bool enable_optional_syntax, + bool enable_quoted_identifiers, + bool enable_annotations) + : source_(source), + factory_(source_, accu_var), + macro_registry_(macro_registry), + recursion_depth_(0), + max_recursion_depth_(max_recursion_depth), + add_macro_calls_(add_macro_calls), + enable_optional_syntax_(enable_optional_syntax), + enable_quoted_identifiers_(enable_quoted_identifiers), + enable_annotations_(enable_annotations) { + if (enable_annotations_) { + macro_registry_.overlay() + .RegisterMacro(annotations_.MakeAnnotationImpl()) + .IgnoreError(); + } +} + template ::value>> T* tree_as(antlr4::tree::ParseTree* tree) { @@ -1638,6 +1793,61 @@ struct ParseResult { EnrichedSourceInfo enriched_source_info; }; +Expr NormalizeAnnotation(cel::ParserMacroExprFactory& mef, Expr expr) { + if (expr.has_struct_expr()) { + return expr; + } + + if (expr.has_const_expr()) { + std::vector fields; + fields.reserve(2); + fields.push_back( + mef.NewStructField(mef.NextId({}), "name", std::move(expr))); + auto bool_const = mef.NewBoolConst(mef.NextId({}), true); + fields.push_back(mef.NewStructField(mef.NextId({}), "inspect_only", + std::move(bool_const))); + return mef.NewStruct(mef.NextId({}), "cel.Annotation", std::move(fields)); + } + + return mef.ReportError("invalid annotation encountered finalizing AST"); +} + +Expr ParserVisitor::PackAnnotations(Expr ast) { + if (annotations_.annotations().empty()) { + return ast; + } + + auto annotations = annotations_.consume_annotations(); + std::vector entries; + entries.reserve(annotations.size()); + + for (auto& annotation : annotations) { + std::vector annotation_values; + annotation_values.reserve(annotation.second.size()); + + for (auto& 
annotation_value : annotation.second) { + auto annotation = + NormalizeAnnotation(factory_, std::move(annotation_value.expr)); + annotation_values.push_back( + factory_.NewListElement(std::move(annotation))); + } + auto id = factory_.NewIntConst(factory_.NextId({}), annotation.first); + auto annotation_list = + factory_.NewList(factory_.NextId({}), std::move(annotation_values)); + entries.push_back(factory_.NewMapEntry(factory_.NextId({}), std::move(id), + std::move(annotation_list))); + } + + std::vector args; + args.push_back(std::move(ast)); + args.push_back(factory_.NewMap(factory_.NextId({}), std::move(entries))); + + auto result = + factory_.NewCall(factory_.NextId({}), "cel.@annotated", std::move(args)); + + return result; +} + absl::StatusOr ParseImpl(const cel::Source& source, const cel::MacroRegistry& registry, const ParserOptions& options) { @@ -1656,10 +1866,10 @@ absl::StatusOr ParseImpl(const cel::Source& source, if (options.enable_hidden_accumulator_var) { accu_var = cel::kHiddenAccumulatorVariableName; } - ParserVisitor visitor(source, options.max_recursion_depth, accu_var, - registry, options.add_macro_calls, - options.enable_optional_syntax, - options.enable_quoted_identifiers); + ParserVisitor visitor( + source, options.max_recursion_depth, accu_var, registry, + options.add_macro_calls, options.enable_optional_syntax, + options.enable_quoted_identifiers, options.enable_annotations); lexer.removeErrorListeners(); parser.removeErrorListeners(); @@ -1686,7 +1896,9 @@ absl::StatusOr ParseImpl(const cel::Source& source, if (visitor.HasErrored()) { return absl::InvalidArgumentError(visitor.ErrorMessage()); } - + if (options.enable_annotations) { + expr = visitor.PackAnnotations(std::move(expr)); + } return { ParseResult{.expr = std::move(expr), .source_info = visitor.GetSourceInfo(), diff --git a/parser/parser_test.cc b/parser/parser_test.cc index a29c62626..ce9ce5461 100644 --- a/parser/parser_test.cc +++ b/parser/parser_test.cc @@ -16,7 +16,6 @@ 
#include #include -#include #include #include @@ -54,11 +53,20 @@ using ::testing::HasSubstr; using ::testing::Not; struct TestInfo { - TestInfo(const std::string& I, const std::string& P, - const std::string& E = "", const std::string& L = "", - const std::string& R = "", const std::string& M = "") + TestInfo(absl::string_view I, absl::string_view P, absl::string_view E = "", + absl::string_view L = "", absl::string_view R = "", + absl::string_view M = "") : I(I), P(P), E(E), L(L), R(R), M(M) {} + static TestInfo MacroCallCase(absl::string_view I, absl::string_view P, + absl::string_view M) { + return TestInfo(I, P, /*E=*/"", /*L=*/"", /*R=*/"", M); + } + + static TestInfo ErrorCase(absl::string_view I, absl::string_view E) { + return TestInfo(I, /*P=*/"", E, /*L=*/"", /*R=*/"", /*M=*/""); + } + // I contains the input expression to be parsed. std::string I; @@ -1889,6 +1897,244 @@ TEST_P(UpdatedAccuVarDisabledTest, Parse) { } } +const std::vector& AnnotationsTestCases() { + static const std::vector* kInstance = new std::vector{ + TestInfo::MacroCallCase("cel.annotate(" + " foo.bar," + " 'com.example.SimpleAnnotation'" + ")", + R"( +cel.@annotated( + foo^#3:Expr.Ident#.bar^#4:Expr.Select#, + { + 4^#10:int64#:[ + cel.Annotation{ + name:"com.example.SimpleAnnotation"^#5:string#^#6:Expr.CreateStruct.Entry#, + inspect_only:true^#7:bool#^#8:Expr.CreateStruct.Entry# + }^#9:Expr.CreateStruct# + ]^#11:Expr.CreateList#^#12:Expr.CreateStruct.Entry# + }^#13:Expr.CreateStruct# +)^#14:Expr.Call#)", + "cel^#1:Expr.Ident#.annotate(\n" + " foo^#3:Expr.Ident#.bar^#4:annotate#,\n" + " \"com.example.SimpleAnnotation\"^#5:string#\n" + ")^#4:annotate"), + TestInfo::MacroCallCase( + R"cel( + cel.annotate( + foo.bar, + 'com.example.SimpleAnnotation') || + cel.annotate( + foo.baz, + 'com.example.MyOtherAnnotation'))cel", + R"( +cel.@annotated( + _||_( + foo^#3:Expr.Ident#.bar^#4:Expr.Select#, + foo^#8:Expr.Ident#.baz^#9:Expr.Select# + )^#11:Expr.Call#, + { + 4^#16:int64#:[ + 
cel.Annotation{ + name:"com.example.SimpleAnnotation"^#5:string#^#12:Expr.CreateStruct.Entry#, + inspect_only:true^#13:bool#^#14:Expr.CreateStruct.Entry# + }^#15:Expr.CreateStruct# + ]^#17:Expr.CreateList#^#18:Expr.CreateStruct.Entry#, + 9^#23:int64#:[ + cel.Annotation{ + name:"com.example.MyOtherAnnotation"^#10:string#^#19:Expr.CreateStruct.Entry#, + inspect_only:true^#20:bool#^#21:Expr.CreateStruct.Entry# + }^#22:Expr.CreateStruct# + ]^#24:Expr.CreateList#^#25:Expr.CreateStruct.Entry# + }^#26:Expr.CreateStruct# +)^#27:Expr.Call#)", + /*M=*/ + "cel^#6:Expr.Ident#.annotate(\n" + " foo^#8:Expr.Ident#.baz^#9:annotate#,\n" + " \"com.example.MyOtherAnnotation\"^#10:string#\n" + ")^#9:annotate#,\n" + "cel^#1:Expr.Ident#.annotate(\n" + " foo^#3:Expr.Ident#.bar^#4:annotate#,\n" + " \"com.example.SimpleAnnotation\"^#5:string#\n" + ")^#4:annotate"), + TestInfo::MacroCallCase(R"cel( + cel.annotate( + foo.bar, + ['com.example.SimpleAnnotation', + 'com.example.MyOtherAnnotation'] + ))cel", + + /*P=*/R"( +cel.@annotated( + foo^#3:Expr.Ident#.bar^#4:Expr.Select#, + { + 4^#16:int64#:[ + cel.Annotation{ + name:"com.example.SimpleAnnotation"^#6:string#^#8:Expr.CreateStruct.Entry#, + inspect_only:true^#9:bool#^#10:Expr.CreateStruct.Entry# + }^#11:Expr.CreateStruct#, + cel.Annotation{ + name:"com.example.MyOtherAnnotation"^#7:string#^#12:Expr.CreateStruct.Entry#, + inspect_only:true^#13:bool#^#14:Expr.CreateStruct.Entry# + }^#15:Expr.CreateStruct# + ]^#17:Expr.CreateList#^#18:Expr.CreateStruct.Entry# + }^#19:Expr.CreateStruct# +)^#20:Expr.Call#)", + + /*M=*/R"(cel^#1:Expr.Ident#.annotate( + foo^#3:Expr.Ident#.bar^#4:annotate#, + [ + "com.example.SimpleAnnotation"^#6:string#, + "com.example.MyOtherAnnotation"^#7:string# + ]^#5:Expr.CreateList# +)^#4:annotate)"), + TestInfo::MacroCallCase(R"cel( + cel.annotate( + baz in foo.bar, + cel.Annotation{ + name: 'com.example.Explainer', + value: "baz is in foo.bar." + cel.annotation_value ? 
" oh no" : "" + } + ))cel", + + R"( +cel.@annotated( + @in( + baz^#3:Expr.Ident#, + foo^#5:Expr.Ident#.bar^#6:Expr.Select# + )^#4:Expr.Call#, + { + 4^#18:int64#:[ + cel.Annotation{ + name:"com.example.Explainer"^#9:string#^#8:Expr.CreateStruct.Entry#, + value:_?_:_( + _+_( + "baz is in foo.bar."^#11:string#, + cel^#13:Expr.Ident#.annotation_value^#14:Expr.Select# + )^#12:Expr.Call#, + " oh no"^#16:string#, + ""^#17:string# + )^#15:Expr.Call#^#10:Expr.CreateStruct.Entry# + }^#7:Expr.CreateStruct# + ]^#19:Expr.CreateList#^#20:Expr.CreateStruct.Entry# + }^#21:Expr.CreateStruct# +)^#22:Expr.Call#)", + + /*M=*/R"(cel^#1:Expr.Ident#.annotate( + @in( + baz^#3:Expr.Ident#, + foo^#5:Expr.Ident#.bar^#6:Expr.Select# + )^#4:annotate#, + cel.Annotation{ + name:"com.example.Explainer"^#9:string#^#8:Expr.CreateStruct.Entry#, + value:_?_:_( + _+_( + "baz is in foo.bar."^#11:string#, + cel^#13:Expr.Ident#.annotation_value^#14:Expr.Select# + )^#12:Expr.Call#, + " oh no"^#16:string#, + ""^#17:string# + )^#15:Expr.Call#^#10:Expr.CreateStruct.Entry# + }^#7:Expr.CreateStruct# +)^#4:annotate)"), + + TestInfo::MacroCallCase(R"cel( + cel.annotate( + baz in foo.bar, + [ + cel.Annotation{ + name: 'com.example.Explainer', + value: "baz is in foo.bar. oh no" + }, + "com.example.SimpleAnnotation" + ] + ))cel", + + /*P=*/R"( +cel.@annotated( + @in( + baz^#3:Expr.Ident#, + foo^#5:Expr.Ident#.bar^#6:Expr.Select# + )^#4:Expr.Call#, + { + 4^#18:int64#:[ + cel.Annotation{ + name:"com.example.Explainer"^#10:string#^#9:Expr.CreateStruct.Entry#, + value:"baz is in foo.bar. 
oh no"^#12:string#^#11:Expr.CreateStruct.Entry# + }^#8:Expr.CreateStruct#, + cel.Annotation{ + name:"com.example.SimpleAnnotation"^#13:string#^#14:Expr.CreateStruct.Entry#, + inspect_only:true^#15:bool#^#16:Expr.CreateStruct.Entry# + }^#17:Expr.CreateStruct# + ]^#19:Expr.CreateList#^#20:Expr.CreateStruct.Entry# + }^#21:Expr.CreateStruct# +)^#22:Expr.Call#)", + + /*M=*/R"(cel^#1:Expr.Ident#.annotate( + @in( + baz^#3:Expr.Ident#, + foo^#5:Expr.Ident#.bar^#6:Expr.Select# + )^#4:annotate#, + [ + cel.Annotation{ + name:"com.example.Explainer"^#10:string#^#9:Expr.CreateStruct.Entry#, + value:"baz is in foo.bar. oh no"^#12:string#^#11:Expr.CreateStruct.Entry# + }^#8:Expr.CreateStruct#, + "com.example.SimpleAnnotation"^#13:string# + ]^#7:Expr.CreateList# +)^#4:annotate)")}; + + return *kInstance; +} + +class AnnotationsTest : public testing::TestWithParam {}; + +TEST_P(AnnotationsTest, Parse) { + const TestInfo& test_info = GetParam(); + ParserOptions options; + options.enable_annotations = true; + + if (!test_info.M.empty()) { + options.add_macro_calls = true; + } + + auto result = + EnrichedParse(test_info.I, Macro::AllMacros(), "", options); + if (test_info.E.empty()) { + EXPECT_THAT(result, IsOk()); + } else { + EXPECT_THAT(result, Not(IsOk())); + EXPECT_EQ(test_info.E, result.status().message()); + } + + if (!test_info.P.empty()) { + KindAndIdAdorner kind_and_id_adorner; + ExprPrinter w(kind_and_id_adorner); + std::string adorned_string = w.PrintProto(result->parsed_expr().expr()); + EXPECT_EQ(absl::StripAsciiWhitespace(test_info.P), adorned_string) + << result->parsed_expr(); + } + + if (!test_info.L.empty()) { + LocationAdorner location_adorner(result->parsed_expr().source_info()); + ExprPrinter w(location_adorner); + std::string adorned_string = w.PrintProto(result->parsed_expr().expr()); + EXPECT_EQ(test_info.L, adorned_string) << result->parsed_expr(); + } + + if (!test_info.R.empty()) { + EXPECT_EQ(test_info.R, ConvertEnrichedSourceInfoToString( + 
result->enriched_source_info())); + } + + if (!test_info.M.empty()) { + EXPECT_EQ( + absl::StripAsciiWhitespace(test_info.M), + ConvertMacroCallsToString(result.value().parsed_expr().source_info())) + << result->parsed_expr(); + } +} + TEST(NewParserBuilderTest, Defaults) { auto builder = cel::NewParserBuilder(); ASSERT_OK_AND_ASSIGN(auto parser, std::move(*builder).Build()); @@ -1954,5 +2200,8 @@ INSTANTIATE_TEST_SUITE_P(UpdatedAccuVarTest, UpdatedAccuVarDisabledTest, testing::ValuesIn(UpdatedAccuVarTestCases()), TestName); +INSTANTIATE_TEST_SUITE_P(AnnotationsTest, AnnotationsTest, + testing::ValuesIn(AnnotationsTestCases()), TestName); + } // namespace } // namespace google::api::expr::parser