[pkg/ottl] Change context inferrer to use functions and enums as hints (

#36869)  #### Description This PR is part of #29017, and a spin-off from #36820. It changes the existing context inferrer logic to also take into consideration the functions and enums used in the statements. (#36820 (comment)) New logic: - Find all `path`, function names (`editor`, `converter`), and `enumSymbol` in the statements - Pick the highest priority context (same existing logic) based on the `path.Context` values - If the chosen context does not support all used functions and enums, it goes through its lower contexts (wider-scope contexts that do support the chosen context as a path context), testing them and choosing the first one that supports them. - If no context that can handle the functions and enums is found, the inference fails and an empty value is returned. The parser collection was adapted to support the new context inferrer configuration requirements. **Other important changes:** Currently, it's possible to have paths to context root objects, for example: `set(attributes["body"], resource)`. Given that `resource` has no dot separators in the path, the grammar extracts it into the `path.Fields` slice, leaving the `path.Context` value empty. Why? This grammar behaviour is still necessary to keep backward compatibility with paths without context; otherwise it would start requiring contexts for all paths regardless of the parser configuration. Previous PRs didn't take this edge case into consideration, and a few places needed to be changed to address it: - Context inferrer (`getContextCandidate`) - Parser `prependContextToStatementPaths` function. - Reusable OTTL contexts (`contexts/internal`) (not part of this PR, it will be fixed by #36820) When/if we reach the point of deprecating paths _without_ context, all those conditions can be removed, and the grammar changed to require and extract the `path` context properly.  #### Link to tracking issue #29017  #### Testing Unit tests  #### Documentation No changes
open-telemetry · Dec 19, 2024 · bc3d400 · bc3d400
1 parent 8058e0c
commit bc3d400
Show file tree

Hide file tree

Showing 6 changed files with 382 additions and 55 deletions.
diff --git a/pkg/ottl/context_inferrer.go b/pkg/ottl/context_inferrer.go
@@ -3,75 +3,234 @@
 
 package ottl // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl"
 
-import "math"
+import (
+	"cmp"
+	"math"
+	"slices"
+)
 
 var defaultContextInferPriority = []string{
 	"log",
-	"metric",
 	"datapoint",
+	"metric",
 	"spanevent",
 	"span",
 	"resource",
 	"scope",
 	"instrumentation_scope",
 }
 
-// contextInferrer is an interface used to infer the OTTL context from statements paths.
+// contextInferrer is an interface used to infer the OTTL context from statements.
 type contextInferrer interface {
-	// infer returns the OTTL context inferred from the given statements paths.
+	// infer returns the OTTL context inferred from the given statements.
 	infer(statements []string) (string, error)
 }
 
 type priorityContextInferrer struct {
-	contextPriority map[string]int
+	contextPriority  map[string]int
+	contextCandidate map[string]*priorityContextInferrerCandidate
+}
+
+type priorityContextInferrerCandidate struct {
+	hasEnumSymbol    func(enum *EnumSymbol) bool
+	hasFunctionName  func(name string) bool
+	getLowerContexts func(context string) []string
+}
+
+type priorityContextInferrerOption func(*priorityContextInferrer)
+
+// newPriorityContextInferrer creates a new priority-based context inferrer. To infer the context,
+// it uses a slice of priorities (withContextInferrerPriorities) and a set of hints extracted from
+// the parsed statements.
+//
+// To be eligible, a context must support all functions and enum symbols present in the statements.
+// If the path context with the highest priority does not meet this requirement, it falls back to its
+// lower contexts, testing them with the same logic and choosing the first one that meets all requirements.
+//
+// If non-prioritized contexts are found on the statements, they get assigned the lowest possible priority,
+// and are only selected if no other prioritized context is found.
+func newPriorityContextInferrer(contextsCandidate map[string]*priorityContextInferrerCandidate, options ...priorityContextInferrerOption) contextInferrer {
+	c := &priorityContextInferrer{
+		contextCandidate: contextsCandidate,
+	}
+	for _, opt := range options {
+		opt(c)
+	}
+	if len(c.contextPriority) == 0 {
+		withContextInferrerPriorities(defaultContextInferPriority)(c)
+	}
+	return c
+}
+
+// withContextInferrerPriorities sets the context candidates' priorities. The lower a
+// context's index in the slice, the higher its priority over the other items.
+func withContextInferrerPriorities(priorities []string) priorityContextInferrerOption {
+	return func(c *priorityContextInferrer) {
+		contextPriority := map[string]int{}
+		for pri, context := range priorities {
+			contextPriority[context] = pri
+		}
+		c.contextPriority = contextPriority
+	}
 }
 
 func (s *priorityContextInferrer) infer(statements []string) (string, error) {
+	requiredFunctions := map[string]struct{}{}
+	requiredEnums := map[enumSymbol]struct{}{}
+
 	var inferredContext string
 	var inferredContextPriority int
-
 	for _, statement := range statements {
 		parsed, err := parseStatement(statement)
 		if err != nil {
-			return inferredContext, err
+			return "", err
 		}
 
-		for _, p := range getParsedStatementPaths(parsed) {
-			pathContextPriority, ok := s.contextPriority[p.Context]
+		statementPaths, statementFunctions, statementEnums := s.getParsedStatementHints(parsed)
+		for _, p := range statementPaths {
+			candidate := p.Context
+			candidatePriority, ok := s.contextPriority[candidate]
 			if !ok {
-				// Lowest priority
-				pathContextPriority = math.MaxInt
+				candidatePriority = math.MaxInt
 			}
-
-			if inferredContext == "" || pathContextPriority < inferredContextPriority {
-				inferredContext = p.Context
-				inferredContextPriority = pathContextPriority
+			if inferredContext == "" || candidatePriority < inferredContextPriority {
+				inferredContext = candidate
+				inferredContextPriority = candidatePriority
 			}
 		}
+		for function := range statementFunctions {
+			requiredFunctions[function] = struct{}{}
+		}
+		for enum := range statementEnums {
+			requiredEnums[enum] = struct{}{}
+		}
+	}
+	// No inferred context or nothing left to verify.
+	if inferredContext == "" || (len(requiredFunctions) == 0 && len(requiredEnums) == 0) {
+		return inferredContext, nil
+	}
+	ok := s.validateContextCandidate(inferredContext, requiredFunctions, requiredEnums)
+	if ok {
+		return inferredContext, nil
+	}
+	return s.inferFromLowerContexts(inferredContext, requiredFunctions, requiredEnums), nil
+}
+
+// validateContextCandidate checks if the given context candidate has all required function names
+// and enum symbols. The functions' arity is not verified.
+func (s *priorityContextInferrer) validateContextCandidate(
+	context string,
+	requiredFunctions map[string]struct{},
+	requiredEnums map[enumSymbol]struct{},
+) bool {
+	candidate, ok := s.contextCandidate[context]
+	if !ok {
+		return false
+	}
+	if len(requiredFunctions) == 0 && len(requiredEnums) == 0 {
+		return true
+	}
+	for function := range requiredFunctions {
+		if !candidate.hasFunctionName(function) {
+			return false
+		}
+	}
+	for enum := range requiredEnums {
+		if !candidate.hasEnumSymbol((*EnumSymbol)(&enum)) {
+			return false
+		}
 	}
+	return true
+}
+
+// inferFromLowerContexts returns the first lower context that supports all required functions
+// and enum symbols used on the statements.
+// If no lower context meets the requirements, or if the context candidate is unknown, it
+// returns an empty string.
+func (s *priorityContextInferrer) inferFromLowerContexts(
+	context string,
+	requiredFunctions map[string]struct{},
+	requiredEnums map[enumSymbol]struct{},
+) string {
+	inferredContextCandidate, ok := s.contextCandidate[context]
+	if !ok {
+		return ""
+	}
+
+	lowerContextCandidates := inferredContextCandidate.getLowerContexts(context)
+	if len(lowerContextCandidates) == 0 {
+		return ""
+	}
+
+	s.sortContextCandidates(lowerContextCandidates)
+	for _, lowerCandidate := range lowerContextCandidates {
+		ok = s.validateContextCandidate(lowerCandidate, requiredFunctions, requiredEnums)
+		if ok {
+			return lowerCandidate
+		}
+	}
+	return ""
+}
+
+// sortContextCandidates sorts the candidates slice using the priorityContextInferrer.contextPriority order.
+func (s *priorityContextInferrer) sortContextCandidates(candidates []string) {
+	slices.SortFunc(candidates, func(l, r string) int {
+		lp, ok := s.contextPriority[l]
+		if !ok {
+			lp = math.MaxInt
+		}
+		rp, ok := s.contextPriority[r]
+		if !ok {
+			rp = math.MaxInt
+		}
+		return cmp.Compare(lp, rp)
+	})
+}
 
-	return inferredContext, nil
+// getParsedStatementHints extracts all paths, function names (editor and converter), and enum symbols
+// from the given parsed statement. These values are used by the context inferrer as hints to
+// select a context in which the functions/enums are supported.
+func (s *priorityContextInferrer) getParsedStatementHints(parsed *parsedStatement) ([]path, map[string]struct{}, map[enumSymbol]struct{}) {
+	visitor := newGrammarContextInferrerVisitor()
+	parsed.Editor.accept(&visitor)
+	if parsed.WhereClause != nil {
+		parsed.WhereClause.accept(&visitor)
+	}
+	return visitor.paths, visitor.functions, visitor.enumsSymbols
 }
 
-// defaultPriorityContextInferrer is like newPriorityContextInferrer, but using the default
-// context priorities and ignoring unknown/non-prioritized contexts.
-func defaultPriorityContextInferrer() contextInferrer {
-	return newPriorityContextInferrer(defaultContextInferPriority)
+// priorityContextInferrerHintsVisitor is a grammarVisitor implementation that collects
+// all paths, function names (converter.Function and editor.Function), and enumSymbol values.
+type priorityContextInferrerHintsVisitor struct {
+	paths        []path
+	functions    map[string]struct{}
+	enumsSymbols map[enumSymbol]struct{}
 }
 
-// newPriorityContextInferrer creates a new priority-based context inferrer.
-// To infer the context, it compares all [ottl.Path.Context] values, prioritizing them based
-// on the provide contextsPriority argument, the lower the context position is in the array,
-// the more priority it will have over other items.
-// If unknown/non-prioritized contexts are found on the statements, they can be either ignored
-// or considered when no other prioritized context is found. To skip unknown contexts, the
-// ignoreUnknownContext argument must be set to false.
-func newPriorityContextInferrer(contextsPriority []string) contextInferrer {
-	contextPriority := make(map[string]int, len(contextsPriority))
-	for i, ctx := range contextsPriority {
-		contextPriority[ctx] = i
+func newGrammarContextInferrerVisitor() priorityContextInferrerHintsVisitor {
+	return priorityContextInferrerHintsVisitor{
+		paths:        []path{},
+		functions:    make(map[string]struct{}),
+		enumsSymbols: make(map[enumSymbol]struct{}),
 	}
-	return &priorityContextInferrer{
-		contextPriority: contextPriority,
+}
+
+func (v *priorityContextInferrerHintsVisitor) visitMathExprLiteral(_ *mathExprLiteral) {}
+
+func (v *priorityContextInferrerHintsVisitor) visitEditor(e *editor) {
+	v.functions[e.Function] = struct{}{}
+}
+
+func (v *priorityContextInferrerHintsVisitor) visitConverter(c *converter) {
+	v.functions[c.Function] = struct{}{}
+}
+
+func (v *priorityContextInferrerHintsVisitor) visitValue(va *value) {
+	if va.Enum != nil {
+		v.enumsSymbols[*va.Enum] = struct{}{}
 	}
 }
+
+func (v *priorityContextInferrerHintsVisitor) visitPath(value *path) {
+	v.paths = append(v.paths, *value)
+}