From 554ebd647b2b6b12e1ea358831d97826efd605b3 Mon Sep 17 00:00:00 2001 From: Yuriy Tseretyan Date: Tue, 12 Jul 2022 16:51:32 -0400 Subject: [PATCH] Alerting: Refactor Evaluator (#51673) * AlertRule to return condition * update ConditionEval to not return an error because it's always nil * make getExprRequest private * refactor executeCondition to just converter and move execution to the ConditionEval as this makes code more readable. * log error if results have errors * change signature of evaluate function to not return an error --- pkg/services/ngalert/api/api_testing.go | 5 +- pkg/services/ngalert/api/api_testing_test.go | 4 +- pkg/services/ngalert/eval/eval.go | 48 ++++++++++---------- pkg/services/ngalert/eval/evaluator_mock.go | 23 ++++------ pkg/services/ngalert/models/alert_rule.go | 8 ++++ pkg/services/ngalert/schedule/schedule.go | 22 ++++----- 6 files changed, 51 insertions(+), 59 deletions(-) diff --git a/pkg/services/ngalert/api/api_testing.go b/pkg/services/ngalert/api/api_testing.go index 32fd5e4e1a92d..5ff65e3731c78 100644 --- a/pkg/services/ngalert/api/api_testing.go +++ b/pkg/services/ngalert/api/api_testing.go @@ -55,10 +55,7 @@ func (srv TestingApiSrv) RouteTestGrafanaRuleConfig(c *models.ReqContext, body a now = timeNow() } - evalResults, err := srv.evaluator.ConditionEval(&evalCond, now) - if err != nil { - return ErrResp(http.StatusBadRequest, err, "Failed to evaluate conditions") - } + evalResults := srv.evaluator.ConditionEval(evalCond, now) frame := evalResults.AsDataFrame() return response.JSONStreaming(http.StatusOK, util.DynMap{ diff --git a/pkg/services/ngalert/api/api_testing_test.go b/pkg/services/ngalert/api/api_testing_test.go index ecebe2eedab12..f2f33984bcbb5 100644 --- a/pkg/services/ngalert/api/api_testing_test.go +++ b/pkg/services/ngalert/api/api_testing_test.go @@ -69,7 +69,7 @@ func TestRouteTestGrafanaRuleConfig(t *testing.T) { evaluator := &eval.FakeEvaluator{} var result []eval.Result - evaluator.EXPECT().ConditionEval(mock.Anything, mock.Anything).Return(result, nil) + evaluator.EXPECT().ConditionEval(mock.Anything, mock.Anything).Return(result) srv := createTestingApiSrv(ds, ac, evaluator) @@ -109,7 +109,7 @@ func TestRouteTestGrafanaRuleConfig(t *testing.T) { evaluator := &eval.FakeEvaluator{} var result []eval.Result - evaluator.EXPECT().ConditionEval(mock.Anything, mock.Anything).Return(result, nil) + evaluator.EXPECT().ConditionEval(mock.Anything, mock.Anything).Return(result) srv := createTestingApiSrv(ds, ac, evaluator) diff --git a/pkg/services/ngalert/eval/eval.go b/pkg/services/ngalert/eval/eval.go index c98abf8ff09c9..c5c942a207a7c 100644 --- a/pkg/services/ngalert/eval/eval.go +++ b/pkg/services/ngalert/eval/eval.go @@ -28,7 +28,7 @@ import ( //go:generate mockery --name Evaluator --structname FakeEvaluator --inpackage --filename evaluator_mock.go --with-expecter type Evaluator interface { // ConditionEval executes conditions and evaluates the result. - ConditionEval(condition *models.Condition, now time.Time) (Results, error) + ConditionEval(condition models.Condition, now time.Time) Results // QueriesAndExpressionsEval executes queries and expressions and returns the result. QueriesAndExpressionsEval(orgID int64, data []models.AlertQuery, now time.Time) (*backend.QueryDataResponse, error) } @@ -89,6 +89,15 @@ type ExecutionResults struct { // Results is a slice of evaluated alert instances states. type Results []Result +func (evalResults Results) HasErrors() bool { + for _, r := range evalResults { + if r.State == Error { + return true + } + } + return false +} + // Result contains the evaluated State of an alert instance // identified by its labels. type Result struct { @@ -153,8 +162,8 @@ type AlertExecCtx struct { Ctx context.Context } -// GetExprRequest validates the condition, gets the datasource information and creates an expr.Request from it. -func GetExprRequest(ctx AlertExecCtx, data []models.AlertQuery, now time.Time, dsCacheService datasources.CacheService, secretsService secrets.Service) (*expr.Request, error) { +// getExprRequest validates the condition, gets the datasource information and creates an expr.Request from it. +func getExprRequest(ctx AlertExecCtx, data []models.AlertQuery, now time.Time, dsCacheService datasources.CacheService, secretsService secrets.Service) (*expr.Request, error) { req := &expr.Request{ OrgId: ctx.OrgID, Headers: map[string]string{ @@ -166,8 +175,7 @@ func GetExprRequest(ctx AlertExecCtx, data []models.AlertQuery, now time.Time, d datasources := make(map[string]*datasources.DataSource, len(data)) - for i := range data { - q := data[i] + for _, q := range data { model, err := q.GetModel() if err != nil { return nil, fmt.Errorf("failed to get query model: %w", err) @@ -259,12 +267,7 @@ type NumberValueCapture struct { Value *float64 } -func executeCondition(ctx AlertExecCtx, c *models.Condition, now time.Time, exprService *expr.Service, dsCacheService datasources.CacheService, secretsService secrets.Service) ExecutionResults { - execResp, err := executeQueriesAndExpressions(ctx, c.Data, now, exprService, dsCacheService, secretsService) - if err != nil { - return ExecutionResults{Error: err} - } - +func queryDataResponseToExecutionResults(c models.Condition, execResp *backend.QueryDataResponse) ExecutionResults { // eval captures for the '__value_string__' annotation and the Value property of the API response. captures := make([]NumberValueCapture, 0, len(execResp.Responses)) captureVal := func(refID string, labels data.Labels, value *float64) { @@ -356,7 +359,7 @@ func executeQueriesAndExpressions(ctx AlertExecCtx, data []models.AlertQuery, no } }() - queryDataReq, err := GetExprRequest(ctx, data, now, dsCacheService, secretsService) + queryDataReq, err := getExprRequest(ctx, data, now, dsCacheService, secretsService) if err != nil { return nil, err } @@ -564,8 +567,6 @@ func (evalResults Results) AsDataFrame() data.Frame { labelColumns = append(labelColumns, k) } - labelColumns = sort.StringSlice(labelColumns) - frame := data.NewFrame("evaluation results") for _, lKey := range labelColumns { frame.Fields = append(frame.Fields, data.NewField(lKey, nil, make([]string, fieldLen))) @@ -591,16 +592,15 @@ func (evalResults Results) AsDataFrame() data.Frame { } // ConditionEval executes conditions and evaluates the result. -func (e *evaluatorImpl) ConditionEval(condition *models.Condition, now time.Time) (Results, error) { - alertCtx, cancelFn := context.WithTimeout(context.Background(), e.cfg.UnifiedAlerting.EvaluationTimeout) - defer cancelFn() - - alertExecCtx := AlertExecCtx{OrgID: condition.OrgID, Ctx: alertCtx, ExpressionsEnabled: e.cfg.ExpressionsEnabled, Log: e.log} - - execResult := executeCondition(alertExecCtx, condition, now, e.expressionService, e.dataSourceCache, e.secretsService) - - evalResults := evaluateExecutionResult(execResult, now) - return evalResults, nil +func (e *evaluatorImpl) ConditionEval(condition models.Condition, now time.Time) Results { + execResp, err := e.QueriesAndExpressionsEval(condition.OrgID, condition.Data, now) + var execResults ExecutionResults + if err != nil { + execResults = ExecutionResults{Error: err} + } else { + execResults = queryDataResponseToExecutionResults(condition, execResp) + } + return evaluateExecutionResult(execResults, now) } // QueriesAndExpressionsEval executes queries and expressions and returns the result. diff --git a/pkg/services/ngalert/eval/evaluator_mock.go b/pkg/services/ngalert/eval/evaluator_mock.go index ed3f46b7f2a4a..94e6064d11b07 100644 --- a/pkg/services/ngalert/eval/evaluator_mock.go +++ b/pkg/services/ngalert/eval/evaluator_mock.go @@ -25,11 +25,11 @@ func (_m *FakeEvaluator) EXPECT() *FakeEvaluator_Expecter { } // ConditionEval provides a mock function with given fields: condition, now -func (_m *FakeEvaluator) ConditionEval(condition *models.Condition, now time.Time) (Results, error) { +func (_m *FakeEvaluator) ConditionEval(condition models.Condition, now time.Time) Results { ret := _m.Called(condition, now) var r0 Results - if rf, ok := ret.Get(0).(func(*models.Condition, time.Time) Results); ok { + if rf, ok := ret.Get(0).(func(models.Condition, time.Time) Results); ok { r0 = rf(condition, now) } else { if ret.Get(0) != nil { @@ -37,14 +37,7 @@ func (_m *FakeEvaluator) ConditionEval(condition *models.Condition, now time.Tim } } - var r1 error - if rf, ok := ret.Get(1).(func(*models.Condition, time.Time) error); ok { - r1 = rf(condition, now) - } else { - r1 = ret.Error(1) - } - - return r0, r1 + return r0 } // FakeEvaluator_ConditionEval_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'ConditionEval' @@ -53,21 +46,21 @@ type FakeEvaluator_ConditionEval_Call struct { } // ConditionEval is a helper method to define mock.On call -// - condition *models.Condition +// - condition models.Condition // - now time.Time func (_e *FakeEvaluator_Expecter) ConditionEval(condition interface{}, now interface{}) *FakeEvaluator_ConditionEval_Call { return &FakeEvaluator_ConditionEval_Call{Call: _e.mock.On("ConditionEval", condition, now)} } -func (_c *FakeEvaluator_ConditionEval_Call) Run(run func(condition *models.Condition, now time.Time)) *FakeEvaluator_ConditionEval_Call { +func (_c *FakeEvaluator_ConditionEval_Call) Run(run func(condition models.Condition, now time.Time)) *FakeEvaluator_ConditionEval_Call { _c.Call.Run(func(args mock.Arguments) { - run(args[0].(*models.Condition), args[1].(time.Time)) + run(args[0].(models.Condition), args[1].(time.Time)) }) return _c } -func (_c *FakeEvaluator_ConditionEval_Call) Return(_a0 Results, _a1 error) *FakeEvaluator_ConditionEval_Call { - _c.Call.Return(_a0, _a1) +func (_c *FakeEvaluator_ConditionEval_Call) Return(_a0 Results) *FakeEvaluator_ConditionEval_Call { + _c.Call.Return(_a0) return _c } diff --git a/pkg/services/ngalert/models/alert_rule.go b/pkg/services/ngalert/models/alert_rule.go index 92d02b1a0e69d..0bbf8359e09a8 100644 --- a/pkg/services/ngalert/models/alert_rule.go +++ b/pkg/services/ngalert/models/alert_rule.go @@ -170,6 +170,14 @@ func (alertRule *AlertRule) GetLabels(opts ...LabelOption) map[string]string { return labels } +func (alertRule *AlertRule) GetEvalCondition() Condition { + return Condition{ + Condition: alertRule.Condition, + OrgID: alertRule.OrgID, + Data: alertRule.Data, + } +} + // Diff calculates diff between two alert rules. Returns nil if two rules are equal. Otherwise, returns cmputil.DiffReport func (alertRule *AlertRule) Diff(rule *AlertRule, ignore ...string) cmputil.DiffReport { var reporter cmputil.DiffReporter diff --git a/pkg/services/ngalert/schedule/schedule.go b/pkg/services/ngalert/schedule/schedule.go index 20ec97aef0bbc..8d4fbf9874384 100644 --- a/pkg/services/ngalert/schedule/schedule.go +++ b/pkg/services/ngalert/schedule/schedule.go @@ -388,32 +388,25 @@ func (sch *schedule) ruleRoutine(grafanaCtx context.Context, key ngmodels.AlertR return q.Result, nil } - evaluate := func(ctx context.Context, r *ngmodels.AlertRule, attempt int64, e *evaluation) error { + evaluate := func(ctx context.Context, r *ngmodels.AlertRule, attempt int64, e *evaluation) { logger := logger.New("version", r.Version, "attempt", attempt, "now", e.scheduledAt) start := sch.clock.Now() - condition := ngmodels.Condition{ - Condition: r.Condition, - OrgID: r.OrgID, - Data: r.Data, - } - results, err := sch.evaluator.ConditionEval(&condition, e.scheduledAt) + results := sch.evaluator.ConditionEval(r.GetEvalCondition(), e.scheduledAt) dur := sch.clock.Now().Sub(start) evalTotal.Inc() evalDuration.Observe(dur.Seconds()) - if err != nil { + if results.HasErrors() { evalTotalFailures.Inc() - // consider saving alert instance on error - logger.Error("failed to evaluate alert rule", "duration", dur, "err", err) - return err + logger.Error("failed to evaluate alert rule", "results", results, "duration", dur) + } else { + logger.Debug("alert rule evaluated", "results", results, "duration", dur) } - logger.Debug("alert rule evaluated", "results", results, "duration", dur) processedStates := sch.stateManager.ProcessEvalResults(ctx, e.scheduledAt, r, results) sch.saveAlertStates(ctx, processedStates) alerts := FromAlertStateToPostableAlerts(processedStates, sch.stateManager, sch.appURL) sch.alertsSender.Send(key, alerts) - return nil } retryIfError := func(f func(attempt int64) error) error { @@ -475,7 +468,8 @@ func (sch *schedule) ruleRoutine(grafanaCtx context.Context, key ngmodels.AlertR currentRule = newRule logger.Debug("new alert rule version fetched", "title", newRule.Title, "version", newRule.Version) } - return evaluate(grafanaCtx, currentRule, attempt, ctx) + evaluate(grafanaCtx, currentRule, attempt, ctx) + return nil }) if err != nil { logger.Error("evaluation failed after all retries", "err", err)