Skip to content

Commit

Permalink
Alerting: Refactor Evaluator (grafana#51673)
Browse files Browse the repository at this point in the history
* AlertRule to return condition
* update ConditionEval to not return an error because it's always nil
* make getExprRequest private
* refactor executeCondition to just converter and move execution to the ConditionEval as this makes code more readable.
* log error if results have errors
* change signature of evaluate function to not return an error
  • Loading branch information
yuri-tceretian authored Jul 12, 2022
1 parent 2d8a91a commit 554ebd6
Show file tree
Hide file tree
Showing 6 changed files with 51 additions and 59 deletions.
5 changes: 1 addition & 4 deletions pkg/services/ngalert/api/api_testing.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,7 @@ func (srv TestingApiSrv) RouteTestGrafanaRuleConfig(c *models.ReqContext, body a
now = timeNow()
}

evalResults, err := srv.evaluator.ConditionEval(&evalCond, now)
if err != nil {
return ErrResp(http.StatusBadRequest, err, "Failed to evaluate conditions")
}
evalResults := srv.evaluator.ConditionEval(evalCond, now)

frame := evalResults.AsDataFrame()
return response.JSONStreaming(http.StatusOK, util.DynMap{
Expand Down
4 changes: 2 additions & 2 deletions pkg/services/ngalert/api/api_testing_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ func TestRouteTestGrafanaRuleConfig(t *testing.T) {

evaluator := &eval.FakeEvaluator{}
var result []eval.Result
evaluator.EXPECT().ConditionEval(mock.Anything, mock.Anything).Return(result, nil)
evaluator.EXPECT().ConditionEval(mock.Anything, mock.Anything).Return(result)

srv := createTestingApiSrv(ds, ac, evaluator)

Expand Down Expand Up @@ -109,7 +109,7 @@ func TestRouteTestGrafanaRuleConfig(t *testing.T) {

evaluator := &eval.FakeEvaluator{}
var result []eval.Result
evaluator.EXPECT().ConditionEval(mock.Anything, mock.Anything).Return(result, nil)
evaluator.EXPECT().ConditionEval(mock.Anything, mock.Anything).Return(result)

srv := createTestingApiSrv(ds, ac, evaluator)

Expand Down
48 changes: 24 additions & 24 deletions pkg/services/ngalert/eval/eval.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ import (
//go:generate mockery --name Evaluator --structname FakeEvaluator --inpackage --filename evaluator_mock.go --with-expecter
type Evaluator interface {
// ConditionEval executes conditions and evaluates the result.
ConditionEval(condition *models.Condition, now time.Time) (Results, error)
ConditionEval(condition models.Condition, now time.Time) Results
// QueriesAndExpressionsEval executes queries and expressions and returns the result.
QueriesAndExpressionsEval(orgID int64, data []models.AlertQuery, now time.Time) (*backend.QueryDataResponse, error)
}
Expand Down Expand Up @@ -89,6 +89,15 @@ type ExecutionResults struct {
// Results is a slice of evaluated alert instances states.
type Results []Result

func (evalResults Results) HasErrors() bool {
for _, r := range evalResults {
if r.State == Error {
return true
}
}
return false
}

// Result contains the evaluated State of an alert instance
// identified by its labels.
type Result struct {
Expand Down Expand Up @@ -153,8 +162,8 @@ type AlertExecCtx struct {
Ctx context.Context
}

// GetExprRequest validates the condition, gets the datasource information and creates an expr.Request from it.
func GetExprRequest(ctx AlertExecCtx, data []models.AlertQuery, now time.Time, dsCacheService datasources.CacheService, secretsService secrets.Service) (*expr.Request, error) {
// getExprRequest validates the condition, gets the datasource information and creates an expr.Request from it.
func getExprRequest(ctx AlertExecCtx, data []models.AlertQuery, now time.Time, dsCacheService datasources.CacheService, secretsService secrets.Service) (*expr.Request, error) {
req := &expr.Request{
OrgId: ctx.OrgID,
Headers: map[string]string{
Expand All @@ -166,8 +175,7 @@ func GetExprRequest(ctx AlertExecCtx, data []models.AlertQuery, now time.Time, d

datasources := make(map[string]*datasources.DataSource, len(data))

for i := range data {
q := data[i]
for _, q := range data {
model, err := q.GetModel()
if err != nil {
return nil, fmt.Errorf("failed to get query model: %w", err)
Expand Down Expand Up @@ -259,12 +267,7 @@ type NumberValueCapture struct {
Value *float64
}

func executeCondition(ctx AlertExecCtx, c *models.Condition, now time.Time, exprService *expr.Service, dsCacheService datasources.CacheService, secretsService secrets.Service) ExecutionResults {
execResp, err := executeQueriesAndExpressions(ctx, c.Data, now, exprService, dsCacheService, secretsService)
if err != nil {
return ExecutionResults{Error: err}
}

func queryDataResponseToExecutionResults(c models.Condition, execResp *backend.QueryDataResponse) ExecutionResults {
// eval captures for the '__value_string__' annotation and the Value property of the API response.
captures := make([]NumberValueCapture, 0, len(execResp.Responses))
captureVal := func(refID string, labels data.Labels, value *float64) {
Expand Down Expand Up @@ -356,7 +359,7 @@ func executeQueriesAndExpressions(ctx AlertExecCtx, data []models.AlertQuery, no
}
}()

queryDataReq, err := GetExprRequest(ctx, data, now, dsCacheService, secretsService)
queryDataReq, err := getExprRequest(ctx, data, now, dsCacheService, secretsService)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -564,8 +567,6 @@ func (evalResults Results) AsDataFrame() data.Frame {
labelColumns = append(labelColumns, k)
}

labelColumns = sort.StringSlice(labelColumns)

frame := data.NewFrame("evaluation results")
for _, lKey := range labelColumns {
frame.Fields = append(frame.Fields, data.NewField(lKey, nil, make([]string, fieldLen)))
Expand All @@ -591,16 +592,15 @@ func (evalResults Results) AsDataFrame() data.Frame {
}

// ConditionEval executes conditions and evaluates the result.
func (e *evaluatorImpl) ConditionEval(condition *models.Condition, now time.Time) (Results, error) {
alertCtx, cancelFn := context.WithTimeout(context.Background(), e.cfg.UnifiedAlerting.EvaluationTimeout)
defer cancelFn()

alertExecCtx := AlertExecCtx{OrgID: condition.OrgID, Ctx: alertCtx, ExpressionsEnabled: e.cfg.ExpressionsEnabled, Log: e.log}

execResult := executeCondition(alertExecCtx, condition, now, e.expressionService, e.dataSourceCache, e.secretsService)

evalResults := evaluateExecutionResult(execResult, now)
return evalResults, nil
func (e *evaluatorImpl) ConditionEval(condition models.Condition, now time.Time) Results {
execResp, err := e.QueriesAndExpressionsEval(condition.OrgID, condition.Data, now)
var execResults ExecutionResults
if err != nil {
execResults = ExecutionResults{Error: err}
} else {
execResults = queryDataResponseToExecutionResults(condition, execResp)
}
return evaluateExecutionResult(execResults, now)
}

// QueriesAndExpressionsEval executes queries and expressions and returns the result.
Expand Down
23 changes: 8 additions & 15 deletions pkg/services/ngalert/eval/evaluator_mock.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions pkg/services/ngalert/models/alert_rule.go
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,14 @@ func (alertRule *AlertRule) GetLabels(opts ...LabelOption) map[string]string {
return labels
}

func (alertRule *AlertRule) GetEvalCondition() Condition {
return Condition{
Condition: alertRule.Condition,
OrgID: alertRule.OrgID,
Data: alertRule.Data,
}
}

// Diff calculates diff between two alert rules. Returns nil if two rules are equal. Otherwise, returns cmputil.DiffReport
func (alertRule *AlertRule) Diff(rule *AlertRule, ignore ...string) cmputil.DiffReport {
var reporter cmputil.DiffReporter
Expand Down
22 changes: 8 additions & 14 deletions pkg/services/ngalert/schedule/schedule.go
Original file line number Diff line number Diff line change
Expand Up @@ -388,32 +388,25 @@ func (sch *schedule) ruleRoutine(grafanaCtx context.Context, key ngmodels.AlertR
return q.Result, nil
}

evaluate := func(ctx context.Context, r *ngmodels.AlertRule, attempt int64, e *evaluation) error {
evaluate := func(ctx context.Context, r *ngmodels.AlertRule, attempt int64, e *evaluation) {
logger := logger.New("version", r.Version, "attempt", attempt, "now", e.scheduledAt)
start := sch.clock.Now()

condition := ngmodels.Condition{
Condition: r.Condition,
OrgID: r.OrgID,
Data: r.Data,
}
results, err := sch.evaluator.ConditionEval(&condition, e.scheduledAt)
results := sch.evaluator.ConditionEval(r.GetEvalCondition(), e.scheduledAt)
dur := sch.clock.Now().Sub(start)
evalTotal.Inc()
evalDuration.Observe(dur.Seconds())
if err != nil {
if results.HasErrors() {
evalTotalFailures.Inc()
// consider saving alert instance on error
logger.Error("failed to evaluate alert rule", "duration", dur, "err", err)
return err
logger.Error("failed to evaluate alert rule", "results", results, "duration", dur)
} else {
logger.Debug("alert rule evaluated", "results", results, "duration", dur)
}
logger.Debug("alert rule evaluated", "results", results, "duration", dur)

processedStates := sch.stateManager.ProcessEvalResults(ctx, e.scheduledAt, r, results)
sch.saveAlertStates(ctx, processedStates)
alerts := FromAlertStateToPostableAlerts(processedStates, sch.stateManager, sch.appURL)
sch.alertsSender.Send(key, alerts)
return nil
}

retryIfError := func(f func(attempt int64) error) error {
Expand Down Expand Up @@ -475,7 +468,8 @@ func (sch *schedule) ruleRoutine(grafanaCtx context.Context, key ngmodels.AlertR
currentRule = newRule
logger.Debug("new alert rule version fetched", "title", newRule.Title, "version", newRule.Version)
}
return evaluate(grafanaCtx, currentRule, attempt, ctx)
evaluate(grafanaCtx, currentRule, attempt, ctx)
return nil
})
if err != nil {
logger.Error("evaluation failed after all retries", "err", err)
Expand Down

0 comments on commit 554ebd6

Please sign in to comment.