diff --git a/book/src/libs/wasp/benchspy/real_world.md b/book/src/libs/wasp/benchspy/real_world.md
index ae6af9d62..1d75e9160 100644
--- a/book/src/libs/wasp/benchspy/real_world.md
+++ b/book/src/libs/wasp/benchspy/real_world.md
@@ -87,15 +87,17 @@ This function fetches the current report (for version passed as environment vari
 Let’s assume you want to ensure that none of the performance metrics degrade by more than **1%** between releases (and that error rate has not changed at all). Here's how you can write assertions using a convenient function for the `Direct` query executor:
 
 ```go
-hasErrors, errors := benchspy.CompareDirectWithThresholds(
+hasFailed, err := benchspy.CompareDirectWithThresholds(
 	1.0, // Max 1% worse median latency
 	1.0, // Max 1% worse p95 latency
 	1.0, // Max 1% worse maximum latency
 	0.0, // No increase in error rate
 	currentReport, previousReport)
-require.False(t, hasErrors, fmt.Sprintf("errors found: %v", errors))
+require.False(t, hasFailed, fmt.Sprintf("issues found: %v", err))
 ```
 
+The error returned by this function is a concatenation of all threshold violations found for each standard metric and generator.
+
 ---
 
 ## Conclusion
diff --git a/wasp/benchspy/report.go b/wasp/benchspy/report.go
index ec6b8f1f9..d2ba0054b 100644
--- a/wasp/benchspy/report.go
+++ b/wasp/benchspy/report.go
@@ -3,6 +3,7 @@ package benchspy
 import (
 	"context"
 	"encoding/json"
+	goerrors "errors"
 	"fmt"
 	"os"
 	"strings"
@@ -150,21 +151,27 @@ func MustAllPrometheusResults(sr *StandardReport) map[string]model.Value {
 }
 
 func calculateDiffPercentage(current, previous float64) float64 {
-	var diffPrecentage float64
-	if previous != 0.0 && current != 0.0 {
-		diffPrecentage = (current - previous) / previous * 100
-	} else if previous == 0.0 && current == 0.0 {
-		diffPrecentage = 0.0
-	} else {
-		diffPrecentage = 100.0
+	if previous == 0.0 {
+		if current == 0.0 {
+			return 0.0
+		}
+		return 999.0 // Convention for infinite change when previous is 0
+	}
+
+	if current == 0.0 {
+		return -100.0 // Complete improvement when current is 0
 	}
 
-	return diffPrecentage
+	return (current - previous) / previous * 100
 }
 
 // CompareDirectWithThresholds evaluates the current and previous reports against specified thresholds.
 // It checks for significant differences in metrics and returns any discrepancies found, aiding in performance analysis.
-func CompareDirectWithThresholds(medianThreshold, p95Threshold, maxThreshold, errorRateThreshold float64, currentReport, previousReport *StandardReport) (bool, map[string][]error) {
+func CompareDirectWithThresholds(medianThreshold, p95Threshold, maxThreshold, errorRateThreshold float64, currentReport, previousReport *StandardReport) (bool, error) {
+	if currentReport == nil || previousReport == nil {
+		return true, errors.New("one or both reports are nil")
+	}
+
 	L.Info().
 		Str("Current report", currentReport.CommitOrTag).
 		Str("Previous report", previousReport.CommitOrTag).
@@ -174,6 +181,10 @@ func CompareDirectWithThresholds(medianThreshold, p95Threshold, maxThreshold, er
 		Float64("Error rate threshold", errorRateThreshold).
Msg("Comparing Direct metrics with thresholds") + if thresholdsErr := validateThresholds(medianThreshold, p95Threshold, maxThreshold, errorRateThreshold); thresholdsErr != nil { + return true, thresholdsErr + } + allCurrentResults := MustAllDirectResults(currentReport) allPreviousResults := MustAllDirectResults(previousReport) @@ -239,7 +250,46 @@ func CompareDirectWithThresholds(medianThreshold, p95Threshold, maxThreshold, er Int("Number of meaningful differences", len(errors)). Msg("Finished comparing Direct metrics with thresholds") - return len(errors) > 0, errors + return len(errors) > 0, concatenateGeneratorErrors(errors) +} + +func concatenateGeneratorErrors(errors map[string][]error) error { + var errs []error + for generatorName, errors := range errors { + for _, err := range errors { + errs = append(errs, fmt.Errorf("[%s] %w", generatorName, err)) + } + } + return goerrors.Join(errs...) +} + +func validateThresholds(medianThreshold, p95Threshold, maxThreshold, errorRateThreshold float64) error { + var errs []error + + var validateThreshold = func(name string, threshold float64) error { + if threshold < 0 || threshold > 100 { + return fmt.Errorf("%s threshold %.4f is not in the range [0, 100]", name, threshold) + } + return nil + } + + if err := validateThreshold("median", medianThreshold); err != nil { + errs = append(errs, err) + } + + if err := validateThreshold("p95", p95Threshold); err != nil { + errs = append(errs, err) + } + + if err := validateThreshold("max", maxThreshold); err != nil { + errs = append(errs, err) + } + + if err := validateThreshold("error rate", errorRateThreshold); err != nil { + errs = append(errs, err) + } + + return goerrors.Join(errs...) } // PrintStandardDirectMetrics outputs a comparison of direct metrics between two reports. 
diff --git a/wasp/benchspy/report_test.go b/wasp/benchspy/report_test.go
index 7e07c5216..6e1eed711 100644
--- a/wasp/benchspy/report_test.go
+++ b/wasp/benchspy/report_test.go
@@ -1384,13 +1384,10 @@ func TestBenchSpy_CompareDirectWithThresholds(t *testing.T) {
 			},
 		}
 
-		failed, errs := CompareDirectWithThresholds(10.0, 1.0, 1.0, 1.0, currentReport, previousReport)
+		failed, err := CompareDirectWithThresholds(10.0, 1.0, 1.0, 1.0, currentReport, previousReport)
 		assert.True(t, failed)
-		assert.Len(t, errs, 1)
-		assert.Len(t, errs["test-gen"], 1)
-		for _, err := range errs["test-gen"] {
-			assert.Contains(t, err.Error(), "different, which is higher than the threshold")
-		}
+		assert.NotNil(t, err)
+		assert.Contains(t, err.Error(), fmt.Sprintf("[test-gen] %s is 50.0000%% different, which is higher than the threshold", string(MedianLatency)))
 	})
 
 	t.Run("all metrics exceed thresholds", func(t *testing.T) {
@@ -1442,13 +1439,13 @@ func TestBenchSpy_CompareDirectWithThresholds(t *testing.T) {
 			},
 		}
 
-		failed, errs := CompareDirectWithThresholds(10.0, 10.0, 10.0, 10.0, currentReport, previousReport)
+		failed, err := CompareDirectWithThresholds(10.0, 10.0, 10.0, 10.0, currentReport, previousReport)
 		assert.True(t, failed)
-		assert.Len(t, errs, 1)
-		assert.Len(t, errs["test-gen"], 4)
-		for _, err := range errs["test-gen"] {
-			assert.Contains(t, err.Error(), "different, which is higher than the threshold")
-		}
+		assert.NotNil(t, err)
+		assert.Contains(t, err.Error(), fmt.Sprintf("[test-gen] %s is 50.0000%% different, which is higher than the threshold", string(MedianLatency)))
+		assert.Contains(t, err.Error(), fmt.Sprintf("[test-gen] %s is 50.0000%% different, which is higher than the threshold", string(Percentile95Latency)))
+		assert.Contains(t, err.Error(), fmt.Sprintf("[test-gen] %s is 50.0000%% different, which is higher than the threshold", string(MaxLatency)))
+		assert.Contains(t, err.Error(), fmt.Sprintf("[test-gen] %s is 100.0000%% different, which is higher than the threshold", string(ErrorRate)))
 	})
 
 	t.Run("handle zero values", func(t *testing.T) {
@@ -1500,12 +1497,67 @@ func TestBenchSpy_CompareDirectWithThresholds(t *testing.T) {
 			},
 		}
 
-		failed, errs := CompareDirectWithThresholds(10.0, 10.0, 10.0, 10.0, currentReport, previousReport)
+		failed, err := CompareDirectWithThresholds(10.0, 10.0, 10.0, 10.0, currentReport, previousReport)
 		assert.False(t, failed)
-		assert.Empty(t, errs)
+		assert.Nil(t, err)
 	})
 
-	t.Run("handle missing metrics", func(t *testing.T) {
+	t.Run("handle missing metrics from current report", func(t *testing.T) {
+		previousReport := &StandardReport{
+			BasicData: BasicData{
+				GeneratorConfigs: map[string]*wasp.Config{
+					"test-gen": {
+						GenName: "test-gen",
+					},
+				},
+			},
+			QueryExecutors: []QueryExecutor{
+				&MockQueryExecutor{
+					KindFn: func() string { return string(StandardQueryExecutor_Direct) },
+					ResultsFn: func() map[string]interface{} {
+						return map[string]interface{}{
+							string(MedianLatency): 100.0,
+							string(Percentile95Latency): 0.0,
+							string(MaxLatency): 0.0,
+							string(ErrorRate): 0.0,
+						}
+					},
+					GeneratorNameFn: func() string { return "test-gen" },
+				},
+			},
+		}
+
+		currentReport := &StandardReport{
+			BasicData: BasicData{
+				GeneratorConfigs: map[string]*wasp.Config{
+					"test-gen": {
+						GenName: "test-gen",
+					},
+				},
+			},
+			QueryExecutors: []QueryExecutor{
+				&MockQueryExecutor{
+					KindFn: func() string { return string(StandardQueryExecutor_Direct) },
+					ResultsFn: func() map[string]interface{} {
+						return map[string]interface{}{
+							string(MedianLatency): 105.0,
+							// missing other metrics
+						}
+					},
+					GeneratorNameFn: func() string { return "test-gen" },
+				},
+			},
+		}
+
+		failed, err := CompareDirectWithThresholds(10.0, 10.0, 10.0, 10.0, currentReport, previousReport)
+		assert.True(t, failed)
+		assert.NotNil(t, err)
+		assert.Contains(t, err.Error(), fmt.Sprintf("[test-gen] %s metric results were missing from current report", string(Percentile95Latency)))
+		assert.Contains(t, err.Error(), fmt.Sprintf("[test-gen] %s metric results were missing from current report", string(MaxLatency)))
+		assert.Contains(t, err.Error(), fmt.Sprintf("[test-gen] %s metric results were missing from current report", string(ErrorRate)))
+	})
+
+	t.Run("handle missing metrics from previous report", func(t *testing.T) {
 		previousReport := &StandardReport{
 			BasicData: BasicData{
 				GeneratorConfigs: map[string]*wasp.Config{
@@ -1528,6 +1580,63 @@ func TestBenchSpy_CompareDirectWithThresholds(t *testing.T) {
 			},
 		}
 
+		currentReport := &StandardReport{
+			BasicData: BasicData{
+				GeneratorConfigs: map[string]*wasp.Config{
+					"test-gen": {
+						GenName: "test-gen",
+					},
+				},
+			},
+			QueryExecutors: []QueryExecutor{
+				&MockQueryExecutor{
+					KindFn: func() string { return string(StandardQueryExecutor_Direct) },
+					ResultsFn: func() map[string]interface{} {
+						return map[string]interface{}{
+							string(MedianLatency): 105.0,
+							string(Percentile95Latency): 0.0,
+							string(MaxLatency): 0.0,
+							string(ErrorRate): 0.0,
+						}
+					},
+					GeneratorNameFn: func() string { return "test-gen" },
+				},
+			},
+		}
+
+		failed, err := CompareDirectWithThresholds(10.0, 10.0, 10.0, 10.0, currentReport, previousReport)
+		assert.True(t, failed)
+		assert.NotNil(t, err)
+		assert.Contains(t, err.Error(), fmt.Sprintf("[test-gen] %s metric results were missing from previous report", string(Percentile95Latency)))
+		assert.Contains(t, err.Error(), fmt.Sprintf("[test-gen] %s metric results were missing from previous report", string(MaxLatency)))
+		assert.Contains(t, err.Error(), fmt.Sprintf("[test-gen] %s metric results were missing from previous report", string(ErrorRate)))
+	})
+
+	t.Run("handle missing metrics from both reports", func(t *testing.T) {
+		previousReport := &StandardReport{
+			BasicData: BasicData{
+				GeneratorConfigs: map[string]*wasp.Config{
+					"test-gen": {
+						GenName: "test-gen",
+					},
+				},
+			},
+			QueryExecutors: []QueryExecutor{
+				&MockQueryExecutor{
+					KindFn: func() string { return string(StandardQueryExecutor_Direct) },
+					ResultsFn: func() map[string]interface{} {
+						return map[string]interface{}{
+							string(MedianLatency): 100.0,
+							string(Percentile95Latency): 0.0,
+							string(MaxLatency): 0.0,
+							string(ErrorRate): 0.0,
+						}
+					},
+					GeneratorNameFn: func() string { return "test-gen" },
+				},
+			},
+		}
+
 		currentReport := &StandardReport{
 			BasicData: BasicData{
 				GeneratorConfigs: map[string]*wasp.Config{
@@ -1550,13 +1659,12 @@ func TestBenchSpy_CompareDirectWithThresholds(t *testing.T) {
 			},
 		}
 
-		failed, errs := CompareDirectWithThresholds(10.0, 10.0, 10.0, 10.0, currentReport, previousReport)
+		failed, err := CompareDirectWithThresholds(10.0, 10.0, 10.0, 10.0, currentReport, previousReport)
 		assert.True(t, failed)
-		assert.Len(t, errs, 1)
-		assert.Len(t, errs["test-gen"], 3) // Should have errors for missing P95, Max, and Error Rate
-		for _, err := range errs["test-gen"] {
-			assert.Contains(t, err.Error(), "results were missing")
-		}
+		assert.NotNil(t, err)
+		assert.Contains(t, err.Error(), fmt.Sprintf("[test-gen] %s metric results were missing from current report", string(Percentile95Latency)))
+		assert.Contains(t, err.Error(), fmt.Sprintf("[test-gen] %s metric results were missing from current report", string(MaxLatency)))
+		assert.Contains(t, err.Error(), fmt.Sprintf("[test-gen] %s metric results were missing from current report", string(ErrorRate)))
 	})
 
 	t.Run("handle zero to non-zero transition", func(t *testing.T) {
@@ -1608,13 +1716,207 @@ func TestBenchSpy_CompareDirectWithThresholds(t *testing.T) {
 			},
 		}
 
-		failed, errs := CompareDirectWithThresholds(10.0, 10.0, 10.0, 10.0, currentReport, previousReport)
+		failed, err := CompareDirectWithThresholds(10.0, 10.0, 10.0, 10.0, currentReport, previousReport)
 		assert.True(t, failed)
-		assert.Len(t, errs, 1)
-		assert.Len(t, errs["test-gen"], 4)
-		for _, err := range errs["test-gen"] {
-			assert.Contains(t, err.Error(), "100.0000% different")
+		assert.NotNil(t, err)
+		assert.Contains(t, err.Error(), fmt.Sprintf("[test-gen] %s is 999.0000%% different, which is higher than the threshold", string(MedianLatency)))
+		assert.Contains(t, err.Error(), fmt.Sprintf("[test-gen] %s is 999.0000%% different, which is higher than the threshold", string(Percentile95Latency)))
+		assert.Contains(t, err.Error(), fmt.Sprintf("[test-gen] %s is 999.0000%% different, which is higher than the threshold", string(MaxLatency)))
+		assert.Contains(t, err.Error(), fmt.Sprintf("[test-gen] %s is 999.0000%% different, which is higher than the threshold", string(ErrorRate)))
+	})
+
+	t.Run("handle non-zero to zero transition", func(t *testing.T) {
+		previousReport := &StandardReport{
+			BasicData: BasicData{
+				GeneratorConfigs: map[string]*wasp.Config{
+					"test-gen": {
+						GenName: "test-gen",
+					},
+				},
+			},
+			QueryExecutors: []QueryExecutor{
+				&MockQueryExecutor{
+					KindFn: func() string { return string(StandardQueryExecutor_Direct) },
+					ResultsFn: func() map[string]interface{} {
+						return map[string]interface{}{
+							string(MedianLatency): 10.0,
+							string(Percentile95Latency): 20.0,
+							string(MaxLatency): 311.0,
+							string(ErrorRate): 1.0,
+						}
+					},
+					GeneratorNameFn: func() string { return "test-gen" },
+				},
+			},
 		}
+
+		currentReport := &StandardReport{
+			BasicData: BasicData{
+				GeneratorConfigs: map[string]*wasp.Config{
+					"test-gen": {
+						GenName: "test-gen",
+					},
+				},
+			},
+			QueryExecutors: []QueryExecutor{
+				&MockQueryExecutor{
+					KindFn: func() string { return string(StandardQueryExecutor_Direct) },
+					ResultsFn: func() map[string]interface{} {
+						return map[string]interface{}{
+							string(MedianLatency): 0.0,
+							string(Percentile95Latency): 0.0,
+							string(MaxLatency): 0.0,
+							string(ErrorRate): 0.0,
+						}
+					},
+					GeneratorNameFn: func() string { return "test-gen" },
+				},
+			},
+		}
+
+		failed, err := CompareDirectWithThresholds(10.0, 10.0, 10.0, 10.0, currentReport, previousReport)
+		assert.False(t, failed)
+		assert.Nil(t, err)
+	})
+
+	t.Run("handle edge-cases", func(t *testing.T) {
+		previousReport := &StandardReport{
+			BasicData: BasicData{
+				GeneratorConfigs: map[string]*wasp.Config{
+					"test-gen": {
+						GenName: "test-gen",
+					},
+				},
+			},
+			QueryExecutors: []QueryExecutor{
+				&MockQueryExecutor{
+					KindFn: func() string { return string(StandardQueryExecutor_Direct) },
+					ResultsFn: func() map[string]interface{} {
+						return map[string]interface{}{
+							string(MedianLatency): 10.1,
+							string(Percentile95Latency): 10.1,
+							string(MaxLatency): 10.0,
+							string(ErrorRate): 10.0,
+						}
+					},
+					GeneratorNameFn: func() string { return "test-gen" },
+				},
+			},
+		}
+
+		currentReport := &StandardReport{
+			BasicData: BasicData{
+				GeneratorConfigs: map[string]*wasp.Config{
+					"test-gen": {
+						GenName: "test-gen",
+					},
+				},
+			},
+			QueryExecutors: []QueryExecutor{
+				&MockQueryExecutor{
+					KindFn: func() string { return string(StandardQueryExecutor_Direct) },
+					ResultsFn: func() map[string]interface{} {
+						return map[string]interface{}{
+							string(MedianLatency): 10.2,
+							string(Percentile95Latency): 10.1999,
+							string(MaxLatency): 0.0,
+							string(ErrorRate): 0.0,
+						}
+					},
+					GeneratorNameFn: func() string { return "test-gen" },
+				},
+			},
+		}
+
+		failed, err := CompareDirectWithThresholds(0.99, 0.9892, 10.0, 10.0, currentReport, previousReport)
+		assert.True(t, failed)
+		assert.NotNil(t, err)
+		assert.Contains(t, err.Error(), fmt.Sprintf("[test-gen] %s is 0.9901%% different, which is higher than the threshold", string(MedianLatency)))
+	})
+
+	t.Run("handle nil reports", func(t *testing.T) {
+		report := &StandardReport{
+			BasicData: BasicData{
+				GeneratorConfigs: map[string]*wasp.Config{
+					"test-gen": {
+						GenName: "test-gen",
+					},
+				},
+			},
+			QueryExecutors: []QueryExecutor{
+				&MockQueryExecutor{
+					KindFn: func() string { return string(StandardQueryExecutor_Direct) },
+					ResultsFn: func() map[string]interface{} {
+						return map[string]interface{}{
+							string(MedianLatency): 10.2,
+							string(Percentile95Latency): 10.1999,
+							string(MaxLatency): 0.0,
+							string(ErrorRate): 0.0,
+						}
+					},
+					GeneratorNameFn: func() string { return "test-gen" },
+				},
+			},
+		}
+
+		failed, err := CompareDirectWithThresholds(10.0, 10.0, 10.0, 10.0, report, nil)
+		assert.True(t, failed)
+		assert.NotNil(t, err)
+		assert.Contains(t, err.Error(), "one or both reports are nil")
+
+		failed, err = CompareDirectWithThresholds(10.0, 10.0, 10.0, 10.0, nil, report)
+		assert.True(t, failed)
+		assert.NotNil(t, err)
+		assert.Contains(t, err.Error(), "one or both reports are nil")
+
+		failed, err = CompareDirectWithThresholds(10.0, 10.0, 10.0, 10.0, nil, nil)
+		assert.True(t, failed)
+		assert.NotNil(t, err)
+		assert.Contains(t, err.Error(), "one or both reports are nil")
+	})
+
+	t.Run("handle incorrect thresholds", func(t *testing.T) {
+		report := &StandardReport{
+			BasicData: BasicData{
+				GeneratorConfigs: map[string]*wasp.Config{
+					"test-gen": {
+						GenName: "test-gen",
+					},
+				},
+			},
+			QueryExecutors: []QueryExecutor{
+				&MockQueryExecutor{
+					KindFn: func() string { return string(StandardQueryExecutor_Direct) },
+					ResultsFn: func() map[string]interface{} {
+						return map[string]interface{}{
+							string(MedianLatency): 10.0,
+							string(Percentile95Latency): 10.0,
+							string(MaxLatency): 0.0,
+							string(ErrorRate): 0.0,
+						}
+					},
+					GeneratorNameFn: func() string { return "test-gen" },
+				},
+			},
+		}
+
+		failed, err := CompareDirectWithThresholds(-0.1, 100.0, 0.0, 100.0, report, report)
+		assert.True(t, failed)
+		assert.NotNil(t, err)
+		assert.Contains(t, err.Error(), "median threshold -0.1000 is not in the range [0, 100]")
+
+		failed, err = CompareDirectWithThresholds(1.0, 101.0, 0.0, 100.0, report, report)
+		assert.True(t, failed)
+		assert.NotNil(t, err)
+		assert.Contains(t, err.Error(), "p95 threshold 101.0000 is not in the range [0, 100]")
+
+		failed, err = CompareDirectWithThresholds(-1, -1, -1, -1, report, report)
+		assert.True(t, failed)
+		assert.NotNil(t, err)
+		assert.Contains(t, err.Error(), "median threshold -1.0000 is not in the range [0, 100]")
+		assert.Contains(t, err.Error(), "p95 threshold -1.0000 is not in the range [0, 100]")
+		assert.Contains(t, err.Error(), "max threshold -1.0000 is not in the range [0, 100]")
+		assert.Contains(t, err.Error(), "error rate threshold -1.0000 is not in the range [0, 100]")
 	})
 }