diff --git a/cmd/root.go b/cmd/root.go index 92cd91207..1ffdb731f 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -39,9 +39,7 @@ func InitContext() (context.Context, error) { } ctx.WithTracer(otel.GetTracerProvider().Tracer(app)) - ctx.WithTracer(otel.GetTracerProvider().Tracer("canary-checker")) - ctx.WithTracer(otel.GetTracerProvider().Tracer("canary-checker")) - ctx.DB().Use(db.NewPlugin()) + ctx.DB().Use(db.NewOopsPlugin()) return ctx, nil } diff --git a/go.mod b/go.mod index 519213b6c..66db261c6 100644 --- a/go.mod +++ b/go.mod @@ -54,6 +54,7 @@ require ( github.com/robertkrimen/otto v0.3.0 github.com/robfig/cron/v3 v3.0.1 github.com/samber/lo v1.47.0 + github.com/samber/oops v1.13.1 github.com/sethvargo/go-retry v0.3.0 github.com/sevennt/echo-pprof v0.1.1-0.20220616082843-66a461746b5f github.com/spf13/cobra v1.8.0 @@ -253,7 +254,6 @@ require ( github.com/prometheus/procfs v0.15.1 // indirect github.com/rodaine/table v1.3.0 // indirect github.com/rogpeppe/go-internal v1.12.0 // indirect - github.com/samber/oops v1.13.1 // indirect github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3 // indirect github.com/shirou/gopsutil/v3 v3.24.5 // indirect github.com/shoenig/go-m1cpu v0.1.6 // indirect @@ -334,7 +334,7 @@ require ( sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect ) -replace github.com/flanksource/duty => ../duty +// replace github.com/flanksource/duty => ../duty // replace github.com/flanksource/artifacts => ../artifacts diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go index 12267ca67..f183c1641 100644 --- a/pkg/metrics/metrics.go +++ b/pkg/metrics/metrics.go @@ -94,7 +94,15 @@ func setupMetrics() { checkLabels, ) - prometheus.MustRegister(Gauge, CanaryCheckInfo, OpsCount, OpsSuccessCount, OpsInvalidCount, OpsFailedCount, RequestLatency) + OpsErrorCount = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "canary_check_error_count", + Help: "The total number of checks that resulted in error", + }, + checkLabels, + ) + + prometheus.MustRegister(Gauge, CanaryCheckInfo, OpsCount, OpsSuccessCount, OpsInvalidCount, OpsErrorCount, OpsFailedCount, RequestLatency) } var ( @@ -115,6 +123,7 @@ var ( OpsCount *prometheus.CounterVec OpsFailedCount *prometheus.CounterVec OpsSuccessCount *prometheus.CounterVec + OpsErrorCount *prometheus.CounterVec RequestLatency *prometheus.HistogramVec ) @@ -278,7 +287,13 @@ func Record( Gauge.WithLabelValues(gaugeLabels...).Set(1) CanaryCheckInfo.WithLabelValues(checkMetricLabels...).Set(1) - OpsFailedCount.WithLabelValues(checkMetricLabels...).Inc() + + if result.InternalError { + OpsErrorCount.WithLabelValues(checkMetricLabels...).Inc() + } else { + fail.Append(1) + OpsFailedCount.WithLabelValues(checkMetricLabels...).Inc() + } } _uptime = types.Uptime{Passed: int(pass.Reduce(rolling.Sum)), Failed: int(fail.Reduce(rolling.Sum))} diff --git a/pkg/results.go b/pkg/results.go index 440939770..f420c23aa 100644 --- a/pkg/results.go +++ b/pkg/results.go @@ -6,8 +6,7 @@ import ( "github.com/flanksource/canary-checker/api/external" v1 "github.com/flanksource/canary-checker/api/v1" - "github.com/samber/lo" - "github.com/samber/oops" + "github.com/flanksource/duty/db" ) type Results []*CheckResult @@ -94,9 +93,7 @@ func (result *CheckResult) Failf(message string, args ...interface{}) *CheckResu result.Error += ", " } - if oe, ok := oops.AsOops(fmt.Errorf(message, args...)); ok { - result.InternalError = lo.Contains(oe.Tags(), "db") - } + result.InternalError = db.IsDBError(fmt.Errorf(message, args...)) result.Pass = false result.Error += fmt.Sprintf(message, args...)