diff --git a/cmd/root.go b/cmd/root.go index 9ea2f97..9ce660b 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -60,7 +60,7 @@ func Execute() { registry.MustRegister(metrics.BuildStatic(cfg.Static.Gauges)...) // Register reference rpc metrics - refMets := metrics.NewReferenceRPC() + refMets := metrics.NewHTTPRequest() registry.MustRegister(refMets.Metrics()...) // Register cosmos chain metrics @@ -125,7 +125,7 @@ func logFatal(msg string, err error) { os.Exit(1) } -func buildCosmosJobs(cosmosMets *metrics.Cosmos, refMets *metrics.ReferenceRPC, cfg Config) (jobs []metrics.Job) { +func buildCosmosJobs(cosmosMets *metrics.Cosmos, refMets *metrics.ReferenceAPI, cfg Config) (jobs []metrics.Job) { // TODO(nix): Need different rest clients per chain. This hack prevents > 1 chain. var urls []url.URL for _, rest := range cfg.Cosmos[0].Rest { @@ -136,8 +136,7 @@ func buildCosmosJobs(cosmosMets *metrics.Cosmos, refMets *metrics.ReferenceRPC, urls = append(urls, *u) } - const rpcType = "cosmos" - restClient := cosmos.NewRestClient(metrics.NewFallbackClient(httpClient, refMets, rpcType, urls)) + restClient := cosmos.NewRestClient(metrics.NewFallbackClient(httpClient, refMets, urls)) restJobs := cosmos.BuildRestJobs(cosmosMets, restClient, cfg.Cosmos) jobs = append(jobs, toJobs(restJobs)...) diff --git a/metrics/fallback_client.go b/metrics/fallback_client.go index 92a6fae..cb5fe1b 100644 --- a/metrics/fallback_client.go +++ b/metrics/fallback_client.go @@ -16,15 +16,14 @@ type FallbackClient struct { httpDo func(req *http.Request) (*http.Response, error) log *slog.Logger metrics ClientMetrics - rpcType string } type ClientMetrics interface { - IncClientError(rpcType string, host url.URL, reason string) + IncAPIError(host url.URL, reason string) // TODO(nix): Metrics for request counts. Latency histogram. } -func NewFallbackClient(client *http.Client, metrics ClientMetrics, rpcType string, hosts []url.URL) *FallbackClient { +func NewFallbackClient(client *http.Client, metrics ClientMetrics, hosts []url.URL) *FallbackClient { if len(hosts) == 0 { panic("no hosts provided") } @@ -33,7 +32,6 @@ func NewFallbackClient(client *http.Client, metrics ClientMetrics, rpcType strin httpDo: client.Do, log: slog.Default(), metrics: metrics, - rpcType: rpcType, } } @@ -41,7 +39,7 @@ const unknownErrReason = "unknown" func (c FallbackClient) Get(ctx context.Context, path string) (*http.Response, error) { doGet := func(host url.URL) (*http.Response, error) { - log := c.log.With("host", host.Hostname(), "path", path, "rpc", c.rpcType) + log := c.log.With("host", host.Hostname(), "path", path, "method", http.MethodGet) host.Path = path req, err := http.NewRequestWithContext(ctx, http.MethodGet, host.String(), nil) if err != nil { @@ -58,7 +56,7 @@ func (c FallbackClient) Get(ctx context.Context, path string) (*http.Response, e if resp.StatusCode < 200 || resp.StatusCode >= 300 { _ = resp.Body.Close() log.Error("Response returned bad status code", "status", resp.StatusCode) - c.metrics.IncClientError(c.rpcType, host, strconv.Itoa(resp.StatusCode)) + c.metrics.IncAPIError(host, strconv.Itoa(resp.StatusCode)) return nil, fmt.Errorf("%s: bad status code %d", req.URL, resp.StatusCode) } return resp, nil @@ -85,5 +83,5 @@ func (c FallbackClient) recordErrMetric(host url.URL, err error) { // Do not record when the process is shutting down. return } - c.metrics.IncClientError(c.rpcType, host, reason) + c.metrics.IncAPIError(host, reason) } diff --git a/metrics/fallback_client_test.go b/metrics/fallback_client_test.go index 1d8b0f7..dbb921e 100644 --- a/metrics/fallback_client_test.go +++ b/metrics/fallback_client_test.go @@ -17,14 +17,12 @@ import ( type mockClientMetrics struct { IncClientErrCalls int - GotRPCType string GotHost url.URL GotErrMsg string } -func (m *mockClientMetrics) IncClientError(rpcType string, host url.URL, errMsg string) { +func (m *mockClientMetrics) IncAPIError(host url.URL, errMsg string) { m.IncClientErrCalls++ - m.GotRPCType = rpcType m.GotHost = host m.GotErrMsg = errMsg } @@ -41,7 +39,7 @@ func TestFallbackClient_Get(t *testing.T) { ctx := context.WithValue(context.Background(), dummy("test"), dummy("test")) t.Run("happy path", func(t *testing.T) { - client := NewFallbackClient(&http.Client{}, nil, "test", urls) + client := NewFallbackClient(&http.Client{}, nil, urls) client.log = nopLogger require.NotNil(t, client.httpDo) @@ -65,7 +63,7 @@ func TestFallbackClient_Get(t *testing.T) { t.Run("fallback on error", func(t *testing.T) { var metrics mockClientMetrics - client := NewFallbackClient(nil, &metrics, "test", urls) + client := NewFallbackClient(nil, &metrics, urls) client.log = nopLogger var callCount int @@ -91,7 +89,7 @@ func TestFallbackClient_Get(t *testing.T) { t.Run("fallback on bad status code", func(t *testing.T) { var metrics mockClientMetrics - client := NewFallbackClient(nil, &metrics, "test", urls) + client := NewFallbackClient(nil, &metrics, urls) client.log = nopLogger var callCount int @@ -119,7 +117,7 @@ func TestFallbackClient_Get(t *testing.T) { t.Run("all errors", func(t *testing.T) { r := rand.New(rand.NewSource(time.Now().UnixNano())) var metrics mockClientMetrics - client := NewFallbackClient(nil, &metrics, "test", urls) + client := NewFallbackClient(nil, &metrics, urls) client.log = nopLogger var callCount int @@ -154,7 +152,7 @@ func TestFallbackClient_Get(t *testing.T) { {nil, &http.Response{StatusCode: http.StatusNotFound}, "404"}, } { var metrics mockClientMetrics - client := NewFallbackClient(nil, &metrics, "test", []url.URL{{Host: "error.example.com"}}) + client := NewFallbackClient(nil, &metrics, []url.URL{{Host: "error.example.com"}}) client.log = nopLogger client.httpDo = func(req *http.Request) (*http.Response, error) { @@ -167,7 +165,6 @@ func TestFallbackClient_Get(t *testing.T) { //nolint _, _ = client.Get(ctx, "") - require.Equal(t, "test", metrics.GotRPCType, tt) require.Equal(t, "error.example.com", metrics.GotHost.Hostname(), tt) require.Equal(t, tt.WantMsg, metrics.GotErrMsg, tt) } @@ -175,7 +172,7 @@ func TestFallbackClient_Get(t *testing.T) { t.Run("context canceled error", func(t *testing.T) { var metrics mockClientMetrics - client := NewFallbackClient(nil, &metrics, "test", []url.URL{{Host: "error.example.com"}}) + client := NewFallbackClient(nil, &metrics, []url.URL{{Host: "error.example.com"}}) client.log = nopLogger client.httpDo = func(req *http.Request) (*http.Response, error) { diff --git a/metrics/metrics.go b/metrics/metrics.go index 122ec5b..d8a3b1a 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -1,8 +1,11 @@ package metrics const ( - namespace = "sl_exporter" + namespace = "sl_exporter" + + // Subsystems staticSubsystem = "static" cosmosSubsystem = "cosmos" cosmosValSubsystem = cosmosSubsystem + "_val" + refAPISubsystem = "reference_api" ) diff --git a/metrics/reference_api.go b/metrics/reference_api.go new file mode 100644 index 0000000..cff447b --- /dev/null +++ b/metrics/reference_api.go @@ -0,0 +1,35 @@ +package metrics + +import ( + "net/url" + + "github.com/prometheus/client_golang/prometheus" +) + +// ReferenceAPI records metrics for external http calls. +type ReferenceAPI struct { + errorCounter *prometheus.CounterVec + // TODO(nix): Count requests and histogram of latency. +} + +func NewHTTPRequest() *ReferenceAPI { + return &ReferenceAPI{ + errorCounter: prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: prometheus.BuildFQName(namespace, refAPISubsystem, "error_total"), + Help: "Number of errors encountered while making external calls to an API to gather reference data.", + }, + []string{"host", "reason"}, + ), + } +} + +func (c ReferenceAPI) IncAPIError(host url.URL, reason string) { + c.errorCounter.WithLabelValues(host.Hostname(), reason).Inc() +} + +func (c ReferenceAPI) Metrics() []prometheus.Collector { + return []prometheus.Collector{ + c.errorCounter, + } +} diff --git a/metrics/reference_api_test.go b/metrics/reference_api_test.go new file mode 100644 index 0000000..2347425 --- /dev/null +++ b/metrics/reference_api_test.go @@ -0,0 +1,29 @@ +package metrics + +import ( + "net/http/httptest" + "net/url" + "testing" + + "github.com/prometheus/client_golang/prometheus" + "github.com/stretchr/testify/require" +) + +func TestReferenceAPI_IncAPIError(t *testing.T) { + t.Parallel() + + reg := prometheus.NewRegistry() + metrics := NewHTTPRequest() + reg.MustRegister(metrics.Metrics()[0]) + + u, err := url.Parse("http://test.example/should/not/be/used") + require.NoError(t, err) + + metrics.IncAPIError(*u, "timeout") + + h := metricsHandler(reg) + r := httptest.NewRecorder() + h.ServeHTTP(r, stubRequest) + + require.Contains(t, r.Body.String(), `sl_exporter_reference_api_error_total{host="test.example",reason="timeout"} 1`) +} diff --git a/metrics/reference_rpc.go b/metrics/reference_rpc.go deleted file mode 100644 index bedabc1..0000000 --- a/metrics/reference_rpc.go +++ /dev/null @@ -1,36 +0,0 @@ -package metrics - -import ( - "net/url" - - "github.com/prometheus/client_golang/prometheus" -) - -// ReferenceRPC records metrics for external RPC calls. -type ReferenceRPC struct { - errorCounter *prometheus.CounterVec - // TODO(nix): Count requests and histogram of latency. -} - -func NewReferenceRPC() *ReferenceRPC { - const subsystem = "reference_rpc" - return &ReferenceRPC{ - errorCounter: prometheus.NewCounterVec( - prometheus.CounterOpts{ - Name: prometheus.BuildFQName(Namespace, subsystem, "error_count"), - Help: "Number of errors encountered while making external RPC, API, or GRPC calls.", - }, - []string{"type", "host", "reason"}, - ), - } -} - -func (c ReferenceRPC) IncClientError(rpcType string, host url.URL, reason string) { - c.errorCounter.WithLabelValues(rpcType, host.Hostname(), reason).Inc() -} - -func (c ReferenceRPC) Metrics() []prometheus.Collector { - return []prometheus.Collector{ - c.errorCounter, - } -} diff --git a/metrics/reference_rpc_test.go b/metrics/reference_rpc_test.go deleted file mode 100644 index 3df2f2a..0000000 --- a/metrics/reference_rpc_test.go +++ /dev/null @@ -1,33 +0,0 @@ -package metrics - -import ( - "net/http/httptest" - "net/url" - "strings" - "testing" - - "github.com/prometheus/client_golang/prometheus" - "github.com/stretchr/testify/require" -) - -func TestReferenceRPC_IncClientError(t *testing.T) { - t.Parallel() - - reg := prometheus.NewRegistry() - metrics := NewReferenceRPC() - reg.MustRegister(metrics.Metrics()[0]) - - u, err := url.Parse("http://test.example/should/not/be/used") - require.NoError(t, err) - - metrics.IncClientError("cosmos-lcd", *u, "timeout") - - h := metricsHandler(reg) - r := httptest.NewRecorder() - h.ServeHTTP(r, stubRequest) - - const want = `# HELP sl_exporter_reference_rpc_error_count Number of errors encountered while making external RPC, API, or GRPC calls. -# TYPE sl_exporter_reference_rpc_error_count counter -sl_exporter_reference_rpc_error_count{host="test.example",reason="timeout",type="cosmos-lcd"} 1` - require.Equal(t, strings.TrimSpace(want), strings.TrimSpace(r.Body.String())) -}