Skip to content

Commit

Permalink
feat: add metrics
Browse files Browse the repository at this point in the history
  • Loading branch information
mr-karan committed Feb 18, 2022
1 parent 7640674 commit dd6067d
Show file tree
Hide file tree
Showing 11 changed files with 173 additions and 10 deletions.
24 changes: 24 additions & 0 deletions cmd/handlers.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"context"
"encoding/json"
"net/http"
"time"

alertmgrtmpl "github.com/prometheus/alertmanager/template"
)
Expand Down Expand Up @@ -51,34 +52,57 @@ func sendErrorResponse(w http.ResponseWriter, message string, code int, data int

// handleIndex serves the landing page. It counts the request under the
// "index" handler label and replies with a static welcome message.
func handleIndex(w http.ResponseWriter, r *http.Request) {
	// The *App instance is read from the request context under the "app" key.
	app := r.Context().Value("app").(*App)

	app.metrics.Increment(`http_requests_total{handler="index"}`)
	sendResponse(w, "welcome to cAlerts!")
}

// handleHealthCheck is the liveness endpoint. It counts the request under
// the "ping" handler label and replies with "pong".
func handleHealthCheck(w http.ResponseWriter, r *http.Request) {
	// The *App instance is read from the request context under the "app" key.
	app := r.Context().Value("app").(*App)

	app.metrics.Increment(`http_requests_total{handler="ping"}`)
	sendResponse(w, "pong")
}

// handleMetrics exports the collected metrics by letting the metrics
// manager write them straight into the HTTP response.
func handleMetrics(w http.ResponseWriter, r *http.Request) {
	// The *App instance is read from the request context under the "app" key.
	app := r.Context().Value("app").(*App)
	app.metrics.FlushMetrics(w)
}

// handleDispatchNotif decodes an Alertmanager payload from the request body
// and hands its alerts over to the notifier.
//
// Every call is counted in http_requests_total. A decode failure is answered
// with 400 and a dispatch failure with 500; both are counted in
// http_request_errors_total. The request duration histogram is only updated
// when the dispatch succeeds.
func handleDispatchNotif(w http.ResponseWriter, r *http.Request) {
	start := time.Now()
	app := r.Context().Value("app").(*App)

	// fail logs the error, bumps the error counter and replies to the client.
	fail := func(err error, logMsg string, respMsg string, status int) {
		app.lo.WithError(err).Error(logMsg)
		app.metrics.Increment(`http_request_errors_total{handler="dispatch"}`)
		sendErrorResponse(w, respMsg, status, nil)
	}

	app.metrics.Increment(`http_requests_total{handler="dispatch"}`)

	// Unmarshal the POST body.
	var payload alertmgrtmpl.Data
	decodeErr := json.NewDecoder(r.Body).Decode(&payload)
	if decodeErr != nil {
		fail(decodeErr, "error decoding request body", "Error decoding payload.", http.StatusBadRequest)
		return
	}

	// Dispatch the list of alerts via the notifier.
	dispatchErr := app.notifier.Dispatch(payload.Alerts)
	if dispatchErr != nil {
		fail(dispatchErr, "error dispatching alerts", "Error dispatching alerts.", http.StatusInternalServerError)
		return
	}

	app.metrics.Duration(`http_request_duration_seconds{handler="dispatch"}`, start)
	sendResponse(w, "dispatched")
}
11 changes: 9 additions & 2 deletions cmd/init.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"github.com/knadh/koanf/parsers/toml"
"github.com/knadh/koanf/providers/env"
"github.com/knadh/koanf/providers/file"
"github.com/mr-karan/calert/internal/metrics"
"github.com/mr-karan/calert/internal/notifier"
prvs "github.com/mr-karan/calert/internal/providers"
"github.com/mr-karan/calert/internal/providers/google_chat"
Expand Down Expand Up @@ -77,7 +78,7 @@ func initConfig(cfgDefault string, envPrefix string) (*koanf.Koanf, error) {
}

// initProviders loads all the providers specified in the config.
func initProviders(ko *koanf.Koanf, lo *logrus.Logger) []prvs.Provider {
func initProviders(ko *koanf.Koanf, lo *logrus.Logger, metrics *metrics.Manager) []prvs.Provider {
provs := make([]prvs.Provider, 0)

// Loop over all providers listed in config.
Expand All @@ -97,13 +98,14 @@ func initProviders(ko *koanf.Koanf, lo *logrus.Logger) []prvs.Provider {
Room: name,
Template: ko.MustString(fmt.Sprintf("%s.template", cfgKey)),
ActiveAlertsTTL: ko.MustDuration(fmt.Sprintf("%s.active_alerts_ttl", cfgKey)),
Metrics: metrics,
},
)
if err != nil {
lo.WithError(err).Fatal("error initialising google chat provider")
}

lo.WithField("room", gchat.GetRoom()).Info("initialised provider")
lo.WithField("room", gchat.Room()).Info("initialised provider")
provs = append(provs, gchat)
}
}
Expand All @@ -127,3 +129,8 @@ func initNotifier(ko *koanf.Koanf, lo *logrus.Logger, provs []prvs.Provider) not

return n
}

// initMetrics builds the metrics manager used across the application.
// All metric names it produces are prefixed with the "calert" namespace.
func initMetrics() *metrics.Manager {
	const namespace = "calert"
	return metrics.New(namespace)
}
7 changes: 6 additions & 1 deletion cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (

"github.com/go-chi/chi"
"github.com/go-chi/chi/middleware"
"github.com/mr-karan/calert/internal/metrics"
"github.com/mr-karan/calert/internal/notifier"

"github.com/sirupsen/logrus"
Expand All @@ -19,6 +20,7 @@ var (
// instances of various objects used in the lifecycle of the program.
type App struct {
// lo is the logger the HTTP handlers use to report errors.
lo *logrus.Logger
// metrics records request counters and durations and serves them on /metrics.
metrics *metrics.Manager
// notifier dispatches incoming alerts; it is called by the /dispatch handler.
notifier notifier.Notifier
}

Expand All @@ -32,13 +34,15 @@ func main() {

var (
lo = initLogger(ko)
provs = initProviders(ko, lo)
metrics = initMetrics()
provs = initProviders(ko, lo, metrics)
notifier = initNotifier(ko, lo, provs)
)

app := &App{
lo: lo,
notifier: notifier,
metrics: metrics,
}

app.lo.WithField("version", buildString).Info("booting calerts")
Expand All @@ -55,6 +59,7 @@ func main() {
// Register Handlers
r.Get("/", wrap(app, handleIndex))
r.Get("/ping", wrap(app, handleHealthCheck))
r.Get("/metrics", wrap(app, handleMetrics))
r.Post("/dispatch", wrap(app, handleDispatchNotif))

// Start HTTP Server.
Expand Down
30 changes: 30 additions & 0 deletions docs/metrics.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
```
calert_alerts_dispatched_errors_total{provider="google_chat", room="dev_alerts"} 1
calert_alerts_dispatched_errors_total{provider="google_chat", room="prod_alerts"} 1
calert_alerts_dispatched_total{provider="google_chat", room="dev_alerts"} 1
calert_alerts_dispatched_total{provider="google_chat", room="prod_alerts"} 1
calert_http_request_duration_seconds_bucket{handler="dispatch",vmrange="4.642e-01...5.275e-01"} 2
calert_http_request_duration_seconds_sum{handler="dispatch"} 1.0370891740000001
calert_http_request_duration_seconds_count{handler="dispatch"} 2
calert_http_requests_total{handler="dispatch"} 2
calert_start_timestamp 1645193338
calert_uptime_seconds 34
```

```
calert_alerts_dispatched_duration_seconds_bucket{provider="google_chat", room="dev_alerts",vmrange="1.292e+00...1.468e+00"} 1
calert_alerts_dispatched_duration_seconds_sum{provider="google_chat", room="dev_alerts"} 1.372438907
calert_alerts_dispatched_duration_seconds_count{provider="google_chat", room="dev_alerts"} 1
calert_alerts_dispatched_duration_seconds_bucket{provider="google_chat", room="prod_alerts",vmrange="1.292e+00...1.468e+00"} 1
calert_alerts_dispatched_duration_seconds_sum{provider="google_chat", room="prod_alerts"} 1.4265874539999999
calert_alerts_dispatched_duration_seconds_count{provider="google_chat", room="prod_alerts"} 1
calert_alerts_dispatched_total{provider="google_chat", room="dev_alerts"} 1
calert_alerts_dispatched_total{provider="google_chat", room="prod_alerts"} 1
calert_http_request_duration_seconds_bucket{handler="dispatch",vmrange="1.292e+00...1.468e+00"} 2
calert_http_request_duration_seconds_sum{handler="dispatch"} 2.799676161
calert_http_request_duration_seconds_count{handler="dispatch"} 2
calert_http_requests_total{handler="dispatch"} 2
calert_start_timestamp 1645193392
calert_uptime_seconds 78
```
3 changes: 3 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
module github.com/mr-karan/calert

require (
github.com/VictoriaMetrics/metrics v1.18.1
github.com/go-chi/chi v1.5.4
github.com/gofrs/uuid v4.0.0+incompatible
github.com/knadh/koanf v1.4.0
Expand All @@ -27,6 +28,8 @@ require (
github.com/prometheus/procfs v0.6.0 // indirect
github.com/shurcooL/httpfs v0.0.0-20190707220628-8d4bc4ba7749 // indirect
github.com/shurcooL/vfsgen v0.0.0-20200824052919-0d455de96546 // indirect
github.com/valyala/fastrand v1.1.0 // indirect
github.com/valyala/histogram v1.2.0 // indirect
golang.org/x/sys v0.0.0-20220209214540-3681064d5158 // indirect
google.golang.org/protobuf v1.26.0-rc.1 // indirect
)
Expand Down
6 changes: 6 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAE
github.com/PuerkitoBio/purell v1.1.0/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
github.com/PuerkitoBio/purell v1.1.1/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE=
github.com/VictoriaMetrics/metrics v1.18.1 h1:OZ0+kTTto8oPfHnVAnTOoyl0XlRhRkoQrD2n2cOuRw0=
github.com/VictoriaMetrics/metrics v1.18.1/go.mod h1:ArjwVz7WpgpegX/JpB0zpNF2h2232kErkEnzH1sxMmA=
github.com/agnivade/levenshtein v1.0.1/go.mod h1:CURSv5d9Uaml+FovSIICkLbAUZ9S4RqaHDIsdSBg7lM=
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
Expand Down Expand Up @@ -467,6 +469,10 @@ github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/
github.com/tidwall/pretty v1.0.0/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk=
github.com/ugorji/go v1.1.7/go.mod h1:kZn38zHttfInRq0xu/PH0az30d+z6vm202qpg1oXVMw=
github.com/ugorji/go/codec v1.1.7/go.mod h1:Ax+UKWsSmolVDwsd+7N3ZtXu+yMGCf907BLYF3GoBXY=
github.com/valyala/fastrand v1.1.0 h1:f+5HkLW4rsgzdNoleUOB69hyT9IlD2ZQh9GyDMfb5G8=
github.com/valyala/fastrand v1.1.0/go.mod h1:HWqCzkrkg6QXT8V2EXWvXCoow7vLwOFN002oeRzjapQ=
github.com/valyala/histogram v1.2.0 h1:wyYGAZZt3CpwUiIb9AU/Zbllg1llXyrtApRS815OLoQ=
github.com/valyala/histogram v1.2.0/go.mod h1:Hb4kBwb4UxsaNbbbh+RRz8ZR6pdodR57tzWUS3BUzXY=
github.com/vektah/gqlparser v1.1.2/go.mod h1:1ycwN7Ij5njmMkPPAOaRFY4rET2Enx7IkVv3vaXspKw=
github.com/xdg-go/pbkdf2 v1.0.0/go.mod h1:jrpuAogTd400dnrH08LKmI/xc1MbPOebTwRqcT5RDeI=
github.com/xdg-go/scram v1.0.2/go.mod h1:1WAq6h33pAW+iRreB34OORO2Nf7qel3VV3fjBj+hCSs=
Expand Down
70 changes: 70 additions & 0 deletions internal/metrics/metrics.go
Original file line number Diff line number Diff line change
@@ -1 +1,71 @@
// Package metrics contains a wrapper around VictoriaMetrics
// functions to interact with counters, gauges etc. It also has functions
// to write the metrics output to an `io.Writer` interface.
package metrics

import (
"fmt"
"io"
"time"

"github.com/VictoriaMetrics/metrics"
)

// Manager wraps a VictoriaMetrics metric set together with the namespace
// used to prefix metric names and the time the manager was created.
type Manager struct {
// metrics is the set in which all counters and histograms are registered.
metrics *metrics.Set
namespace string // Optional prefix; when non-empty it is prepended, followed by "_", to every metric name.
// startTime is captured in New and used to export the start timestamp and uptime.
startTime time.Time
}

// New creates a Manager backed by a fresh metric set. ns is the namespace
// prefixed to metric names (pass "" for no prefix). The creation time is
// remembered as the start time.
func New(ns string) *Manager {
	m := new(Manager)
	m.metrics = metrics.NewSet()
	m.namespace = ns
	m.startTime = time.Now()
	return m
}

// Increment adds one to the counter identified by label, creating the
// counter on first use. The label is namespaced before lookup.
func (s *Manager) Increment(label string) {
	name := s.getFormattedLabel(label)
	counter := s.metrics.GetOrCreateCounter(name)
	counter.Inc()
}

// Decrement subtracts one from the counter identified by label, creating
// the counter on first use. The label is namespaced before lookup.
func (s *Manager) Decrement(label string) {
	name := s.getFormattedLabel(label)
	counter := s.metrics.GetOrCreateCounter(name)
	counter.Dec()
}

// Duration feeds the histogram identified by label with a duration measured
// from startTime, creating the histogram on first use. The label is
// namespaced before lookup.
func (s *Manager) Duration(label string, startTime time.Time) {
	name := s.getFormattedLabel(label)
	hist := s.metrics.GetOrCreateHistogram(name)
	hist.UpdateDuration(startTime)
}

// Set overwrites the value stored under label with val. The value is kept
// in a float counter from the metric set, created on first use, which gives
// gauge-like behaviour. The label is namespaced before lookup.
func (s *Manager) Set(label string, val float64) {
	name := s.getFormattedLabel(label)
	fc := s.metrics.GetOrCreateFloatCounter(name)
	fc.Set(val)
}

// FlushMetrics writes the current metrics to buf. The output consists of,
// in order: the process metrics emitted by metrics.WriteProcessMetrics, all
// metrics registered in the Manager's set, and two extra lines carrying the
// Manager's start time (Unix seconds) and its uptime in whole seconds.
//
// Write errors on buf are not reported.
func (s *Manager) FlushMetrics(buf io.Writer) {
	metrics.WriteProcessMetrics(buf)
	s.metrics.WritePrometheus(buf)

	// Export start time and uptime in seconds. The names go through
	// getFormattedLabel so they carry the configured namespace like every
	// other metric, instead of a hard-coded "calert_" prefix.
	uptime := int(time.Since(s.startTime).Seconds())
	fmt.Fprintf(buf, "%s %d\n", s.getFormattedLabel("start_timestamp"), s.startTime.Unix())
	fmt.Fprintf(buf, "%s %d\n", s.getFormattedLabel("uptime_seconds"), uptime)
}

// getFormattedLabel returns l prefixed with "<namespace>_" when a namespace
// is configured, and l unchanged otherwise.
func (s *Manager) getFormattedLabel(l string) string {
	if s.namespace == "" {
		return l
	}
	return s.namespace + "_" + l
}
2 changes: 1 addition & 1 deletion internal/notifier/notifier.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ func Init(opts Opts) (Notifier, error) {
m := make(map[string]providers.Provider, 0)

for _, prov := range opts.Providers {
room := prov.GetRoom()
room := prov.Room()
m[room] = prov
}

Expand Down
6 changes: 5 additions & 1 deletion internal/providers/google_chat/alerts.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,16 @@ import (
"time"

"github.com/gofrs/uuid"
"github.com/mr-karan/calert/internal/metrics"
alertmgrtmpl "github.com/prometheus/alertmanager/template"
"github.com/sirupsen/logrus"
)

// ActiveAlerts represents a map of alerts unique fingerprint hash
// with their details.
type ActiveAlerts struct {
lo *logrus.Logger
lo *logrus.Logger
metrics *metrics.Manager
sync.RWMutex
alerts map[string]AlertDetails
}
Expand Down Expand Up @@ -85,6 +87,8 @@ func (d *ActiveAlerts) Prune(ttl time.Duration) {
}
}

d.metrics.Duration(`alerts_prune_duration_seconds`, now)

}

// InitPruner is used to remove active alerts in the
Expand Down
22 changes: 18 additions & 4 deletions internal/providers/google_chat/google_chat.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,14 @@ import (
"text/template"
"time"

"github.com/mr-karan/calert/internal/metrics"
alertmgrtmpl "github.com/prometheus/alertmanager/template"
"github.com/sirupsen/logrus"
)

type GoogleChatManager struct {
lo *logrus.Logger
metrics *metrics.Manager
activeAlerts *ActiveAlerts
endpoint string
room string
Expand All @@ -23,6 +25,7 @@ type GoogleChatManager struct {

type GoogleChatOpts struct {
Log *logrus.Logger
Metrics *metrics.Manager
MaxIdleConn int
Timeout time.Duration
ProxyURL string
Expand Down Expand Up @@ -71,11 +74,14 @@ func NewGoogleChat(opts GoogleChatOpts) (*GoogleChatManager, error) {

mgr := &GoogleChatManager{
lo: opts.Log,
metrics: opts.Metrics,
client: client,
endpoint: opts.Endpoint,
room: opts.Room,
activeAlerts: &ActiveAlerts{
alerts: alerts,
alerts: alerts,
lo: opts.Log,
metrics: opts.Metrics,
},
msgTmpl: tmpl,
}
Expand Down Expand Up @@ -105,21 +111,29 @@ func (m *GoogleChatManager) Push(alerts []alertmgrtmpl.Alert) error {

// Dispatch an HTTP request for each message.
for _, msg := range msgs {
threadKey := m.activeAlerts.alerts[a.Fingerprint].UUID.String()
var (
threadKey = m.activeAlerts.alerts[a.Fingerprint].UUID.String()
now = time.Now()
)

m.metrics.Increment(fmt.Sprintf(`alerts_dispatched_total{provider="%s", room="%s"}`, m.ID(), m.Room()))

// Send message to API.
if err := m.sendMessage(msg, threadKey); err != nil {
m.metrics.Increment(fmt.Sprintf(`alerts_dispatched_errors_total{provider="%s", room="%s"}`, m.ID(), m.Room()))
m.lo.WithError(err).Error("error sending message")
continue
}

m.metrics.Duration(fmt.Sprintf(`alerts_dispatched_duration_seconds{provider="%s", room="%s"}`, m.ID(), m.Room()), now)
}
}

return nil
}

// GetRoom returns the name of room for which this provider is configured.
func (m *GoogleChatManager) GetRoom() string {
// Room reports the name of the room this provider was configured for.
func (m *GoogleChatManager) Room() string { return m.room }

Expand Down
Loading

0 comments on commit dd6067d

Please sign in to comment.