From 7880889d6cea60a1739b44e14ef2c57bc21443dd Mon Sep 17 00:00:00 2001 From: Victor Boivie Date: Fri, 15 Sep 2017 11:53:16 +0200 Subject: [PATCH] Added metrics for Prometheus --- Gopkg.lock | 44 ++++++++++++++++++++++++- app/backend.go | 27 ++++++++++++---- app/forward.go | 5 ++- app/init.go | 3 ++ app/manager.go | 4 +++ app/metrics.go | 87 ++++++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 162 insertions(+), 8 deletions(-) create mode 100644 app/metrics.go diff --git a/Gopkg.lock b/Gopkg.lock index 937025a..a100389 100644 --- a/Gopkg.lock +++ b/Gopkg.lock @@ -7,18 +7,60 @@ revision = "f006c2ac4710855cf0f916dd6b77acf6b048dc6e" version = "v1.0.3" +[[projects]] + branch = "master" + name = "github.com/beorn7/perks" + packages = ["quantile"] + revision = "4c0e84591b9aa9e6dcfdf3e020114cd81f89d5f9" + [[projects]] branch = "master" name = "github.com/franela/goreq" packages = ["."] revision = "b5b0f5eb2d16f20345cce0a544a75163579c0b00" +[[projects]] + branch = "master" + name = "github.com/golang/protobuf" + packages = ["proto"] + revision = "11b8df160996e00fd4b55cbaafb3d84ec6d50fa8" + [[projects]] name = "github.com/gorilla/websocket" packages = ["."] revision = "ea4d1f681babbce9545c9c5f3d5194a789c89f5b" version = "v1.2.0" +[[projects]] + name = "github.com/matttproud/golang_protobuf_extensions" + packages = ["pbutil"] + revision = "3247c84500bff8d9fb6d579d800f20b3e091582c" + version = "v1.0.0" + +[[projects]] + name = "github.com/prometheus/client_golang" + packages = ["prometheus","prometheus/promhttp"] + revision = "c5b7fccd204277076155f10851dad72b76a49317" + version = "v0.8.0" + +[[projects]] + branch = "master" + name = "github.com/prometheus/client_model" + packages = ["go"] + revision = "6f3806018612930941127f2a7c6c453ba2c527d2" + +[[projects]] + branch = "master" + name = "github.com/prometheus/common" + packages = ["expfmt","internal/bitbucket.org/ww/goautoneg","model"] + revision = "2f17f4a9d485bf34b4bfaccc273805040e4f86c8" + +[[projects]] + 
branch = "master" + name = "github.com/prometheus/procfs" + packages = [".","xfs"] + revision = "e645f4e5aaa8506fc71d6edbc5c4ff02c04c46f2" + [[projects]] name = "github.com/satori/go.uuid" packages = ["."] @@ -46,6 +88,6 @@ [solve-meta] analyzer-name = "dep" analyzer-version = 1 - inputs-digest = "ad6267805cf031cc137f3ab45cf6a34b75d3625a571ae27e88f90c57770ed0bd" + inputs-digest = "2a47384265e00454ef7f4aad67c9efff93c42b96e5d2a45f742879c2fc86aebb" solver-name = "gps-cdcl" solver-version = 1 diff --git a/app/backend.go b/app/backend.go index 9317280..a52bc46 100644 --- a/app/backend.go +++ b/app/backend.go @@ -9,6 +9,7 @@ import ( "time" "github.com/Sirupsen/logrus" + "github.com/prometheus/client_golang/prometheus" "golang.org/x/crypto/ssh" ) @@ -88,6 +89,7 @@ func (b *backendStruct) isProvisioned() bool { func (b *backendStruct) waitProvisioned() error { if !b.isProvisioned() { + start := time.Now() b.progress <- progressCmd{"wait_provisioning_start", nil} for { newInfo := doLookup(b.info.Host, b.info.Prefix) @@ -102,6 +104,7 @@ func (b *backendStruct) waitProvisioned() error { b.log.Info("Provisioning - retry...") time.Sleep(5 * time.Second) } + BackendProvisioningDuration.Observe(time.Since(start).Seconds()) b.log.Info("Provisioning completed") b.progress <- progressCmd{"wait_provisioning_end", nil} } @@ -110,6 +113,7 @@ func (b *backendStruct) waitProvisioned() error { func (b *backendStruct) failed(reason string, err error) { b.log.Warnf("ENTER FAILED STATE, due to %s: %v", reason, err) + BackendFailure.With(prometheus.Labels{"reason": reason}).Inc() // lame duck mode. 
UnregisterBackend(b.id) for { @@ -139,12 +143,14 @@ func dialSSH(info *configSSHTunnel, config *ssh.ClientConfig, proxyCommand strin } func (b *backendStruct) connectSSH() (client *ssh.Client, err error) { + start := time.Now() b.progress <- progressCmd{"connection_start", nil} b.log.Info("Connecting to SSH server") for retry := 0; retry < maxRetriesServer; retry++ { b.progress <- progressCmd{"connection_try", nil} client, err = dialSSH(b.info.SSHTunnel, b.sshConfig, proxyCommand) if err == nil { + BackendConnectSSHDuration.Observe(time.Since(start).Seconds()) b.log.Infof("Connected to SSH server: %v, err %v", client, err) go generateKeepalive(client) b.progress <- progressCmd{"connection_established", nil} @@ -166,6 +172,7 @@ func (b *backendStruct) reconnectSSH() (client *ssh.Client, err error) { client, err = dialSSH(b.info.SSHTunnel, b.sshConfig, proxyCommand) if err == nil { b.log.Infof("Re-connected to SSH server: %v, err %v", client, err) + BackendReconnectSSH.Inc() go generateKeepalive(client) b.progress <- progressCmd{"reconnection_established", nil} return @@ -177,6 +184,12 @@ func (b *backendStruct) reconnectSSH() (client *ssh.Client, err error) { } func (b *backendStruct) bootstrap(client *ssh.Client) (err error) { + if len(b.info.SSHTunnel.Bootstrap) == 0 && b.info.SSHTunnel.Run == nil { + return + } + + start := time.Now() + type BootstrapStep struct { Description string `json:"description"` Status string `json:"status"` @@ -224,6 +237,7 @@ func (b *backendStruct) bootstrap(client *ssh.Client) (err error) { session.Start(b.info.SSHTunnel.Run.Command) time.Sleep(500 * time.Millisecond) } + BackendBootstrapDuration.Observe(time.Since(start).Seconds()) return } @@ -317,6 +331,7 @@ func (b *backendStruct) waitBackend(client *ssh.Client) (err error) { func (b *backendStruct) waitUntilStarted() { <-b.start + BackendsStarted.Inc() b.log.Info("Woke up") // Just a goroutine that eats up all future start calls. 
go func() { @@ -333,7 +348,7 @@ func (b *backendStruct) monitor() { b.waitUntilStarted() if err = b.waitProvisioned(); err != nil { - b.failed("Failed to provision", err) + b.failed("provisioning", err) } b.log = b.log.WithFields(logrus.Fields{ @@ -342,19 +357,19 @@ func (b *backendStruct) monitor() { }) if err = b.prepareSSH(); err != nil { - b.failed("Failed to prepare SSH connection", err) + b.failed("prepare_ssh", err) } if client, err = b.connectSSH(); err != nil { - b.failed("Failed to do initial SSH connection", err) + b.failed("connect_ssh", err) } if err = b.bootstrap(client); err != nil { - b.failed("Failed to bootstrap", err) + b.failed("bootstrap", err) } if err = b.waitBackend(client); err != nil { - b.failed("Failed waiting for backend", err) + b.failed("wait_backend_ready", err) } b.isReady = true @@ -364,7 +379,7 @@ func (b *backendStruct) monitor() { err = <-connectionError b.log.Warnf("Connection error: %v - reconnecting", err) if client, err = b.reconnectSSH(); err != nil { - b.failed("Failed to reconnect", err) + b.failed("reconnect_ssh", err) } } } diff --git a/app/forward.go b/app/forward.go index ccc725a..643276a 100644 --- a/app/forward.go +++ b/app/forward.go @@ -5,9 +5,11 @@ import ( "net" "net/http" "net/http/httputil" + "strconv" "strings" "github.com/Sirupsen/logrus" + "github.com/prometheus/client_golang/prometheus" ) func respond(log *logrus.Entry, w http.ResponseWriter, req *http.Request, reply string, status int) { @@ -19,6 +21,7 @@ func respond(log *logrus.Entry, w http.ResponseWriter, req *http.Request, reply if status >= 400 { log.Warnf("Failed to serve URL: %s", reply) } + HTTPResponseCtr.With(prometheus.Labels{"code": string(strconv.Itoa(status))}).Inc() log.Printf("%s %s %s %d \"%s\"", host, req.Method, req.RequestURI, status, reply) http.Error(w, reply, status) } @@ -27,7 +30,7 @@ func serveBasicAuth(backend Backend, w http.ResponseWriter, req *http.Request) b if authInfo := backend.GetInfo().BasicAuth; authInfo != nil { 
authError := func() bool { w.Header().Set("WWW-Authenticate", "Basic realm=\"Restricted Access\"") - http.Error(w, "authorization failed", http.StatusUnauthorized) + respond(backend.GetLogger(), w, req, "authorization failed", http.StatusUnauthorized) return true } diff --git a/app/init.go b/app/init.go index 92561f8..3ee8bad 100644 --- a/app/init.go +++ b/app/init.go @@ -5,6 +5,8 @@ import ( "log" "net/http" "runtime" + + "github.com/prometheus/client_golang/prometheus/promhttp" ) var externalLookupURL string @@ -36,5 +38,6 @@ func Init(extPathLookupURL, proxyCmd, version string) { http.HandleFunc("/__ug__dump", dumpHandler) http.HandleFunc("/__ug__health", healthHandler) http.HandleFunc("/__ug__version", versionHandler) + http.Handle("/__ug__metrics", promhttp.Handler()) http.HandleFunc("/", forward) } diff --git a/app/manager.go b/app/manager.go index 56f5abd..6cd4c35 100644 --- a/app/manager.go +++ b/app/manager.go @@ -104,6 +104,8 @@ func backendManager() { currentID++ backend := NewBackend(currentID, info) log.Infof("Adding backend %d -> '%s%s'", currentID, info.Host, info.Prefix) + BackendsRegistered.Inc() + BackendActive.Inc() mapping[key] = backend return backend } @@ -139,6 +141,8 @@ func backendManager() { for mapkey, backend := range mapping { if backend.ID() == req.id { log.Infof("Removing backend %d -> '%s%s'", req.id, backend.GetInfo().Host, backend.GetInfo().Prefix) + BackendsUnregistered.Inc() + BackendActive.Dec() delete(mapping, mapkey) } } diff --git a/app/metrics.go b/app/metrics.go new file mode 100644 index 0000000..b04bf84 --- /dev/null +++ b/app/metrics.go @@ -0,0 +1,87 @@ +package app + +import ( + "github.com/prometheus/client_golang/prometheus" +) + +var ( + // HTTPResponseCtr allows the counting of Http Responses and their status codes + HTTPResponseCtr = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "http_resp_total", + Help: "Number of http responses", + }, + []string{"code"}, + ) + // BackendActive allows the counting 
of active (registered) backends + BackendActive = prometheus.NewGauge( + prometheus.GaugeOpts{ + Name: "backend_active_total", + Help: "Number of active backends", + }, + ) + // BackendsRegistered allows the counting of backends that have been registered + BackendsRegistered = prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "backend_registered_total", + Help: "Number of backends that have been registered", + }, + ) + // BackendsStarted allows the counting of backends that have been started + BackendsStarted = prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "backend_started_total", + Help: "Number of backends that have been started", + }, + ) + // BackendsUnregistered allows the counting of backends that have been unregistered + BackendsUnregistered = prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "backend_unregistered_total", + Help: "Number of backends that have been unregistered", + }, + ) + // BackendFailure allows the counting of backends and the corresponding failure reason + BackendFailure = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "backend_failure_total", + Help: "Number of backends that have failed", + }, + []string{"reason"}, + ) + // BackendReconnectSSH allows the counting of backends that have reconnected to the SSH server + BackendReconnectSSH = prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "backend_reconnect_ssh_total", + Help: "Number of backends that have reconnected to SSH", + }, + ) + // BackendProvisioningDuration allows the histogram of provisioning durations + BackendProvisioningDuration = prometheus.NewHistogram( + prometheus.HistogramOpts{ + Name: "backend_provisioning_seconds", + Help: "Provisioning duration for backends", + Buckets: []float64{1, 5, 10, 30, 1 * 60, 2 * 60, 3 * 60, 4 * 60, 5 * 60, 10 * 60, 20 * 60}, + }, + ) + // BackendConnectSSHDuration allows the histogram of SSH connection durations + BackendConnectSSHDuration = prometheus.NewHistogram( + prometheus.HistogramOpts{ + 
Name: "backend_connect_ssh_seconds", + Help: "SSH connection duration for backends", + Buckets: []float64{1, 5, 10, 30, 1 * 60, 2 * 60, 3 * 60, 4 * 60, 5 * 60}, + }, + ) + // BackendBootstrapDuration allows the histogram of bootstrap durations + BackendBootstrapDuration = prometheus.NewHistogram( + prometheus.HistogramOpts{ + Name: "backend_bootstrap_seconds", + Help: "SSH bootstrap duration for backends", + Buckets: []float64{1, 5, 10, 30, 1 * 60, 2 * 60, 3 * 60, 4 * 60, 5 * 60, 10 * 60, 20 * 60}, + }, + ) +) + +func init() { // Register every collector declared above; only registered collectors are exposed by promhttp.Handler(). + prometheus.MustRegister(HTTPResponseCtr, BackendActive, BackendsRegistered, BackendsStarted, BackendsUnregistered, BackendFailure, BackendReconnectSSH, BackendProvisioningDuration, BackendConnectSSHDuration, BackendBootstrapDuration) +}