From d5811fe547ded25c80e1f901c66260ed519427b2 Mon Sep 17 00:00:00 2001 From: Emil Ingerslev Date: Tue, 28 Apr 2020 11:30:11 +0200 Subject: [PATCH] Refactor big metric file into multiple files (#20) --- internal/metrics/daemon.go | 79 ++++++ internal/metrics/helpers.go | 100 ++++++++ internal/metrics/ikesa.go | 209 ++++++++++++++++ internal/metrics/metrics.go | 396 ++----------------------------- internal/metrics/metrics_test.go | 12 +- internal/metrics/tcpchecker.go | 77 ++++++ main.go | 27 +-- 7 files changed, 498 insertions(+), 402 deletions(-) create mode 100644 internal/metrics/daemon.go create mode 100644 internal/metrics/helpers.go create mode 100644 internal/metrics/ikesa.go create mode 100644 internal/metrics/tcpchecker.go diff --git a/internal/metrics/daemon.go b/internal/metrics/daemon.go new file mode 100644 index 0000000..ee30ecf --- /dev/null +++ b/internal/metrics/daemon.go @@ -0,0 +1,79 @@ +package metrics + +import ( + "time" + + daemonpkg "github.com/lunarway/strong-duckling/internal/daemon" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/common/log" +) + +const ( + subSystemDaemon = "daemon" +) + +type daemon struct { + logger log.Logger + + started *prometheus.CounterVec + stopped *prometheus.CounterVec + skipped *prometheus.CounterVec + ticked *prometheus.CounterVec +} + +func newDaemon() *daemon { + return &daemon{ + started: prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: namespace, + Subsystem: subSystemDaemon, + Name: "starts_total", + Help: "Total number of times started", + }, []string{"name", "interval"}), + stopped: prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: namespace, + Subsystem: subSystemDaemon, + Name: "stops_total", + Help: "Total number of times stopped", + }, []string{"name"}), + skipped: prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: namespace, + Subsystem: subSystemDaemon, + Name: "skips_total", + Help: "Total number of times tick was skipped", + }, 
[]string{"name"}), + ticked: prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: namespace, + Subsystem: subSystemDaemon, + Name: "ticks_total", + Help: "Total number of times tick was invoked", + }, []string{"name"}), + } +} + +func (d *daemon) getCollectors() []prometheus.Collector { + return []prometheus.Collector{ + d.started, + d.stopped, + d.skipped, + d.ticked, + } +} + +func (d *daemon) DefaultDaemonReporter(logger log.Logger, name string) *daemonpkg.Reporter { + return &daemonpkg.Reporter{ + Started: func(duration time.Duration) { + logger.With("state", "started").Infof("%s daemon started with interval %v", name, duration) + d.started.WithLabelValues(name, duration.String()).Inc() + }, + Stopped: func() { + logger.With("state", "stopped").Infof("%s daemon stopped", name) + d.stopped.WithLabelValues(name).Inc() + }, + Skipped: func() { + d.skipped.WithLabelValues(name).Inc() + }, + Ticked: func() { + d.ticked.WithLabelValues(name).Inc() + }, + } +} diff --git a/internal/metrics/helpers.go b/internal/metrics/helpers.go new file mode 100644 index 0000000..ef146fb --- /dev/null +++ b/internal/metrics/helpers.go @@ -0,0 +1,100 @@ +package metrics + +import ( + "strconv" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/common/log" +) + +func newHelper(logger log.Logger) *helper { + return &helper{ + previousValues: make(map[string]float64), + logger: logger, + } +} + +type helper struct { + previousValues map[string]float64 + logger log.Logger +} + +func (p *helper) setGauge(g *prometheus.GaugeVec, value, name string, labels childSALabels) { + f, err := strconv.ParseFloat(value, 64) + if err != nil { + p.logger.Errorf("metrics: failed to convert %s '%s' to float64: %v", name, value, err) + return + } + g.WithLabelValues(labels.values()...).Set(f) +} + +func (p *helper) setCounterByMax(c *prometheus.CounterVec, value, name string, labels childSALabels) { + // if this is the first time it is called it should be increased as 
well + _, ok := p.previousValues[name] + if !ok { + c.WithLabelValues(labels.values()...).Inc() + } + _, ok = p.maxValue(name, value) + if !ok { + return + } + c.WithLabelValues(labels.values()...).Inc() +} + +func (p *helper) setGaugeByMax(g *prometheus.GaugeVec, value, name string, labels ikeSALabels) { + max, ok := p.maxValue(name, value) + if !ok { + return + } + g.WithLabelValues(labels.values()...).Set(max) +} + +func (p *helper) setHistogramByMax(h *prometheus.HistogramVec, value, name string, labels childSALabels) { + max, ok := p.maxValue(name, value) + if !ok { + return + } + h.WithLabelValues(labels.values()...).Observe(max) +} + +func (p *helper) setHistogramByMin(h *prometheus.HistogramVec, value, name string, labels childSALabels) { + min, ok := p.minValue(name, value) + if !ok { + return + } + h.WithLabelValues(labels.values()...).Observe(min) +} + +// maxValue detects the max value of value. If max is detected the returned +// bool is true otherwise it returns the current value. +func (p *helper) maxValue(name, value string) (float64, bool) { + f, err := strconv.ParseFloat(value, 64) + if err != nil { + p.logger.Errorf("metrics: failed to convert %s '%s' to float64: %v", name, value, err) + return 0, false + } + previousValue, ok := p.previousValues[name] + // store the value for future reference when this call finishes + p.previousValues[name] = f + if ok && previousValue > f { + return previousValue, true + } + return f, false +} + +// minValue detects the min value of value. If min is detected the returned +// bool is true otherwise it returns the current value. 
+func (p *helper) minValue(name, value string) (float64, bool) { + f, err := strconv.ParseFloat(value, 64) + if err != nil { + p.logger.Errorf("metrics: failed to convert %s '%s' to float64: %v", name, value, err) + return 0, false + } + previousValue, ok := p.previousValues[name] + // store the value for future reference when this call finishes + p.previousValues[name] = f + if ok && previousValue < f { + return previousValue, true + } + return f, false +} diff --git a/internal/metrics/ikesa.go b/internal/metrics/ikesa.go new file mode 100644 index 0000000..f3c0589 --- /dev/null +++ b/internal/metrics/ikesa.go @@ -0,0 +1,209 @@ +package metrics + +import ( + "strings" + "strconv" + + "github.com/prometheus/client_golang/prometheus" + "github.com/lunarway/strong-duckling/internal/vici" + "github.com/prometheus/common/log" +) + +const ( + subSystemIKE = "ike_sa" +) + + +type ikeSA struct { + logger log.Logger + helper *helper + + establishedSeconds *prometheus.GaugeVec + packetsIn *prometheus.GaugeVec + packetsOut *prometheus.GaugeVec + lastPacketInSeconds *prometheus.HistogramVec + lastPacketOutSeconds *prometheus.HistogramVec + bytesIn *prometheus.GaugeVec + bytesOut *prometheus.GaugeVec + installs *prometheus.CounterVec + rekeySeconds *prometheus.HistogramVec + lifeTimeSeconds *prometheus.HistogramVec + state *prometheus.GaugeVec + childSAState *prometheus.GaugeVec +} + +type ikeSALabels struct { + name, localPeerIP, remotePeerIP string +} + +func (i ikeSALabels) names() []string { + return []string{"ike_sa_name", "local_peer_ip", "remote_peer_ip"} +} + +func (i ikeSALabels) values() []string { + return []string{i.name, i.localPeerIP, i.remotePeerIP} +} + +type childSALabels struct { + ikeSALabels + localIPRange, remoteIPRange, childSAName string +} + +func (c childSALabels) names() []string { + return append(c.ikeSALabels.names(), "local_ip_range", "remote_ip_range", "child_sa_name") +} + +func (c childSALabels) values() []string { + return 
append(c.ikeSALabels.values(), c.localIPRange, c.remoteIPRange, c.childSAName) +} + +func newIkeSA(logger log.Logger) *ikeSA { + return &ikeSA{ + logger: logger, + helper: newHelper(logger), + establishedSeconds: prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: subSystemIKE, + Name: "established_seconds", + Help: "Number of seconds the SA has been established", + }, ikeSALabels{}.names()), + packetsIn: prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: subSystemIKE, + Name: "packets_in_total", + Help: "Total number of received packets", + }, childSALabels{}.names()), + packetsOut: prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: subSystemIKE, + Name: "packets_out_total", + Help: "Total number of transmitted packets", + }, childSALabels{}.names()), + lastPacketInSeconds: prometheus.NewHistogramVec(prometheus.HistogramOpts{ + Namespace: namespace, + Subsystem: subSystemIKE, + Name: "packets_in_silence_duration_seconds", + Help: "Duration of silences between packets in", + Buckets: prometheus.ExponentialBuckets(15, 2, 14), + }, childSALabels{}.names()), + lastPacketOutSeconds: prometheus.NewHistogramVec(prometheus.HistogramOpts{ + Namespace: namespace, + Subsystem: subSystemIKE, + Name: "packets_out_silence_duration_seconds", + Help: "Duration of silences between packets out", + Buckets: prometheus.ExponentialBuckets(15, 2, 14), + }, childSALabels{}.names()), + bytesIn: prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: subSystemIKE, + Name: "bytes_in_total", + Help: "Total number of received bytes", + }, childSALabels{}.names()), + bytesOut: prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: subSystemIKE, + Name: "bytes_out_total", + Help: "Total number of transmitted bytes", + }, childSALabels{}.names()), + installs: prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: namespace, + Subsystem: 
subSystemIKE, + Name: "installs_total", + Help: "Total number of SA installs", + }, childSALabels{}.names()), + rekeySeconds: prometheus.NewHistogramVec(prometheus.HistogramOpts{ + Namespace: namespace, + Subsystem: subSystemIKE, + Name: "rekey_seconds", + Help: "Duration between re-keying", + Buckets: prometheus.ExponentialBuckets(15, 2, 12), + }, childSALabels{}.names()), + lifeTimeSeconds: prometheus.NewHistogramVec(prometheus.HistogramOpts{ + Namespace: namespace, + Subsystem: subSystemIKE, + Name: "lifetime_seconds", + Help: "Duration of each IKE session", + Buckets: prometheus.ExponentialBuckets(15, 2, 14), + }, childSALabels{}.names()), + state: prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: subSystemIKE, + Name: "state_info", + Help: "Current state of the SA", + }, ikeSALabels{}.names()), + childSAState: prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: subSystemIKE, + Name: "child_state_info", + Help: "Current state of the child SA", + }, childSALabels{}.names()), + } +} + +func (i *ikeSA) getCollectors() []prometheus.Collector { + return []prometheus.Collector { + i.establishedSeconds, + i.packetsIn, + i.packetsOut, + i.lastPacketInSeconds, + i.lastPacketOutSeconds, + i.bytesIn, + i.bytesOut, + i.installs, + i.rekeySeconds, + i.lifeTimeSeconds, + i.state, + i.childSAState, + } +} + +func (p *ikeSA) IKESAStatus(ikeName string, conn vici.IKEConf, sa *vici.IkeSa) { + if sa == nil { + p.logger.Errorf("No SA for connecetion configuration: %#v", conn) + return + } + ikeSALabels := ikeSALabels{ + name: ikeName, + localPeerIP: sa.LocalHost, + remotePeerIP: sa.RemoteHost, + } + p.helper.setGaugeByMax(p.establishedSeconds, sa.EstablishedSeconds, "EstablishedSeconds", ikeSALabels) + p.logger.Infof("prometheusReporter: IKESAStatus: IKE_SA state: %v", sa.State) + for _, child := range sa.ChildSAs { + labels := childSALabels{ + ikeSALabels: ikeSALabels, + childSAName: child.Name, + localIPRange: 
strings.Join(child.LocalTrafficSelectors, ","), + remoteIPRange: strings.Join(child.RemoteTrafficSelectors, ","), + } + p.logger.Infof("prometheusReporter: IKESAStatus: IKE_SA child state: %v", child.State) + p.helper.setCounterByMax(p.installs, child.InstallTimeSeconds, "InstallTimeSeconds", labels) + p.helper.setGauge(p.packetsIn, child.PacketsIn, "PacketsIn", labels) + p.helper.setGauge(p.packetsOut, child.PacketsOut, "PacketsOut", labels) + p.helper.setGauge(p.bytesIn, child.BytesIn, "BytesIn", labels) + p.helper.setGauge(p.bytesOut, child.BytesOut, "BytesOut", labels) + p.helper.setHistogramByMax(p.lastPacketInSeconds, child.LastPacketInSeconds, "LastPacketInSeconds", labels) + p.helper.setHistogramByMax(p.lastPacketOutSeconds, child.LastPacketOutSeconds, "LastPacketOutSeconds", labels) + p.helper.setHistogramByMin(p.rekeySeconds, child.RekeyTimeSeconds, "RekeyTimeSeconds", labels) + p.helper.setHistogramByMax(p.lifeTimeSeconds, child.LifeTimeSeconds, "LifeTimeSeconds", labels) + p.setRekeySeconds(conn, child, labels) + } +} + +func (p *ikeSA) setRekeySeconds(conn vici.IKEConf, child vici.ChildSA, labels childSALabels) { + // RekeyTimeSeconds on the conn conf is the start value and on the child the + // time left from this value. We want to track how long each rekey session + // was, i.e. the elapsed time from max to when it increases again. This is + // done by finding a min value on the child field and subtracting that from + // the max value on the conf. 
+ minRekeyTimeSeconds, ok := p.helper.minValue("RekeyTimeSeconds", child.RekeyTimeSeconds) + if !ok { + return + } + connRekeyTimeSeconds, err := strconv.ParseFloat(conn.RekeyTimeSeconds, 64) + if err != nil { + p.logger.Errorf("metrics: failed to convert RekeyTimeSeconds '%s' to float64: %v", conn.RekeyTimeSeconds, err) + return + } + p.rekeySeconds.WithLabelValues(labels.values()...).Observe(connRekeyTimeSeconds - minRekeyTimeSeconds) +} diff --git a/internal/metrics/metrics.go b/internal/metrics/metrics.go index d0cf10c..08a0627 100644 --- a/internal/metrics/metrics.go +++ b/internal/metrics/metrics.go @@ -1,15 +1,14 @@ package metrics import ( - "fmt" "net/http" - "strconv" - "strings" + daemonpkg "github.com/lunarway/strong-duckling/internal/daemon" + "github.com/lunarway/strong-duckling/internal/strongswan" "github.com/lunarway/strong-duckling/internal/tcpchecker" - "github.com/lunarway/strong-duckling/internal/vici" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promhttp" + "github.com/prometheus/common/log" ) func Register(serveMux *http.ServeMux) { @@ -21,90 +20,32 @@ func Register(serveMux *http.ServeMux) { } const ( - namespace = "strong_duckling" - subSystemTcpChecker = "tcp_checker" - subSystemIKE = "ike_sa" - subSystemDaemon = "daemon" + namespace = "strong_duckling" ) -type Logger interface { - Infof(string, ...interface{}) - Errorf(string, ...interface{}) -} - type PrometheusReporter struct { registry prometheus.Registerer - logger Logger + logger log.Logger version *prometheus.GaugeVec tcpChecker *tcpChecker - ikeSA ikeSA - Daemon *Daemon + ikeSA *ikeSA + daemon *daemon } func (pr *PrometheusReporter) TcpChecker() tcpchecker.Reporter { return pr.tcpChecker } -type tcpChecker struct { - checks *prometheus.CounterVec - open *prometheus.GaugeVec - connectedTotal *prometheus.CounterVec - disconectedTotal *prometheus.CounterVec - - previousOpenState *bool -} - -type ikeSA struct { - previousValues 
map[string]float64 - - establishedSeconds *prometheus.GaugeVec - packetsIn *prometheus.GaugeVec - packetsOut *prometheus.GaugeVec - lastPacketInSeconds *prometheus.HistogramVec - lastPacketOutSeconds *prometheus.HistogramVec - bytesIn *prometheus.GaugeVec - bytesOut *prometheus.GaugeVec - installs *prometheus.CounterVec - rekeySeconds *prometheus.HistogramVec - lifeTimeSeconds *prometheus.HistogramVec - state *prometheus.GaugeVec - childSAState *prometheus.GaugeVec -} - -type ikeSALabels struct { - name, localPeerIP, remotePeerIP string -} - -func (i ikeSALabels) names() []string { - return []string{"ike_sa_name", "local_peer_ip", "remote_peer_ip"} -} - -func (i ikeSALabels) values() []string { - return []string{i.name, i.localPeerIP, i.remotePeerIP} -} - -type childSALabels struct { - ikeSALabels - localIPRange, remoteIPRange, childSAName string -} - -func (c childSALabels) names() []string { - return append(c.ikeSALabels.names(), "local_ip_range", "remote_ip_range", "child_sa_name") +func (pr *PrometheusReporter) StrongSwan() strongswan.Reporter { + return pr.ikeSA } -func (c childSALabels) values() []string { - return append(c.ikeSALabels.values(), c.localIPRange, c.remoteIPRange, c.childSAName) +func (pr *PrometheusReporter) Daemon(logger log.Logger, name string) *daemonpkg.Reporter { + return pr.daemon.DefaultDaemonReporter(logger, name) } -type Daemon struct { - Started *prometheus.CounterVec - Stopped *prometheus.CounterVec - Ticked *prometheus.CounterVec - Skipped *prometheus.CounterVec -} - -func NewPrometheusReporter(reg prometheus.Registerer, logger Logger) (*PrometheusReporter, error) { +func NewPrometheusReporter(reg prometheus.Registerer, logger log.Logger) (*PrometheusReporter, error) { r := PrometheusReporter{ registry: reg, logger: logger, @@ -113,162 +54,20 @@ func NewPrometheusReporter(reg prometheus.Registerer, logger Logger) (*Prometheu Name: "info", Help: "Version info of strong_duckling", }, []string{"version"}), - tcpChecker: &tcpChecker{ - 
checks: prometheus.NewCounterVec(prometheus.CounterOpts{ - Namespace: namespace, - Subsystem: subSystemTcpChecker, - Name: "checked_total", - Help: "Total number of times the connection has been checked", - }, []string{"name", "address", "port", "open"}), - open: prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: subSystemTcpChecker, - Name: "open_info", - Help: "Is TCP open is 1 otherwise 0", - }, []string{"name", "address", "port"}), - connectedTotal: prometheus.NewCounterVec(prometheus.CounterOpts{ - Namespace: namespace, - Subsystem: subSystemTcpChecker, - Name: "connected_total", - Help: "Total number of times connection to TCP address:port was established", - }, []string{"name", "address", "port"}), - disconectedTotal: prometheus.NewCounterVec(prometheus.CounterOpts{ - Namespace: namespace, - Subsystem: subSystemTcpChecker, - Name: "disconnected_total", - Help: "Total number of times connection to TCP address:port was lost", - }, []string{"name", "address", "port"}), - }, - ikeSA: ikeSA{ - previousValues: make(map[string]float64), - establishedSeconds: prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: subSystemIKE, - Name: "established_seconds", - Help: "Number of seconds the SA has been established", - }, ikeSALabels{}.names()), - packetsIn: prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: subSystemIKE, - Name: "packets_in_total", - Help: "Total number of received packets", - }, childSALabels{}.names()), - packetsOut: prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: subSystemIKE, - Name: "packets_out_total", - Help: "Total number of transmitted packets", - }, childSALabels{}.names()), - lastPacketInSeconds: prometheus.NewHistogramVec(prometheus.HistogramOpts{ - Namespace: namespace, - Subsystem: subSystemIKE, - Name: "packets_in_silence_duration_seconds", - Help: "Duration of silences between packets in", - Buckets: 
prometheus.ExponentialBuckets(15, 2, 14), - }, childSALabels{}.names()), - lastPacketOutSeconds: prometheus.NewHistogramVec(prometheus.HistogramOpts{ - Namespace: namespace, - Subsystem: subSystemIKE, - Name: "packets_out_silence_duration_seconds", - Help: "Duration of silences between packets out", - Buckets: prometheus.ExponentialBuckets(15, 2, 14), - }, childSALabels{}.names()), - bytesIn: prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: subSystemIKE, - Name: "bytes_in_total", - Help: "Total number of received bytes", - }, childSALabels{}.names()), - bytesOut: prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: subSystemIKE, - Name: "bytes_out_total", - Help: "Total number of transmitted bytes", - }, childSALabels{}.names()), - installs: prometheus.NewCounterVec(prometheus.CounterOpts{ - Namespace: namespace, - Subsystem: subSystemIKE, - Name: "installs_total", - Help: "Total number of SA installs", - }, childSALabels{}.names()), - rekeySeconds: prometheus.NewHistogramVec(prometheus.HistogramOpts{ - Namespace: namespace, - Subsystem: subSystemIKE, - Name: "rekey_seconds", - Help: "Duration between re-keying", - Buckets: prometheus.ExponentialBuckets(15, 2, 12), - }, childSALabels{}.names()), - lifeTimeSeconds: prometheus.NewHistogramVec(prometheus.HistogramOpts{ - Namespace: namespace, - Subsystem: subSystemIKE, - Name: "lifetime_seconds", - Help: "Duration of each IKE session", - Buckets: prometheus.ExponentialBuckets(15, 2, 14), - }, childSALabels{}.names()), - state: prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: subSystemIKE, - Name: "state_info", - Help: "Current state of the SA", - }, ikeSALabels{}.names()), - childSAState: prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: subSystemIKE, - Name: "child_state_info", - Help: "Current state of the child SA", - }, childSALabels{}.names()), - }, - Daemon: &Daemon{ - Started: 
prometheus.NewCounterVec(prometheus.CounterOpts{ - Namespace: namespace, - Subsystem: subSystemDaemon, - Name: "starts_total", - Help: "Total number of times started", - }, []string{"name", "interval"}), - Stopped: prometheus.NewCounterVec(prometheus.CounterOpts{ - Namespace: namespace, - Subsystem: subSystemDaemon, - Name: "stops_total", - Help: "Total number of times stopped", - }, []string{"name"}), - Skipped: prometheus.NewCounterVec(prometheus.CounterOpts{ - Namespace: namespace, - Subsystem: subSystemDaemon, - Name: "skips_total", - Help: "Total number of times tick was skipped", - }, []string{"name"}), - Ticked: prometheus.NewCounterVec(prometheus.CounterOpts{ - Namespace: namespace, - Subsystem: subSystemDaemon, - Name: "ticks_total", - Help: "Total number of times tick was invoked", - }, []string{"name"}), - }, + tcpChecker: newTcpChecker(), + ikeSA: newIkeSA(logger), + daemon: newDaemon(), } - err := register(r.registry, + collectors := []prometheus.Collector{ r.version, - r.tcpChecker.open, - r.tcpChecker.connectedTotal, - r.tcpChecker.disconectedTotal, - r.tcpChecker.checks, - r.ikeSA.establishedSeconds, - r.ikeSA.packetsIn, - r.ikeSA.packetsOut, - r.ikeSA.lastPacketInSeconds, - r.ikeSA.lastPacketOutSeconds, - r.ikeSA.bytesIn, - r.ikeSA.bytesOut, - r.ikeSA.installs, - r.ikeSA.rekeySeconds, - r.ikeSA.lifeTimeSeconds, - r.ikeSA.state, - r.ikeSA.childSAState, - r.Daemon.Started, - r.Daemon.Stopped, - r.Daemon.Skipped, - r.Daemon.Ticked, - ) + } + + collectors = append(collectors, r.tcpChecker.getCollectors()...) + collectors = append(collectors, r.ikeSA.getCollectors()...) + collectors = append(collectors, r.daemon.getCollectors()...) + + err := register(r.registry, collectors...) 
if err != nil { return nil, err } @@ -288,152 +87,3 @@ func register(r prometheus.Registerer, collectors ...prometheus.Collector) error func (p *PrometheusReporter) Info(strongDucklingVersion string) { p.version.WithLabelValues(strongDucklingVersion).Set(1) } - -func (r *tcpChecker) ReportPortCheck(report tcpchecker.Report) { - labelValues := []string{report.Name, report.Address, fmt.Sprintf("%d", report.Port)} - if report.Open { - r.checks.WithLabelValues(append(labelValues, "true")...).Inc() - r.open.WithLabelValues(labelValues...).Set(1) - if r.previousOpenState == nil || *r.previousOpenState != report.Open { - r.connectedTotal.WithLabelValues(labelValues...).Add(1) - } - } else { - r.checks.WithLabelValues(append(labelValues, "false")...).Inc() - r.open.WithLabelValues(labelValues...).Set(0) - if r.previousOpenState == nil || *r.previousOpenState != report.Open { - r.disconectedTotal.WithLabelValues(labelValues...).Add(0) - } - } - r.previousOpenState = &report.Open -} - -func (p *PrometheusReporter) IKESAStatus(ikeName string, conn vici.IKEConf, sa *vici.IkeSa) { - if sa == nil { - p.logger.Errorf("No SA for connecetion configuration: %#v", conn) - return - } - ikeSALabels := ikeSALabels{ - name: ikeName, - localPeerIP: sa.LocalHost, - remotePeerIP: sa.RemoteHost, - } - p.setGaugeByMax(p.ikeSA.establishedSeconds, sa.EstablishedSeconds, "EstablishedSeconds", ikeSALabels) - p.logger.Infof("prometheusReporter: IKESAStatus: IKE_SA state: %v", sa.State) - for _, child := range sa.ChildSAs { - labels := childSALabels{ - ikeSALabels: ikeSALabels, - childSAName: child.Name, - localIPRange: strings.Join(child.LocalTrafficSelectors, ","), - remoteIPRange: strings.Join(child.RemoteTrafficSelectors, ","), - } - p.logger.Infof("prometheusReporter: IKESAStatus: IKE_SA child state: %v", child.State) - p.setCounterByMax(p.ikeSA.installs, child.InstallTimeSeconds, "InstallTimeSeconds", labels) - p.setGauge(p.ikeSA.packetsIn, child.PacketsIn, "PacketsIn", labels) - 
p.setGauge(p.ikeSA.packetsOut, child.PacketsOut, "PacketsOut", labels) - p.setGauge(p.ikeSA.bytesIn, child.BytesIn, "BytesIn", labels) - p.setGauge(p.ikeSA.bytesOut, child.BytesOut, "BytesOut", labels) - p.setHistogramByMax(p.ikeSA.lastPacketInSeconds, child.LastPacketInSeconds, "LastPacketInSeconds", labels) - p.setHistogramByMax(p.ikeSA.lastPacketOutSeconds, child.LastPacketOutSeconds, "LastPacketOutSeconds", labels) - p.setHistogramByMin(p.ikeSA.rekeySeconds, child.RekeyTimeSeconds, "RekeyTimeSeconds", labels) - p.setHistogramByMax(p.ikeSA.lifeTimeSeconds, child.LifeTimeSeconds, "LifeTimeSeconds", labels) - p.setRekeySeconds(conn, child, labels) - } -} - -func (p *PrometheusReporter) setRekeySeconds(conn vici.IKEConf, child vici.ChildSA, labels childSALabels) { - // RekeyTimeSeconds on the conn conf is the start value and on the child the - // time left from this value. We want to track how long each rekey session - // was, ie. the ellapsed time from max to when it increases again. This is - // done by finding a min value on the child field and subtracting that from - // the max value on the conf. 
- minRekeyTimeSeconds, ok := p.minValue("RekeyTimeSeconds", child.RekeyTimeSeconds) - if !ok { - return - } - connRekeyTimeSeconds, err := strconv.ParseFloat(conn.RekeyTimeSeconds, 64) - if err != nil { - p.logger.Errorf("metrics: failed to convert RekeyTimeSeconds '%s' to float64: %v", conn.RekeyTimeSeconds, err) - return - } - p.ikeSA.rekeySeconds.WithLabelValues(labels.values()...).Observe(connRekeyTimeSeconds - minRekeyTimeSeconds) -} - -func (p *PrometheusReporter) setGauge(g *prometheus.GaugeVec, value, name string, labels childSALabels) { - f, err := strconv.ParseFloat(value, 64) - if err != nil { - p.logger.Errorf("metrics: failed to convert %s '%s' to float64: %v", name, value, err) - return - } - g.WithLabelValues(labels.values()...).Set(f) -} - -func (p *PrometheusReporter) setCounterByMax(c *prometheus.CounterVec, value, name string, labels childSALabels) { - // if this is the first time it is called it should be increased as well - _, ok := p.ikeSA.previousValues[name] - if !ok { - c.WithLabelValues(labels.values()...).Inc() - } - _, ok = p.maxValue(name, value) - if !ok { - return - } - c.WithLabelValues(labels.values()...).Inc() -} - -func (p *PrometheusReporter) setGaugeByMax(g *prometheus.GaugeVec, value, name string, labels ikeSALabels) { - max, ok := p.maxValue(name, value) - if !ok { - return - } - g.WithLabelValues(labels.values()...).Set(max) -} - -func (p *PrometheusReporter) setHistogramByMax(h *prometheus.HistogramVec, value, name string, labels childSALabels) { - max, ok := p.maxValue(name, value) - if !ok { - return - } - h.WithLabelValues(labels.values()...).Observe(max) -} - -func (p *PrometheusReporter) setHistogramByMin(h *prometheus.HistogramVec, value, name string, labels childSALabels) { - min, ok := p.minValue(name, value) - if !ok { - return - } - h.WithLabelValues(labels.values()...).Observe(min) -} - -// maxValue detects the max value of value. 
If max is detected the returned -// bool is true otherwise it returns the current value. -func (p *PrometheusReporter) maxValue(name, value string) (float64, bool) { - f, err := strconv.ParseFloat(value, 64) - if err != nil { - p.logger.Errorf("metrics: failed to convert %s '%s' to float64: %v", name, value, err) - return 0, false - } - previousValue, ok := p.ikeSA.previousValues[name] - // store the value for future reference when this call finishes - p.ikeSA.previousValues[name] = f - if ok && previousValue > f { - return previousValue, true - } - return f, false -} - -// minValue detects the min value of value. If min is detected the returned -// bool is true otherwise it returns the current value. -func (p *PrometheusReporter) minValue(name, value string) (float64, bool) { - f, err := strconv.ParseFloat(value, 64) - if err != nil { - p.logger.Errorf("metrics: failed to convert %s '%s' to float64: %v", name, value, err) - return 0, false - } - previousValue, ok := p.ikeSA.previousValues[name] - // store the value for future reference when this call finishes - p.ikeSA.previousValues[name] = f - if ok && previousValue < f { - return previousValue, true - } - return f, false -} diff --git a/internal/metrics/metrics_test.go b/internal/metrics/metrics_test.go index 0d0436f..3508de1 100644 --- a/internal/metrics/metrics_test.go +++ b/internal/metrics/metrics_test.go @@ -49,7 +49,7 @@ func TestIKESAStatus_gauges(t *testing.T) { return } - p.IKESAStatus("", tc.conf, tc.sa) + p.StrongSwan().IKESAStatus("", tc.conf, tc.sa) assert.Equal(t, tc.packetsIn, testutil.ToFloat64(p.ikeSA.packetsIn), "packets in not as expected") assert.Equal(t, tc.packetsOut, testutil.ToFloat64(p.ikeSA.packetsOut), "packets out not as expected") @@ -91,7 +91,7 @@ func TestIKESAStatus_installs(t *testing.T) { } for _, s := range tc.installTimeSeconds { - p.IKESAStatus("", vici.IKEConf{}, &vici.IkeSa{ + p.StrongSwan().IKESAStatus("", vici.IKEConf{}, &vici.IkeSa{ ChildSAs: map[string]vici.ChildSA{ 
"net-0": vici.ChildSA{ InstallTimeSeconds: s, @@ -177,7 +177,7 @@ strong_duckling_ike_sa_rekey_seconds_count{child_sa_name="",ike_sa_name="",local } for _, s := range tc.rekeySeconds { - p.IKESAStatus("", vici.IKEConf{ + p.StrongSwan().IKESAStatus("", vici.IKEConf{ RekeyTimeSeconds: tc.connRekeySeconds, }, &vici.IkeSa{ ChildSAs: map[string]vici.ChildSA{ @@ -249,7 +249,7 @@ func TestPrometheusReporter_maxValue(t *testing.T) { var v float64 var ok bool for _, s := range tc.values { - v, ok = p.maxValue("test", s) + v, ok = p.ikeSA.helper.maxValue("test", s) } assert.Equal(t, tc.ok, ok, "ok indication not as expected") @@ -313,7 +313,7 @@ func TestPrometheusReporter_minValue(t *testing.T) { var v float64 var ok bool for _, s := range tc.values { - v, ok = p.minValue("test", s) + v, ok = p.ikeSA.helper.minValue("test", s) } assert.Equal(t, tc.ok, ok, "ok indication not as expected") @@ -367,7 +367,7 @@ strong_duckling_ike_sa_packets_out_total{child_sa_name="net-1",ike_sa_name="gw-g if !assert.NoError(t, err, "unexpected initialization error") { return } - p.IKESAStatus(tc.ikeName, tc.conf, tc.sa) + p.StrongSwan().IKESAStatus(tc.ikeName, tc.conf, tc.sa) err = testutil.GatherAndCompare(reg, strings.NewReader(tc.output)) assert.NoError(t, err, "registered metrics not as expected") }) diff --git a/internal/metrics/tcpchecker.go b/internal/metrics/tcpchecker.go new file mode 100644 index 0000000..3c965fb --- /dev/null +++ b/internal/metrics/tcpchecker.go @@ -0,0 +1,77 @@ +package metrics + +import ( + "fmt" + + "github.com/lunarway/strong-duckling/internal/tcpchecker" + "github.com/prometheus/client_golang/prometheus" +) + +const ( + subSystemTcpChecker = "tcp_checker" +) + +type tcpChecker struct { + checks *prometheus.CounterVec + open *prometheus.GaugeVec + connectedTotal *prometheus.CounterVec + disconectedTotal *prometheus.CounterVec + + previousOpenState *bool +} + +func newTcpChecker() *tcpChecker { + return &tcpChecker{ + checks: 
prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: namespace, + Subsystem: subSystemTcpChecker, + Name: "checked_total", + Help: "Total number of times the connection has been checked", + }, []string{"name", "address", "port", "open"}), + open: prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: subSystemTcpChecker, + Name: "open_info", + Help: "Is TCP open is 1 otherwise 0", + }, []string{"name", "address", "port"}), + connectedTotal: prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: namespace, + Subsystem: subSystemTcpChecker, + Name: "connected_total", + Help: "Total number of times connection to TCP address:port was established", + }, []string{"name", "address", "port"}), + disconectedTotal: prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: namespace, + Subsystem: subSystemTcpChecker, + Name: "disconnected_total", + Help: "Total number of times connection to TCP address:port was lost", + }, []string{"name", "address", "port"}), + } +} + +func (tc *tcpChecker) getCollectors() []prometheus.Collector { + return []prometheus.Collector{ + tc.open, + tc.checks, + tc.connectedTotal, + tc.disconectedTotal, + } +} + +func (r *tcpChecker) ReportPortCheck(report tcpchecker.Report) { + labelValues := []string{report.Name, report.Address, fmt.Sprintf("%d", report.Port)} + if report.Open { + r.checks.WithLabelValues(append(labelValues, "true")...).Inc() + r.open.WithLabelValues(labelValues...).Set(1) + if r.previousOpenState == nil || *r.previousOpenState != report.Open { + r.connectedTotal.WithLabelValues(labelValues...).Add(1) + } + } else { + r.checks.WithLabelValues(append(labelValues, "false")...).Inc() + r.open.WithLabelValues(labelValues...).Set(0) + if r.previousOpenState == nil || *r.previousOpenState != report.Open { + r.disconectedTotal.WithLabelValues(labelValues...).Add(1) + } + } + r.previousOpenState = &report.Open +} diff --git a/main.go b/main.go index eae222d..a0582cd 100644 --- a/main.go +++ 
b/main.go @@ -62,7 +62,7 @@ func main() { if whoopingAddress != nil && *whoopingAddress != "" { logger := log.With("name", "whooper") whoopDaemon := daemon.New(daemon.Configuration{ - Reporter: defaultDaemonReporter(logger, prometheusReporter, "whopper"), + Reporter: prometheusReporter.Daemon(logger, "whopper"), Interval: 1 * time.Second, Tick: func() { whooper.Whoop(*whoopingAddress, fmt.Sprintf("http://localhost%s", *listenAddress)) @@ -105,7 +105,7 @@ func main() { With("port", port) logger.Infof("Start checking address %s:%v", address, port) tcpCheckerDaemon := daemon.New(daemon.Configuration{ - Reporter: defaultDaemonReporter(logger, prometheusReporter, "tcpchecker"), + Reporter: prometheusReporter.Daemon(logger, "tcpchecker"), Interval: 1 * time.Second, Tick: func() { tcpchecker.Check(name, address, int(port), tcpchecker.CompositeReporter(tcpchecker.LogReporter(logger), prometheusReporter.TcpChecker())) @@ -149,10 +149,10 @@ func main() { client := vici.NewClientConn(conn) defer client.Close() d := daemon.New(daemon.Configuration{ - Reporter: defaultDaemonReporter(log.Base().With("name", "strongswan"), prometheusReporter, "strongswan"), + Reporter: prometheusReporter.Daemon(log.Base().With("name", "strongswan"), "strongswan"), Interval: 2 * time.Second, Tick: func() { - strongswan.Collect(client, prometheusReporter) + strongswan.Collect(client, prometheusReporter.StrongSwan()) }, }) @@ -180,22 +180,3 @@ func main() { log.Info("exited due to a component shutting down") } } - -func defaultDaemonReporter(logger log.Logger, prometheusReporter *metrics.PrometheusReporter, name string) *daemon.Reporter { - return &daemon.Reporter{ - Started: func(d time.Duration) { - logger.With("state", "started").Infof("%s daemon started with interval %v", name, d) - prometheusReporter.Daemon.Started.WithLabelValues(name, d.String()).Inc() - }, - Stopped: func() { - logger.With("state", "stopped").Infof("%s daemon stopped", name) - 
prometheusReporter.Daemon.Stopped.WithLabelValues(name).Inc() - }, - Skipped: func() { - prometheusReporter.Daemon.Skipped.WithLabelValues(name).Inc() - }, - Ticked: func() { - prometheusReporter.Daemon.Ticked.WithLabelValues(name).Inc() - }, - } -}