From aa651a5f496bb988e67ea4725ed7dc5890430706 Mon Sep 17 00:00:00 2001 From: Artem Gavrilov Date: Wed, 23 Aug 2023 13:50:46 +0200 Subject: [PATCH 1/6] PMM-12384 Add backup artifact metrics --- dev/mongo-rs-backups/docker-compose.yml | 1 + managed/cmd/pmm-managed/main.go | 3 + managed/data/iatemplates/backup_error.yml | 12 +++ managed/services/backup/metrics.go | 97 +++++++++++++++++++++++ managed/services/preconditions.go | 2 +- 5 files changed, 114 insertions(+), 1 deletion(-) create mode 100644 managed/data/iatemplates/backup_error.yml create mode 100644 managed/services/backup/metrics.go diff --git a/dev/mongo-rs-backups/docker-compose.yml b/dev/mongo-rs-backups/docker-compose.yml index 458674b664..001be6355c 100644 --- a/dev/mongo-rs-backups/docker-compose.yml +++ b/dev/mongo-rs-backups/docker-compose.yml @@ -1,6 +1,7 @@ networks: pmm_default: name: pmm_default + external: true services: mongo1: diff --git a/managed/cmd/pmm-managed/main.go b/managed/cmd/pmm-managed/main.go index 64faf965b1..4dfd252e3d 100644 --- a/managed/cmd/pmm-managed/main.go +++ b/managed/cmd/pmm-managed/main.go @@ -927,6 +927,9 @@ func main() { //nolint:cyclop,maintidx dbaasClient := dbaas.NewClient(*dbaasControllerAPIAddrF) compatibilityService := backup.NewCompatibilityService(db, versioner) backupService := backup.NewService(db, jobsService, agentService, compatibilityService, pbmPITRService) + backupMetricsCollector := backup.NewMetricsCollector(db) + prom.MustRegister(backupMetricsCollector) + schedulerService := scheduler.New(db, backupService) versionCache := versioncache.New(db, versioner) emailer := alertmanager.NewEmailer(logrus.WithField("component", "alertmanager-emailer").Logger) diff --git a/managed/data/iatemplates/backup_error.yml b/managed/data/iatemplates/backup_error.yml new file mode 100644 index 0000000000..9099fab036 --- /dev/null +++ b/managed/data/iatemplates/backup_error.yml @@ -0,0 +1,12 @@ +--- +templates: + - name: pmm_backup_error + version: 1 + summary: Backup 
failed + expr: 'pmm_managed_backups_artifacts{status="error"} == bool 1' + for: 1m + severity: error + annotations: + description: |- + Failed to create backup artifact '{{ $labels.artifact_name}}' on service '{{ $labels.service_id }}'. + summary: Failed to create backup artifact '{{ $labels.artifact_name}}' on service '{{ $labels.service_id }}'. diff --git a/managed/services/backup/metrics.go b/managed/services/backup/metrics.go new file mode 100644 index 0000000000..a67bf07bb2 --- /dev/null +++ b/managed/services/backup/metrics.go @@ -0,0 +1,97 @@ +package backup + +import ( + "context" + "time" + + "github.com/pkg/errors" + prom "github.com/prometheus/client_golang/prometheus" + "github.com/sirupsen/logrus" + "gopkg.in/reform.v1" + + "github.com/percona/pmm/managed/models" +) + +const ( + requestTimeout = 3 * time.Second + artifactExists float64 = 1 + prometheusNamespace = "pmm_managed" + prometheusSubsystem = "backups" +) + +type MetricsCollector struct { + db *reform.DB + l *logrus.Entry + + mArtifactsDesc *prom.Desc +} + +func NewMetricsCollector(db *reform.DB) *MetricsCollector { + return &MetricsCollector{ + db: db, + l: logrus.WithField("component", "backups/metrics"), + mArtifactsDesc: prom.NewDesc( + prom.BuildFQName(prometheusNamespace, prometheusSubsystem, "artifacts"), + "Artifacts", + []string{ + "artifact_id", "artifact_name", "artifact_vendor", "service_id", "service_name", + "type", "db_version", "data_model", "mode", "status", + }, + nil), + } +} + +func (c *MetricsCollector) Describe(ch chan<- *prom.Desc) { + ch <- c.mArtifactsDesc +} + +func (c *MetricsCollector) Collect(ch chan<- prom.Metric) { + ctx, cancelCtx := context.WithTimeout(context.Background(), requestTimeout) + defer cancelCtx() + + var artifacts []*models.Artifact + var services map[string]*models.Service + errTx := c.db.InTransactionContext(ctx, nil, func(t *reform.TX) error { + var err error + artifacts, err = models.FindArtifacts(t.Querier, models.ArtifactFilters{}) + if err != 
nil { + return errors.Wrapf(err, "failed to find artifacts") + } + + serviceIDs := make([]string, len(artifacts)) + for _, artifact := range artifacts { + serviceIDs = append(serviceIDs, artifact.ServiceID) + } + + services, err = models.FindServicesByIDs(t.Querier, serviceIDs) + if err != nil { + return errors.Wrapf(err, "failed to find services") + } + return nil + }) + if errTx != nil { + c.l.Warnf("Failed to get artifacts") + } + + for _, artifact := range artifacts { + var serviceName string + if service, ok := services[artifact.ServiceID]; ok { + serviceName = service.ServiceName + } + + ch <- prom.MustNewConstMetric( + c.mArtifactsDesc, + prom.GaugeValue, + artifactExists, + artifact.ID, + artifact.Name, + artifact.Vendor, + artifact.ServiceID, + serviceName, + string(artifact.Type), + artifact.DBVersion, + string(artifact.DataModel), + string(artifact.Mode), + string(artifact.Status)) + } +} diff --git a/managed/services/preconditions.go b/managed/services/preconditions.go index 41e06cf179..8eef5d1a51 100644 --- a/managed/services/preconditions.go +++ b/managed/services/preconditions.go @@ -117,7 +117,7 @@ func CheckArtifactOverlapping(q *reform.Querier, serviceID, locationID, folder s } for _, artifact := range artifacts { - if artifact.ServiceID != serviceID { + if artifact.ServiceID != serviceID { // TODO artifact.ServiceID is empty when service already removed. 
svc, err := models.FindServiceByID(q, artifact.ServiceID) if err != nil { return err From db294edda9a249e0b78a483ba3ce4ff802df25bc Mon Sep 17 00:00:00 2001 From: Artem Gavrilov Date: Wed, 23 Aug 2023 14:05:38 +0200 Subject: [PATCH 2/6] PMM-12384 Fixes --- managed/services/backup/metrics.go | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/managed/services/backup/metrics.go b/managed/services/backup/metrics.go index a67bf07bb2..384c5706d5 100644 --- a/managed/services/backup/metrics.go +++ b/managed/services/backup/metrics.go @@ -1,3 +1,18 @@ +// Copyright (C) 2017 Percona LLC +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see <https://www.gnu.org/licenses/>. 
+ package backup import ( @@ -70,7 +85,8 @@ func (c *MetricsCollector) Collect(ch chan<- prom.Metric) { return nil }) if errTx != nil { - c.l.Warnf("Failed to get artifacts") + c.l.Warnf("Failed to get artifacts: %v", errTx) + return } for _, artifact := range artifacts { From 6a124475b16543ee68f90fdf039389e03be7aeff Mon Sep 17 00:00:00 2001 From: Artem Gavrilov Date: Wed, 23 Aug 2023 15:37:06 +0200 Subject: [PATCH 3/6] PMM-12384 Fix --- managed/services/backup/metrics.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/managed/services/backup/metrics.go b/managed/services/backup/metrics.go index 384c5706d5..ad9dc4e106 100644 --- a/managed/services/backup/metrics.go +++ b/managed/services/backup/metrics.go @@ -73,7 +73,7 @@ func (c *MetricsCollector) Collect(ch chan<- prom.Metric) { return errors.Wrapf(err, "failed to find artifacts") } - serviceIDs := make([]string, len(artifacts)) + serviceIDs := make([]string, 0, len(artifacts)) for _, artifact := range artifacts { serviceIDs = append(serviceIDs, artifact.ServiceID) } From ca2a46814ee1c2e1ce54256a9ee5b3f25ec3740c Mon Sep 17 00:00:00 2001 From: Artem Gavrilov Date: Fri, 25 Aug 2023 11:42:47 +0200 Subject: [PATCH 4/6] PMM-12384 Remove TODO --- managed/services/preconditions.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/managed/services/preconditions.go b/managed/services/preconditions.go index 8eef5d1a51..41e06cf179 100644 --- a/managed/services/preconditions.go +++ b/managed/services/preconditions.go @@ -117,7 +117,7 @@ func CheckArtifactOverlapping(q *reform.Querier, serviceID, locationID, folder s } for _, artifact := range artifacts { - if artifact.ServiceID != serviceID { // TODO artifact.ServiceID is empty when service already removed. 
+ if artifact.ServiceID != serviceID { svc, err := models.FindServiceByID(q, artifact.ServiceID) if err != nil { return err From ecfd96c0a760599ed8e5f0bb109efd480a3c3f70 Mon Sep 17 00:00:00 2001 From: Artem Gavrilov Date: Fri, 25 Aug 2023 11:44:45 +0200 Subject: [PATCH 5/6] PMM-12384 Refactoring --- managed/data/iatemplates/backup_error.yml | 4 ++-- managed/services/backup/metrics.go | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/managed/data/iatemplates/backup_error.yml b/managed/data/iatemplates/backup_error.yml index 9099fab036..b8c4e98635 100644 --- a/managed/data/iatemplates/backup_error.yml +++ b/managed/data/iatemplates/backup_error.yml @@ -8,5 +8,5 @@ templates: severity: error annotations: description: |- - Failed to create backup artifact '{{ $labels.artifact_name}}' on service '{{ $labels.service_id }}'. - summary: Failed to create backup artifact '{{ $labels.artifact_name}}' on service '{{ $labels.service_id }}'. + Failed to create a backup artifact '{{ $labels.artifact_name}}' on service '{{ $labels.service_id }}'. + summary: Failed to create a backup artifact '{{ $labels.artifact_name}}' on service '{{ $labels.service_id }}'. 
diff --git a/managed/services/backup/metrics.go b/managed/services/backup/metrics.go index ad9dc4e106..b97148a958 100644 --- a/managed/services/backup/metrics.go +++ b/managed/services/backup/metrics.go @@ -70,7 +70,7 @@ func (c *MetricsCollector) Collect(ch chan<- prom.Metric) { var err error artifacts, err = models.FindArtifacts(t.Querier, models.ArtifactFilters{}) if err != nil { - return errors.Wrapf(err, "failed to find artifacts") + return errors.WithStack(err) } serviceIDs := make([]string, 0, len(artifacts)) @@ -80,7 +80,7 @@ func (c *MetricsCollector) Collect(ch chan<- prom.Metric) { services, err = models.FindServicesByIDs(t.Querier, serviceIDs) if err != nil { - return errors.Wrapf(err, "failed to find services") + return errors.WithStack(err) } return nil }) From 43928ba7c573d7d76de28b06a02752c296f2de8b Mon Sep 17 00:00:00 2001 From: Artem Gavrilov Date: Tue, 29 Aug 2023 15:45:03 +0200 Subject: [PATCH 6/6] Update managed/data/iatemplates/backup_error.yml --- managed/data/iatemplates/backup_error.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/managed/data/iatemplates/backup_error.yml b/managed/data/iatemplates/backup_error.yml index b8c4e98635..592c109c9c 100644 --- a/managed/data/iatemplates/backup_error.yml +++ b/managed/data/iatemplates/backup_error.yml @@ -2,7 +2,7 @@ templates: - name: pmm_backup_error version: 1 - summary: Backup failed + summary: Backup failed (Tech preview) expr: 'pmm_managed_backups_artifacts{status="error"} == bool 1' for: 1m severity: error