Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PMM-12384 Backup artifact metrics #2428

Merged
merged 9 commits into from
Sep 7, 2023
1 change: 1 addition & 0 deletions dev/mongo-rs-backups/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
networks:
pmm_default:
name: pmm_default
external: true

services:
mongo1:
Expand Down
3 changes: 3 additions & 0 deletions managed/cmd/pmm-managed/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -927,6 +927,9 @@ func main() { //nolint:cyclop,maintidx
dbaasClient := dbaas.NewClient(*dbaasControllerAPIAddrF)
compatibilityService := backup.NewCompatibilityService(db, versioner)
backupService := backup.NewService(db, jobsService, agentService, compatibilityService, pbmPITRService)
backupMetricsCollector := backup.NewMetricsCollector(db)
prom.MustRegister(backupMetricsCollector)

schedulerService := scheduler.New(db, backupService)
versionCache := versioncache.New(db, versioner)
emailer := alertmanager.NewEmailer(logrus.WithField("component", "alertmanager-emailer").Logger)
Expand Down
12 changes: 12 additions & 0 deletions managed/data/iatemplates/backup_error.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
---
templates:
- name: pmm_backup_error
version: 1
summary: Backup failed
artemgavrilov marked this conversation as resolved.
Show resolved Hide resolved
expr: 'pmm_managed_backups_artifacts{status="error"} == bool 1'
for: 1m
severity: error
annotations:
description: |-
Failed to create backup artifact '{{ $labels.artifact_name}}' on service '{{ $labels.service_id }}'.
artemgavrilov marked this conversation as resolved.
Show resolved Hide resolved
summary: Failed to create backup artifact '{{ $labels.artifact_name}}' on service '{{ $labels.service_id }}'.
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TODO: Find better summary and description.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@rnovikovP @catalinaadam any suggestions?

artemgavrilov marked this conversation as resolved.
Show resolved Hide resolved
113 changes: 113 additions & 0 deletions managed/services/backup/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
// Copyright (C) 2017 Percona LLC
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.

package backup

import (
"context"
"time"

"github.com/pkg/errors"
prom "github.com/prometheus/client_golang/prometheus"
"github.com/sirupsen/logrus"
"gopkg.in/reform.v1"

"github.com/percona/pmm/managed/models"
)

// Settings used by the backup artifacts metrics collector.
const (
	// prometheusNamespace and prometheusSubsystem are the leading parts of the
	// fully-qualified metric name built with prom.BuildFQName.
	prometheusNamespace = "pmm_managed"
	prometheusSubsystem = "backups"

	// requestTimeout is the timeout of the context used for the database
	// transaction performed on every Collect call.
	requestTimeout = 3 * time.Second

	// artifactExists is the gauge value reported for every artifact.
	artifactExists = float64(1)
)

// MetricsCollector exposes backup artifacts stored in the database as
// Prometheus metrics. It provides the Describe and Collect methods and is
// meant to be registered with a Prometheus registry.
type MetricsCollector struct {
	// db is the database handle used to read artifacts and services.
	db *reform.DB
	// l is the logger used to report collection failures.
	l *logrus.Entry

	// mArtifactsDesc describes the per-artifact gauge metric.
	mArtifactsDesc *prom.Desc
}

// NewMetricsCollector returns a MetricsCollector that reads backup artifacts
// from db. The descriptor of the artifacts metric is built once here and
// reused by Describe and Collect.
func NewMetricsCollector(db *reform.DB) *MetricsCollector {
	// The order of label names must match the order of label values passed
	// to prom.MustNewConstMetric in Collect.
	labelNames := []string{
		"artifact_id", "artifact_name", "artifact_vendor", "service_id", "service_name",
		"type", "db_version", "data_model", "mode", "status",
	}
	metricName := prom.BuildFQName(prometheusNamespace, prometheusSubsystem, "artifacts")

	collector := &MetricsCollector{
		db: db,
		l:  logrus.WithField("component", "backups/metrics"),
	}
	collector.mArtifactsDesc = prom.NewDesc(metricName, "Artifacts", labelNames, nil)

	return collector
}

// Describe sends the descriptor of the artifacts metric, the only metric this
// collector provides, to ch.
func (c *MetricsCollector) Describe(ch chan<- *prom.Desc) {
	artifactsDesc := c.mArtifactsDesc
	ch <- artifactsDesc
}

func (c *MetricsCollector) Collect(ch chan<- prom.Metric) {
ctx, cancelCtx := context.WithTimeout(context.Background(), requestTimeout)
defer cancelCtx()

var artifacts []*models.Artifact
var services map[string]*models.Service
errTx := c.db.InTransactionContext(ctx, nil, func(t *reform.TX) error {
var err error
artifacts, err = models.FindArtifacts(t.Querier, models.ArtifactFilters{})
if err != nil {
return errors.Wrapf(err, "failed to find artifacts")
artemgavrilov marked this conversation as resolved.
Show resolved Hide resolved
}

serviceIDs := make([]string, len(artifacts))
for _, artifact := range artifacts {
serviceIDs = append(serviceIDs, artifact.ServiceID)
artemgavrilov marked this conversation as resolved.
Show resolved Hide resolved
}

services, err = models.FindServicesByIDs(t.Querier, serviceIDs)
if err != nil {
return errors.Wrapf(err, "failed to find services")
artemgavrilov marked this conversation as resolved.
Show resolved Hide resolved
}
return nil
})
if errTx != nil {
c.l.Warnf("Failed to get artifacts: %v", errTx)
return
}

for _, artifact := range artifacts {
var serviceName string
if service, ok := services[artifact.ServiceID]; ok {
serviceName = service.ServiceName
}

ch <- prom.MustNewConstMetric(
c.mArtifactsDesc,
prom.GaugeValue,
artifactExists,
artifact.ID,
artifact.Name,
artifact.Vendor,
artifact.ServiceID,
serviceName,
string(artifact.Type),
artifact.DBVersion,
string(artifact.DataModel),
string(artifact.Mode),
string(artifact.Status))
}
}
2 changes: 1 addition & 1 deletion managed/services/preconditions.go
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ func CheckArtifactOverlapping(q *reform.Querier, serviceID, locationID, folder s
}

for _, artifact := range artifacts {
if artifact.ServiceID != serviceID {
if artifact.ServiceID != serviceID { // TODO artifact.ServiceID is empty when service already removed.
artemgavrilov marked this conversation as resolved.
Show resolved Hide resolved
svc, err := models.FindServiceByID(q, artifact.ServiceID)
if err != nil {
return err
Expand Down