diff --git a/quickwit/quickwit-control-plane/src/control_plane.rs b/quickwit/quickwit-control-plane/src/control_plane.rs index 63e7dd44cb0..90a19b7f46b 100644 --- a/quickwit/quickwit-control-plane/src/control_plane.rs +++ b/quickwit/quickwit-control-plane/src/control_plane.rs @@ -190,11 +190,17 @@ fn convert_metastore_error( // It will be up to the client to decide what to do there. error!(err=?metastore_error, transaction_outcome="aborted", "metastore error"); } + crate::metrics::CONTROL_PLANE_METRICS + .metastore_error_aborted + .inc(); Ok(Err(ControlPlaneError::Metastore(metastore_error))) } else { // If the metastore transaction may have been executed, we need to restart the control plane // so that it gets resynced with the metastore state. error!(err=?metastore_error, transaction_outcome="maybe-executed", "metastore error"); + crate::metrics::CONTROL_PLANE_METRICS + .metastore_error_maybe_executed + .inc(); Err(ActorExitStatus::from(anyhow::anyhow!(metastore_error))) } } diff --git a/quickwit/quickwit-control-plane/src/indexing_scheduler/mod.rs b/quickwit/quickwit-control-plane/src/indexing_scheduler/mod.rs index a5e99fc7804..5832c4a624c 100644 --- a/quickwit/quickwit-control-plane/src/indexing_scheduler/mod.rs +++ b/quickwit/quickwit-control-plane/src/indexing_scheduler/mod.rs @@ -168,11 +168,7 @@ fn get_sources_to_schedule(model: &ControlPlaneModel) -> Vec { } impl IndexingScheduler { - pub fn new( - cluster_id: String, - self_node_id: NodeId, - indexer_pool: IndexerPool, - ) -> Self { + pub fn new(cluster_id: String, self_node_id: NodeId, indexer_pool: IndexerPool) -> Self { IndexingScheduler { cluster_id, self_node_id, diff --git a/quickwit/quickwit-control-plane/src/metrics.rs b/quickwit/quickwit-control-plane/src/metrics.rs index 33179d517ff..76049380632 100644 --- a/quickwit/quickwit-control-plane/src/metrics.rs +++ b/quickwit/quickwit-control-plane/src/metrics.rs @@ -1,9 +1,30 @@ +// Copyright (C) 2023 Quickwit, Inc. 
+// +// Quickwit is offered under the AGPL v3.0 and as commercial software. +// For commercial licensing, contact us at hello@quickwit.io. +// +// AGPL: +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>. + use once_cell::sync::Lazy; -use quickwit_common::metrics::{IntCounter, new_counter}; +use quickwit_common::metrics::{new_counter, IntCounter}; pub struct ControlPlaneMetrics { pub restart_total: IntCounter, pub schedule_total: IntCounter, + pub metastore_error_aborted: IntCounter, + pub metastore_error_maybe_executed: IntCounter, } impl Default for ControlPlaneMetrics { @@ -19,8 +40,21 @@ impl Default for ControlPlaneMetrics { "Number of control plane `schedule` operation.", "quickwit_control_plane", ), + metastore_error_aborted: new_counter( + "metastore_error_aborted", + "Number of aborted metastore transaction (= do not trigger a control plane \ + restart)", + "quickwit_control_plane", + ), + metastore_error_maybe_executed: new_counter( + "metastore_error_maybe_executed", + "Number of metastore transaction with an uncertain outcome (= do trigger a \ + control plane restart)", + "quickwit_control_plane", + ), } } } -pub static CONTROL_PLANE_METRICS: Lazy = Lazy::new(ControlPlaneMetrics::default); +pub static CONTROL_PLANE_METRICS: Lazy = + Lazy::new(ControlPlaneMetrics::default);