From 43c12b4f2c91424c01f95883e638fdee98aa4817 Mon Sep 17 00:00:00 2001 From: Weny Xu Date: Mon, 16 Dec 2024 22:01:40 +0800 Subject: [PATCH] fix: correct `set_region_role_state_gracefully` behaviors (#5171) * fix: reduce default max rows for fuzz testing * chore: remove Postgres setup from fuzz test workflow * chore(fuzz): increase resource limits for GreptimeDB cluster * chore(fuzz): increase resource limits for kafka * fix: correct `set_region_role_state_gracefully` behaviors * chore: remove Postgres setup from fuzz test workflow * chore(fuzz): reduce resource limits for GreptimeDB & kafka --- .github/actions/setup-kafka-cluster/action.yml | 2 ++ .github/workflows/develop.yml | 4 ---- src/metric-engine/src/engine.rs | 8 +++++++- src/metric-engine/src/engine/catchup.rs | 3 +++ src/mito2/src/worker/handle_catchup.rs | 3 ++- tests-fuzz/src/utils.rs | 2 +- 6 files changed, 15 insertions(+), 7 deletions(-) diff --git a/.github/actions/setup-kafka-cluster/action.yml b/.github/actions/setup-kafka-cluster/action.yml index b8a73394235a..22b438995740 100644 --- a/.github/actions/setup-kafka-cluster/action.yml +++ b/.github/actions/setup-kafka-cluster/action.yml @@ -18,6 +18,8 @@ runs: --set controller.replicaCount=${{ inputs.controller-replicas }} \ --set controller.resources.requests.cpu=50m \ --set controller.resources.requests.memory=128Mi \ + --set controller.resources.limits.cpu=2000m \ + --set controller.resources.limits.memory=2Gi \ --set listeners.controller.protocol=PLAINTEXT \ --set listeners.client.protocol=PLAINTEXT \ --create-namespace \ diff --git a/.github/workflows/develop.yml b/.github/workflows/develop.yml index 6eccbe65b811..8939453f9dd9 100644 --- a/.github/workflows/develop.yml +++ b/.github/workflows/develop.yml @@ -323,8 +323,6 @@ jobs: uses: ./.github/actions/setup-kafka-cluster - name: Setup Etcd cluser uses: ./.github/actions/setup-etcd-cluster - - name: Setup Postgres cluser - uses: ./.github/actions/setup-postgres-cluster # Prepares for fuzz tests 
- uses: arduino/setup-protoc@v3 with: @@ -474,8 +472,6 @@ jobs: uses: ./.github/actions/setup-kafka-cluster - name: Setup Etcd cluser uses: ./.github/actions/setup-etcd-cluster - - name: Setup Postgres cluser - uses: ./.github/actions/setup-postgres-cluster # Prepares for fuzz tests - uses: arduino/setup-protoc@v3 with: diff --git a/src/metric-engine/src/engine.rs b/src/metric-engine/src/engine.rs index 86b64ddfae2a..15b94701139b 100644 --- a/src/metric-engine/src/engine.rs +++ b/src/metric-engine/src/engine.rs @@ -210,7 +210,6 @@ impl RegionEngine for MetricEngine { for x in [ utils::to_metadata_region_id(region_id), utils::to_data_region_id(region_id), - region_id, ] { if let Err(e) = self.inner.mito.set_region_role(x, role) && e.status_code() != StatusCode::RegionNotFound @@ -226,6 +225,13 @@ impl RegionEngine for MetricEngine { region_id: RegionId, region_role_state: SettableRegionRoleState, ) -> std::result::Result { + self.inner + .mito + .set_region_role_state_gracefully( + utils::to_metadata_region_id(region_id), + region_role_state, + ) + .await?; self.inner .mito .set_region_role_state_gracefully(region_id, region_role_state) diff --git a/src/metric-engine/src/engine/catchup.rs b/src/metric-engine/src/engine/catchup.rs index 4b1268c049b5..783e1f009c0a 100644 --- a/src/metric-engine/src/engine/catchup.rs +++ b/src/metric-engine/src/engine/catchup.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +use common_telemetry::debug; use snafu::ResultExt; use store_api::region_engine::RegionEngine; use store_api::region_request::{AffectedRows, RegionCatchupRequest, RegionRequest}; @@ -35,6 +36,7 @@ impl MetricEngineInner { } let metadata_region_id = utils::to_metadata_region_id(region_id); // TODO(weny): improve the catchup, we can read the wal entries only once. 
+ debug!("Catchup metadata region {metadata_region_id}"); self.mito .handle_request( metadata_region_id, @@ -48,6 +50,7 @@ impl MetricEngineInner { .context(MitoCatchupOperationSnafu)?; let data_region_id = utils::to_data_region_id(region_id); + debug!("Catchup data region {data_region_id}"); self.mito .handle_request( data_region_id, diff --git a/src/mito2/src/worker/handle_catchup.rs b/src/mito2/src/worker/handle_catchup.rs index f0fd6b05503c..8992621dd724 100644 --- a/src/mito2/src/worker/handle_catchup.rs +++ b/src/mito2/src/worker/handle_catchup.rs @@ -16,8 +16,8 @@ use std::sync::Arc; -use common_telemetry::info; use common_telemetry::tracing::warn; +use common_telemetry::{debug, info}; use snafu::ensure; use store_api::logstore::LogStore; use store_api::region_engine::RegionRole; @@ -40,6 +40,7 @@ impl RegionWorkerLoop { }; if region.is_writable() { + debug!("Region {region_id} is writable, skip catchup"); return Ok(0); } // Note: Currently, We protect the split brain by ensuring the mutable table is empty. diff --git a/tests-fuzz/src/utils.rs b/tests-fuzz/src/utils.rs index 743347978924..84222f6d5a58 100644 --- a/tests-fuzz/src/utils.rs +++ b/tests-fuzz/src/utils.rs @@ -142,7 +142,7 @@ macro_rules! make_get_from_env_helper { make_get_from_env_helper!(GT_FUZZ_INPUT_MAX_ALTER_ACTIONS, 256); make_get_from_env_helper!(GT_FUZZ_INPUT_MAX_INSERT_ACTIONS, 8); -make_get_from_env_helper!(GT_FUZZ_INPUT_MAX_ROWS, 2048); +make_get_from_env_helper!(GT_FUZZ_INPUT_MAX_ROWS, 512); make_get_from_env_helper!(GT_FUZZ_INPUT_MAX_TABLES, 64); make_get_from_env_helper!(GT_FUZZ_INPUT_MAX_COLUMNS, 32);