From 957d0b4ede7f43b888804dd1065d41f375e4d86d Mon Sep 17 00:00:00 2001 From: WenyXu Date: Sun, 15 Dec 2024 12:48:07 +0000 Subject: [PATCH 1/7] fix: reduce default max rows for fuzz testing --- tests-fuzz/src/utils.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests-fuzz/src/utils.rs b/tests-fuzz/src/utils.rs index 743347978924..84222f6d5a58 100644 --- a/tests-fuzz/src/utils.rs +++ b/tests-fuzz/src/utils.rs @@ -142,7 +142,7 @@ macro_rules! make_get_from_env_helper { make_get_from_env_helper!(GT_FUZZ_INPUT_MAX_ALTER_ACTIONS, 256); make_get_from_env_helper!(GT_FUZZ_INPUT_MAX_INSERT_ACTIONS, 8); -make_get_from_env_helper!(GT_FUZZ_INPUT_MAX_ROWS, 2048); +make_get_from_env_helper!(GT_FUZZ_INPUT_MAX_ROWS, 512); make_get_from_env_helper!(GT_FUZZ_INPUT_MAX_TABLES, 64); make_get_from_env_helper!(GT_FUZZ_INPUT_MAX_COLUMNS, 32); From 298b6570db1036bbcdd9915a3c2ff7df9bb482bf Mon Sep 17 00:00:00 2001 From: WenyXu Date: Sun, 15 Dec 2024 12:54:52 +0000 Subject: [PATCH 2/7] chore: remove Postgres setup from fuzz test workflow --- .github/workflows/develop.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/develop.yml b/.github/workflows/develop.yml index 6eccbe65b811..dbdfda4c3012 100644 --- a/.github/workflows/develop.yml +++ b/.github/workflows/develop.yml @@ -323,8 +323,6 @@ jobs: uses: ./.github/actions/setup-kafka-cluster - name: Setup Etcd cluser uses: ./.github/actions/setup-etcd-cluster - - name: Setup Postgres cluser - uses: ./.github/actions/setup-postgres-cluster # Prepares for fuzz tests - uses: arduino/setup-protoc@v3 with: From 88af8728f43aa304a42aeae7e320014d0225b049 Mon Sep 17 00:00:00 2001 From: WenyXu Date: Mon, 16 Dec 2024 03:03:45 +0000 Subject: [PATCH 3/7] chore(fuzz): increase resource limits for GreptimeDB cluster --- .github/actions/setup-greptimedb-cluster/action.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/actions/setup-greptimedb-cluster/action.yml 
b/.github/actions/setup-greptimedb-cluster/action.yml index 7c385c43a9a9..034a18fbc5a1 100644 --- a/.github/actions/setup-greptimedb-cluster/action.yml +++ b/.github/actions/setup-greptimedb-cluster/action.yml @@ -58,8 +58,8 @@ runs: --set image.tag=${{ inputs.image-tag }} \ --set base.podTemplate.main.resources.requests.cpu=50m \ --set base.podTemplate.main.resources.requests.memory=256Mi \ - --set base.podTemplate.main.resources.limits.cpu=2000m \ - --set base.podTemplate.main.resources.limits.memory=2Gi \ + --set base.podTemplate.main.resources.limits.cpu=4000m \ + --set base.podTemplate.main.resources.limits.memory=4Gi \ --set frontend.replicas=${{ inputs.frontend-replicas }} \ --set datanode.replicas=${{ inputs.datanode-replicas }} \ --set meta.replicas=${{ inputs.meta-replicas }} \ From 0e4296379f5c1ae54173d8895fe166adfe0069be Mon Sep 17 00:00:00 2001 From: WenyXu Date: Mon, 16 Dec 2024 03:14:01 +0000 Subject: [PATCH 4/7] chore(fuzz): increase resource limits for kafka --- .github/actions/setup-kafka-cluster/action.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/actions/setup-kafka-cluster/action.yml b/.github/actions/setup-kafka-cluster/action.yml index b8a73394235a..71deeabc88ea 100644 --- a/.github/actions/setup-kafka-cluster/action.yml +++ b/.github/actions/setup-kafka-cluster/action.yml @@ -18,6 +18,8 @@ runs: --set controller.replicaCount=${{ inputs.controller-replicas }} \ --set controller.resources.requests.cpu=50m \ --set controller.resources.requests.memory=128Mi \ + --set controller.resources.limits.cpu=4000m \ + --set controller.resources.limits.memory=4Gi \ --set listeners.controller.protocol=PLAINTEXT \ --set listeners.client.protocol=PLAINTEXT \ --create-namespace \ From d30b9c421206cdabd7b15ba9f02d2700a81bc57f Mon Sep 17 00:00:00 2001 From: WenyXu Date: Mon, 16 Dec 2024 11:57:27 +0000 Subject: [PATCH 5/7] fix: correct `set_region_role_state_gracefully` behaviors --- src/metric-engine/src/engine.rs | 8 +++++++- 
src/metric-engine/src/engine/catchup.rs | 3 +++ src/mito2/src/worker/handle_catchup.rs | 3 ++- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/metric-engine/src/engine.rs b/src/metric-engine/src/engine.rs index 86b64ddfae2a..15b94701139b 100644 --- a/src/metric-engine/src/engine.rs +++ b/src/metric-engine/src/engine.rs @@ -210,7 +210,6 @@ impl RegionEngine for MetricEngine { for x in [ utils::to_metadata_region_id(region_id), utils::to_data_region_id(region_id), - region_id, ] { if let Err(e) = self.inner.mito.set_region_role(x, role) && e.status_code() != StatusCode::RegionNotFound @@ -226,6 +225,13 @@ impl RegionEngine for MetricEngine { region_id: RegionId, region_role_state: SettableRegionRoleState, ) -> std::result::Result { + self.inner + .mito + .set_region_role_state_gracefully( + utils::to_metadata_region_id(region_id), + region_role_state, + ) + .await?; self.inner .mito .set_region_role_state_gracefully(region_id, region_role_state) diff --git a/src/metric-engine/src/engine/catchup.rs b/src/metric-engine/src/engine/catchup.rs index 4b1268c049b5..783e1f009c0a 100644 --- a/src/metric-engine/src/engine/catchup.rs +++ b/src/metric-engine/src/engine/catchup.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +use common_telemetry::debug; use snafu::ResultExt; use store_api::region_engine::RegionEngine; use store_api::region_request::{AffectedRows, RegionCatchupRequest, RegionRequest}; @@ -35,6 +36,7 @@ impl MetricEngineInner { } let metadata_region_id = utils::to_metadata_region_id(region_id); // TODO(weny): improve the catchup, we can read the wal entries only once. 
+ debug!("Catchup metadata region {metadata_region_id}"); self.mito .handle_request( metadata_region_id, @@ -48,6 +50,7 @@ impl MetricEngineInner { .context(MitoCatchupOperationSnafu)?; let data_region_id = utils::to_data_region_id(region_id); + debug!("Catchup data region {data_region_id}"); self.mito .handle_request( data_region_id, diff --git a/src/mito2/src/worker/handle_catchup.rs b/src/mito2/src/worker/handle_catchup.rs index f0fd6b05503c..8992621dd724 100644 --- a/src/mito2/src/worker/handle_catchup.rs +++ b/src/mito2/src/worker/handle_catchup.rs @@ -16,8 +16,8 @@ use std::sync::Arc; -use common_telemetry::info; use common_telemetry::tracing::warn; +use common_telemetry::{debug, info}; use snafu::ensure; use store_api::logstore::LogStore; use store_api::region_engine::RegionRole; @@ -40,6 +40,7 @@ impl RegionWorkerLoop { }; if region.is_writable() { + debug!("Region {region_id} is writable, skip catchup"); return Ok(0); } // Note: Currently, We protect the split brain by ensuring the mutable table is empty. 
From 3d75f746620dc3113390166409e12941f840b5b6 Mon Sep 17 00:00:00 2001 From: WenyXu Date: Mon, 16 Dec 2024 12:01:15 +0000 Subject: [PATCH 6/7] chore: remove Postgres setup from fuzz test workflow --- .github/workflows/develop.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/develop.yml b/.github/workflows/develop.yml index dbdfda4c3012..8939453f9dd9 100644 --- a/.github/workflows/develop.yml +++ b/.github/workflows/develop.yml @@ -472,8 +472,6 @@ jobs: uses: ./.github/actions/setup-kafka-cluster - name: Setup Etcd cluser uses: ./.github/actions/setup-etcd-cluster - - name: Setup Postgres cluser - uses: ./.github/actions/setup-postgres-cluster # Prepares for fuzz tests - uses: arduino/setup-protoc@v3 with: From a899aa084f8dff999de41791f5afc1b1e20e2bf9 Mon Sep 17 00:00:00 2001 From: WenyXu Date: Mon, 16 Dec 2024 12:18:10 +0000 Subject: [PATCH 7/7] chore(fuzz): reduce resource limits for GreptimeDB & kafka --- .github/actions/setup-greptimedb-cluster/action.yml | 4 ++-- .github/actions/setup-kafka-cluster/action.yml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/actions/setup-greptimedb-cluster/action.yml b/.github/actions/setup-greptimedb-cluster/action.yml index 034a18fbc5a1..7c385c43a9a9 100644 --- a/.github/actions/setup-greptimedb-cluster/action.yml +++ b/.github/actions/setup-greptimedb-cluster/action.yml @@ -58,8 +58,8 @@ runs: --set image.tag=${{ inputs.image-tag }} \ --set base.podTemplate.main.resources.requests.cpu=50m \ --set base.podTemplate.main.resources.requests.memory=256Mi \ - --set base.podTemplate.main.resources.limits.cpu=4000m \ - --set base.podTemplate.main.resources.limits.memory=4Gi \ + --set base.podTemplate.main.resources.limits.cpu=2000m \ + --set base.podTemplate.main.resources.limits.memory=2Gi \ --set frontend.replicas=${{ inputs.frontend-replicas }} \ --set datanode.replicas=${{ inputs.datanode-replicas }} \ --set meta.replicas=${{ inputs.meta-replicas }} \ diff --git 
a/.github/actions/setup-kafka-cluster/action.yml b/.github/actions/setup-kafka-cluster/action.yml index 71deeabc88ea..22b438995740 100644 --- a/.github/actions/setup-kafka-cluster/action.yml +++ b/.github/actions/setup-kafka-cluster/action.yml @@ -18,8 +18,8 @@ runs: --set controller.replicaCount=${{ inputs.controller-replicas }} \ --set controller.resources.requests.cpu=50m \ --set controller.resources.requests.memory=128Mi \ - --set controller.resources.limits.cpu=4000m \ - --set controller.resources.limits.memory=4Gi \ + --set controller.resources.limits.cpu=2000m \ + --set controller.resources.limits.memory=2Gi \ --set listeners.controller.protocol=PLAINTEXT \ --set listeners.client.protocol=PLAINTEXT \ --create-namespace \