Skip to content

Commit

Permalink
90% utilization tests
Browse files Browse the repository at this point in the history
  • Loading branch information
cezarmoise committed Dec 11, 2024
1 parent 63b10fb commit 8cf8d4d
Show file tree
Hide file tree
Showing 16 changed files with 549 additions and 0 deletions.
19 changes: 19 additions & 0 deletions jenkins-pipelines/temp_90_percent/90_percent_drop.jenkinsfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!groovy

// Load the 'sct' shared library from the same SCM checkout as this Jenkinsfile
// (trick from https://github.com/jenkinsci/workflow-cps-global-lib-plugin/pull/43).
def lib = library(identifier: 'sct@snapshot', retriever: legacySCM(scm))

// Runs test_latency_mixed_with_nemesis with the 90-percent "drop" test case.
perfRegressionParallelPipeline(
    // target infrastructure
    backend: 'aws',
    availability_zone: 'a',

    // test selection
    test_name: 'performance_regression_test.PerformanceRegressionTest',
    test_config: '["test-cases/temp_90_percent/90-percent-perf-i4i-2xlarge-drop.yaml", "configurations/disable_kms.yaml"]',
    sub_tests: ['test_latency_mixed_with_nemesis'],
    scylla_version: 'enterprise:latest',

    // notifications
    email_recipients: '[email protected]',

    // post-run behaviour for every resource group
    post_behavior_db_nodes: 'destroy',
    post_behavior_loader_nodes: 'destroy',
    post_behavior_monitor_nodes: 'destroy',
    post_behavior_k8s_cluster: 'destroy',

    provision_type: 'on_demand',
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!groovy

// Load the 'sct' shared library from the same SCM checkout as this Jenkinsfile
// (trick from https://github.com/jenkinsci/workflow-cps-global-lib-plugin/pull/43).
def lib = library(identifier: 'sct@snapshot', retriever: legacySCM(scm))

// Runs test_latency_mixed_with_nemesis with the 90-percent "experiment" test case.
perfRegressionParallelPipeline(
    // target infrastructure
    backend: 'aws',
    availability_zone: 'a',

    // test selection
    test_name: 'performance_regression_test.PerformanceRegressionTest',
    test_config: '["test-cases/temp_90_percent/90-percent-perf-experiment.yaml", "configurations/disable_kms.yaml"]',
    sub_tests: ['test_latency_mixed_with_nemesis'],
    scylla_version: 'enterprise:latest',

    // notifications
    email_recipients: '[email protected]',

    // post-run behaviour for every resource group
    post_behavior_db_nodes: 'destroy',
    post_behavior_loader_nodes: 'destroy',
    post_behavior_monitor_nodes: 'destroy',
    post_behavior_k8s_cluster: 'destroy',

    provision_type: 'on_demand',
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!groovy

// Load the 'sct' shared library from the same SCM checkout as this Jenkinsfile
// (trick from https://github.com/jenkinsci/workflow-cps-global-lib-plugin/pull/43).
def lib = library(identifier: 'sct@snapshot', retriever: legacySCM(scm))

// Runs test_latency_mixed_with_nemesis with the 90-percent
// "i4i-large-grow-i4i-4xlarge" test case.
perfRegressionParallelPipeline(
    // target infrastructure
    backend: 'aws',
    availability_zone: 'a',

    // test selection
    test_name: 'performance_regression_test.PerformanceRegressionTest',
    test_config: '["test-cases/temp_90_percent/90-percent-perf-i4i-large-grow-i4i-4xlarge.yaml", "configurations/disable_kms.yaml"]',
    sub_tests: ['test_latency_mixed_with_nemesis'],
    scylla_version: 'enterprise:latest',

    // notifications
    email_recipients: '[email protected]',

    // post-run behaviour for every resource group
    post_behavior_db_nodes: 'destroy',
    post_behavior_loader_nodes: 'destroy',
    post_behavior_monitor_nodes: 'destroy',
    post_behavior_k8s_cluster: 'destroy',

    provision_type: 'on_demand',
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!groovy

// Load the 'sct' shared library from the same SCM checkout as this Jenkinsfile
// (trick from https://github.com/jenkinsci/workflow-cps-global-lib-plugin/pull/43).
def lib = library(identifier: 'sct@snapshot', retriever: legacySCM(scm))

// Runs test_latency_mixed_with_nemesis with the 90-percent
// "i4i-2xlarge-grow-shrink-dc" test case.
perfRegressionParallelPipeline(
    // target infrastructure
    backend: 'aws',
    availability_zone: 'a',

    // test selection
    test_name: 'performance_regression_test.PerformanceRegressionTest',
    test_config: '["test-cases/temp_90_percent/90-percent-perf-i4i-2xlarge-grow-shrink-dc.yaml", "configurations/disable_kms.yaml"]',
    sub_tests: ['test_latency_mixed_with_nemesis'],
    scylla_version: 'enterprise:latest',

    // notifications
    email_recipients: '[email protected]',

    // post-run behaviour for every resource group
    post_behavior_db_nodes: 'destroy',
    post_behavior_loader_nodes: 'destroy',
    post_behavior_monitor_nodes: 'destroy',
    post_behavior_k8s_cluster: 'destroy',

    provision_type: 'on_demand',
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!groovy

// Load the 'sct' shared library from the same SCM checkout as this Jenkinsfile
// (trick from https://github.com/jenkinsci/workflow-cps-global-lib-plugin/pull/43).
def lib = library(identifier: 'sct@snapshot', retriever: legacySCM(scm))

// Runs test_latency_mixed_with_nemesis with the 90-percent
// "i4i-2xlarge-grow-shrink-i4i-2xlarge" test case.
perfRegressionParallelPipeline(
    // target infrastructure
    backend: 'aws',
    availability_zone: 'a',

    // test selection
    test_name: 'performance_regression_test.PerformanceRegressionTest',
    test_config: '["test-cases/temp_90_percent/90-percent-perf-i4i-2xlarge-grow-shrink-i4i-2xlarge.yaml", "configurations/disable_kms.yaml"]',
    sub_tests: ['test_latency_mixed_with_nemesis'],
    scylla_version: 'enterprise:latest',

    // notifications
    email_recipients: '[email protected]',

    // post-run behaviour for every resource group
    post_behavior_db_nodes: 'destroy',
    post_behavior_loader_nodes: 'destroy',
    post_behavior_monitor_nodes: 'destroy',
    post_behavior_k8s_cluster: 'destroy',

    provision_type: 'on_demand',
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!groovy

// Load the 'sct' shared library from the same SCM checkout as this Jenkinsfile
// (trick from https://github.com/jenkinsci/workflow-cps-global-lib-plugin/pull/43).
def lib = library(identifier: 'sct@snapshot', retriever: legacySCM(scm))

// Runs test_latency_mixed_with_nemesis with the 90-percent
// "i4i-4xlarge-grow-i4i-large" test case.
perfRegressionParallelPipeline(
    // target infrastructure
    backend: 'aws',
    availability_zone: 'a',

    // test selection
    test_name: 'performance_regression_test.PerformanceRegressionTest',
    test_config: '["test-cases/temp_90_percent/90-percent-perf-i4i-4xlarge-grow-i4i-large.yaml", "configurations/disable_kms.yaml"]',
    sub_tests: ['test_latency_mixed_with_nemesis'],
    scylla_version: 'enterprise:latest',

    // notifications
    email_recipients: '[email protected]',

    // post-run behaviour for every resource group
    post_behavior_db_nodes: 'destroy',
    post_behavior_loader_nodes: 'destroy',
    post_behavior_monitor_nodes: 'destroy',
    post_behavior_k8s_cluster: 'destroy',

    provision_type: 'on_demand',
)
19 changes: 19 additions & 0 deletions jenkins-pipelines/temp_90_percent/90_percent_truncate.jenkinsfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!groovy

// Load the 'sct' shared library from the same SCM checkout as this Jenkinsfile
// (trick from https://github.com/jenkinsci/workflow-cps-global-lib-plugin/pull/43).
def lib = library(identifier: 'sct@snapshot', retriever: legacySCM(scm))

// Runs test_latency_mixed_with_nemesis with the 90-percent "truncate" test case.
perfRegressionParallelPipeline(
    // target infrastructure
    backend: 'aws',
    availability_zone: 'a',

    // test selection
    test_name: 'performance_regression_test.PerformanceRegressionTest',
    test_config: '["test-cases/temp_90_percent/90-percent-perf-i4i-2xlarge-truncate.yaml", "configurations/disable_kms.yaml"]',
    sub_tests: ['test_latency_mixed_with_nemesis'],
    scylla_version: 'enterprise:latest',

    // notifications
    email_recipients: '[email protected]',

    // post-run behaviour for every resource group
    post_behavior_db_nodes: 'destroy',
    post_behavior_loader_nodes: 'destroy',
    post_behavior_monitor_nodes: 'destroy',
    post_behavior_k8s_cluster: 'destroy',

    provision_type: 'on_demand',
)
1 change: 1 addition & 0 deletions jenkins-pipelines/temp_90_percent/_display_name
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
90% Utilization
98 changes: 98 additions & 0 deletions sdcm/nemesis.py
Original file line number Diff line number Diff line change
Expand Up @@ -2094,6 +2094,18 @@ def _truncate_cmd_timeout_suffix(self, truncate_timeout): # pylint: disable=no-
# NOTE: 'self' is used by the 'scylla_versions' decorator
return ''

@latency_calculator_decorator(legend="Drop Table")
def disrupt_drop(self):
    """
    Drop the ``standard1`` table of the ``ks_drop`` keyspace.

    Calls ``_prepare_test_table`` for the ``ks_drop`` keyspace first, then issues the
    ``DROP TABLE`` statement through a CQL session opened against the target node.
    """
    drop_keyspace, victim_table = 'ks_drop', 'standard1'

    self._prepare_test_table(ks=drop_keyspace)

    # the session is opened with keyspace=ks_drop, so the table name is left unqualified
    with self.cluster.cql_connection_patient(self.target_node, keyspace=drop_keyspace) as cql:
        cql.execute(f"DROP TABLE {victim_table};")

@latency_calculator_decorator(legend="Truncate Table")
def disrupt_truncate(self):
keyspace_truncate = 'ks_truncate'
table = 'standard1'
Expand Down Expand Up @@ -4244,6 +4256,14 @@ def _double_cluster_load(self, duration: int) -> None:
results = self.tester.get_stress_results(queue=stress_queue, store_results=False)
self.log.info(f"Double load results: {results}")

@target_data_nodes
def disrupt_grow_cluster(self):
    """
    Grow the cluster without shrinking it afterwards.

    When ``nemesis_sequence_sleep_between_ops`` is set and no steady-state run has been
    recorded yet (``self.has_steady_run``), steady-state latency is measured first.
    """
    pause_between_ops = self.cluster.params.get('nemesis_sequence_sleep_between_ops')
    needs_steady_run = not self.has_steady_run and pause_between_ops
    if needs_steady_run:
        self.steady_state_latency()
        self.has_steady_run = True
    self._grow_cluster(rack=None)

@target_data_nodes
def disrupt_grow_shrink_cluster(self):
sleep_time_between_ops = self.cluster.params.get('nemesis_sequence_sleep_between_ops')
Expand All @@ -4258,6 +4278,45 @@ def disrupt_grow_shrink_cluster(self):
self._double_cluster_load(duration)
self._shrink_cluster(rack=None, new_nodes=new_nodes)

@target_data_nodes
def disrupt_grow_shrink_datacenter(self):
    """
    Add a new datacenter, grow both datacenters, then decommission the new one.

    Flow:
      1. measure steady-state latency if it has not been measured yet
         (``self.has_steady_run``);
      2. add ``n_db_nodes`` nodes through ``_add_new_node_in_new_dc``, sleeping
         between additions;
      3. call ``_grow_cluster`` and add ``nemesis_add_node_cnt`` more nodes through
         ``_add_new_node_in_new_dc``;
      4. decommission every node that was added through ``_add_new_node_in_new_dc``.

    NOTE: nodes added by ``_grow_cluster`` are not removed by this method.

    :raises UnsupportedNemesis: when running on Kubernetes or in a multi-region setup.
    """
    if self._is_it_on_kubernetes():
        raise UnsupportedNemesis("Operator doesn't support multi-DC yet. Skipping.")
    if self.cluster.test_config.MULTI_REGION:
        raise UnsupportedNemesis(
            "grow_shrink_datacenter skipped for multi-dc scenario "
            "(https://github.com/scylladb/scylla-cluster-tests/issues/5369)")
    InfoEvent(message='Starting Grow Shrink DC Nemesis').publish()

    # The configured value is multiplied by 60 before being handed to time.sleep(),
    # i.e. it is treated as minutes here; an unset/zero value falls back to 10.
    sleep_time_between_ops = (self.cluster.params.get('nemesis_sequence_sleep_between_ops') or 10) * 60
    # sleep_time_between_ops is always non-zero at this point, so only the flag matters
    if not self.has_steady_run:
        self.steady_state_latency()
        self.has_steady_run = True

    # create a new dc
    InfoEvent(message='New DC').publish()
    nodes_on_new_dc = []
    # NOTE(review): assumes n_db_nodes is a plain integer in this (single-region) setup - confirm
    initial_dc_nodes = self.cluster.params.get('n_db_nodes')
    for _ in range(initial_dc_nodes):
        nodes_on_new_dc.append(self._add_new_node_in_new_dc())
        time.sleep(sleep_time_between_ops)

    # reconfigure keyspaces
    # TODO

    # add nodes to each dc
    InfoEvent(message='Grow both DCs').publish()
    add_nodes_number = self.tester.params.get('nemesis_add_node_cnt')
    self._grow_cluster()
    for _ in range(add_nodes_number):
        nodes_on_new_dc.append(self._add_new_node_in_new_dc())
        time.sleep(sleep_time_between_ops)

    # remove the new dc
    InfoEvent(message='Remove DC').publish()
    for node in nodes_on_new_dc:
        self.cluster.decommission(node)

# NOTE: version limitation is caused by the following:
# - https://github.com/scylladb/scylla-enterprise/issues/3211
# - https://github.com/scylladb/scylladb/issues/14184
Expand Down Expand Up @@ -4697,6 +4756,7 @@ def _verify_cdc_feature_status(self, keyspace: str, table: str, cdc_settings: di
assert actual_cdc_settings == cdc_settings, \
f"CDC extension settings are differs. Current: {actual_cdc_settings} expected: {cdc_settings}"

@latency_calculator_decorator(legend="Adding new nodes in new DC")
def _add_new_node_in_new_dc(self, is_zero_node=False) -> BaseNode:
if is_zero_node:
new_node = skip_on_capacity_issues(self.cluster.add_nodes)(
Expand Down Expand Up @@ -5549,6 +5609,16 @@ def disrupt(self):
time.sleep(300)


class SteadyMonkey(Nemesis):
    """Nemesis that performs no disruption; it only triggers the steady-state latency run."""
    kubernetes = True

    def disrupt(self):
        pause_between_ops = self.cluster.params.get('nemesis_sequence_sleep_between_ops')
        # nothing to do when the steady run already happened or no pause is configured
        if self.has_steady_run or not pause_between_ops:
            return
        self.steady_state_latency(sleep_time=pause_between_ops)
        self.has_steady_run = True


class AddRemoveDcNemesis(Nemesis):

disruptive = True
Expand All @@ -5561,6 +5631,15 @@ def disrupt(self):
self.disrupt_add_remove_dc()


class GrowClusterNemesis(Nemesis):
    """Nemesis that grows the cluster and leaves the added nodes in place."""
    topology_changes = True
    kubernetes = True
    disruptive = True

    def disrupt(self):
        self.disrupt_grow_cluster()


class GrowShrinkClusterNemesis(Nemesis):
disruptive = True
kubernetes = True
Expand All @@ -5570,6 +5649,15 @@ def disrupt(self):
self.disrupt_grow_shrink_cluster()


class GrowShrinkDatacenterNemesis(Nemesis):
    """Nemesis that adds a new datacenter, grows both datacenters and removes the new one."""
    disruptive = True
    # The disruption raises UnsupportedNemesis when run on Kubernetes
    # ("Operator doesn't support multi-DC yet"), so it must not be advertised as K8s-capable.
    kubernetes = False
    topology_changes = True

    def disrupt(self):
        self.disrupt_grow_shrink_datacenter()


class AddRemoveRackNemesis(Nemesis):
disruptive = True
kubernetes = True
Expand Down Expand Up @@ -5790,6 +5878,16 @@ def disrupt(self):
self.disrupt_nodetool_cleanup()


class DropMonkey(Nemesis):
    """Nemesis that drops a table in a dedicated keyspace."""
    free_tier_set = True
    limited = True
    kubernetes = True
    disruptive = False

    def disrupt(self):
        self.disrupt_drop()


class TruncateMonkey(Nemesis):
disruptive = False
kubernetes = True
Expand Down
44 changes: 44 additions & 0 deletions test-cases/temp_90_percent/90-percent-perf-experiment.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# 90% utilization experiment: mixed-workload latency test with the DropMonkey nemesis.
# NOTE(review): the unit of test_duration is not stated in this file - presumably minutes; confirm.
test_duration: 1080
# Dataset population: four cassandra-stress write jobs of 312,500,000 rows each, over
# consecutive non-overlapping key ranges that together cover 1..1,250,000,000.
# All writes use cl=ALL with NetworkTopologyStrategy, replication_factor=3.
prepare_write_cmd: [
"cassandra-stress write no-warmup cl=ALL n=312500000 -schema 'replication(strategy=NetworkTopologyStrategy,replication_factor=3)' -mode cql3 native -rate threads=200 -col 'size=FIXED(128) n=FIXED(8)' -pop seq=1..312500000",
"cassandra-stress write no-warmup cl=ALL n=312500000 -schema 'replication(strategy=NetworkTopologyStrategy,replication_factor=3)' -mode cql3 native -rate threads=200 -col 'size=FIXED(128) n=FIXED(8)' -pop seq=312500001..625000000",
"cassandra-stress write no-warmup cl=ALL n=312500000 -schema 'replication(strategy=NetworkTopologyStrategy,replication_factor=3)' -mode cql3 native -rate threads=200 -col 'size=FIXED(128) n=FIXED(8)' -pop seq=625000001..937500000",
"cassandra-stress write no-warmup cl=ALL n=312500000 -schema 'replication(strategy=NetworkTopologyStrategy,replication_factor=3)' -mode cql3 native -rate threads=200 -col 'size=FIXED(128) n=FIXED(8)' -pop seq=937500001..1250000000",
]

# Main load: mixed workload at cl=QUORUM for 800 minutes, rate fixed at 16875 ops/s with
# 300 threads, keys drawn from a gaussian distribution over 1..650,000,000.
stress_cmd_m: "cassandra-stress mixed no-warmup cl=QUORUM duration=800m -schema 'replication(strategy=NetworkTopologyStrategy,replication_factor=3)' -mode cql3 native -rate 'threads=300 fixed=16875/s' -col 'size=FIXED(128) n=FIXED(8)' -pop 'dist=gauss(1..650000000,325000000,6500000)' "


# Cluster topology: node counts per role.
n_db_nodes: 3
nemesis_add_node_cnt: 2
n_loaders: 4
n_monitor_nodes: 1
nemesis_grow_shrink_instance_type: 'i4i.2xlarge'

# Instance types per role.
instance_type_loader: 'c6i.2xlarge'
instance_type_monitor: 't3.large'
instance_type_db: 'i4i.2xlarge'

# Nemesis selection and pacing.
# NOTE(review): units of nemesis_interval / nemesis_sequence_sleep_between_ops are not
# visible in this file - confirm against the SCT configuration documentation.
nemesis_class_name: 'DropMonkey'
nemesis_interval: 30
nemesis_sequence_sleep_between_ops: 10

user_prefix: 'elasticity-test'
space_node_threshold: 644245094
# NOTE(review): 'VERSION_DESC' looks like a placeholder substituted elsewhere - confirm.
ami_id_db_scylla_desc: 'VERSION_DESC'

round_robin: true
# Extra Scylla command-line arguments: 5 ms blocked-reactor notifications plus abort-on-* flags.
append_scylla_args: '--blocked-reactor-notify-ms 5 --abort-on-lsa-bad-alloc 1 --abort-on-seastar-bad-alloc --abort-on-internal-error 1 --abort-on-ebadf 1'
backtrace_decoding: false
print_kernel_callstack: true

# Result storage and reporting.
store_perf_results: true
# email_recipients: ["[email protected]"]
# use_prepared_loaders: true
use_hdr_cs_histogram: true
email_subject_postfix: 'elasticity test'
nemesis_double_load_during_grow_shrink_duration: 0
parallel_node_operations: false

# Container image used for cassandra-stress.
stress_image:
cassandra-stress: 'scylladb/cassandra-stress:3.17.0'
47 changes: 47 additions & 0 deletions test-cases/temp_90_percent/90-percent-perf-i4i-2xlarge-drop.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# 90% utilization test on i4i.2xlarge: mixed-workload latency with the drop-table nemesis.
# NOTE(review): the unit of test_duration is not stated in this file - presumably minutes; confirm.
test_duration: 1080
# Dataset population: four cassandra-stress write jobs of 312,500,000 rows each, over
# consecutive non-overlapping key ranges that together cover 1..1,250,000,000.
# All writes use cl=ALL with NetworkTopologyStrategy, replication_factor=3.
prepare_write_cmd: [
"cassandra-stress write no-warmup cl=ALL n=312500000 -schema 'replication(strategy=NetworkTopologyStrategy,replication_factor=3)' -mode cql3 native -rate threads=200 -col 'size=FIXED(128) n=FIXED(8)' -pop seq=1..312500000",
"cassandra-stress write no-warmup cl=ALL n=312500000 -schema 'replication(strategy=NetworkTopologyStrategy,replication_factor=3)' -mode cql3 native -rate threads=200 -col 'size=FIXED(128) n=FIXED(8)' -pop seq=312500001..625000000",
"cassandra-stress write no-warmup cl=ALL n=312500000 -schema 'replication(strategy=NetworkTopologyStrategy,replication_factor=3)' -mode cql3 native -rate threads=200 -col 'size=FIXED(128) n=FIXED(8)' -pop seq=625000001..937500000",
"cassandra-stress write no-warmup cl=ALL n=312500000 -schema 'replication(strategy=NetworkTopologyStrategy,replication_factor=3)' -mode cql3 native -rate threads=200 -col 'size=FIXED(128) n=FIXED(8)' -pop seq=937500001..1250000000",
]

# Main load: mixed workload at cl=QUORUM for 800 minutes, rate fixed at 16875 ops/s with
# 300 threads, keys drawn from a gaussian distribution over 1..650,000,000.
stress_cmd_m: "cassandra-stress mixed no-warmup cl=QUORUM duration=800m -schema 'replication(strategy=NetworkTopologyStrategy,replication_factor=3)' -mode cql3 native -rate 'threads=300 fixed=16875/s' -col 'size=FIXED(128) n=FIXED(8)' -pop 'dist=gauss(1..650000000,325000000,6500000)' "



# Cluster topology: node counts per role.
n_db_nodes: 3
n_loaders: 4
n_monitor_nodes: 1

# Instance types per role.
instance_type_loader: 'c6i.2xlarge'
instance_type_monitor: 't3.large'
instance_type_db: 'i4i.2xlarge'

# Nemesis selection and pacing.
# NOTE(review): there is a space after "SteadyMonkey:" (no count before the next entry) -
# confirm that the nemesis_class_name parser accepts this form.
nemesis_class_name: 'SteadyMonkey: DropMonkey:1'
nemesis_interval: 30
nemesis_sequence_sleep_between_ops: 60

user_prefix: 'elasticity-test'
space_node_threshold: 644245094
# NOTE(review): 'VERSION_DESC' looks like a placeholder substituted elsewhere - confirm.
ami_id_db_scylla_desc: 'VERSION_DESC'

round_robin: true
# Extra Scylla command-line arguments: 5 ms blocked-reactor notifications plus abort-on-* flags.
append_scylla_args: '--blocked-reactor-notify-ms 5 --abort-on-lsa-bad-alloc 1 --abort-on-seastar-bad-alloc --abort-on-internal-error 1 --abort-on-ebadf 1'
backtrace_decoding: false
print_kernel_callstack: true

# Result storage and reporting.
store_perf_results: true
# email_recipients: ["[email protected]"]
# use_prepared_loaders: true
use_hdr_cs_histogram: true
email_subject_postfix: 'elasticity test'
nemesis_double_load_during_grow_shrink_duration: 0
parallel_node_operations: false

# Extra scylla.yaml settings: tablets enabled, automatic snapshots disabled.
append_scylla_yaml:
enable_tablets: true
auto_snapshot: false

# Container image used for cassandra-stress.
stress_image:
cassandra-stress: 'scylladb/cassandra-stress:3.17.0'
Loading

0 comments on commit 8cf8d4d

Please sign in to comment.