From 18c06643ff4e56c0ba5351f4b8fe370a4f6994c1 Mon Sep 17 00:00:00 2001 From: Victor Cavichioli <79488234+VictorCavichioli@users.noreply.github.com> Date: Wed, 6 Nov 2024 12:01:53 -0300 Subject: [PATCH 1/8] Introduction of VNODE Repairs (#756) * Introduction of VNODE Repairs * Fix Ip definition in ecc.yml * Fix Wrong Creation of Table Storage Map * Rebase Branch with Master --- CHANGES.md | 2 + application/pom.xml | 12 + .../application/spring/BeanConfigurator.java | 38 + .../application/spring/ECChronos.java | 18 +- .../spring/ECChronosInternals.java | 26 + application/src/main/resources/ecc.yml | 2 +- core.impl/pom.xml | 6 + .../core/impl/repair/RepairGroup.java | 108 ++- .../core/impl/repair/RepairTask.java | 2 +- .../incremental/IncrementalRepairJob.java | 2 +- .../repair/scheduler/RepairSchedulerImpl.java | 115 ++- .../repair/state/AlarmPostUpdateHook.java | 103 +++ .../impl/repair/state/HostStatesImpl.java | 186 ++++ .../repair/state/RepairStateFactoryImpl.java | 142 +++ .../impl/repair/state/RepairStateImpl.java | 271 ++++++ .../repair/vnode/NormalizedBaseRange.java | 163 ++++ .../impl/repair/vnode/NormalizedRange.java | 308 +++++++ .../repair/vnode/SubRangeRepairStates.java | 186 ++++ .../repair/vnode/VnodeRepairGroupFactory.java | 79 ++ .../vnode/VnodeRepairStateFactoryImpl.java | 242 ++++++ .../vnode/VnodeRepairStateSummarizer.java | 234 +++++ .../repair/vnode/VnodeRepairStatesImpl.java | 160 ++++ .../impl/repair/vnode/VnodeRepairTask.java | 181 ++++ .../core/impl/repair/vnode/package-info.java | 18 + .../core/impl/table/TableRepairJob.java | 502 +++++++++++ .../impl/table/TableStorageStatesImpl.java | 239 +++++ .../core/impl/repair/TestRepairGroup.java | 27 +- .../ecchronos/core/impl/repair/TestUtils.java | 206 +++++ .../scheduler/TestRepairSchedulerImpl.java | 100 ++- .../repair/state/TestAlarmPostUpdateHook.java | 145 ++++ .../repair/vnode/TestNormalizedBaseRange.java | 242 ++++++ .../repair/vnode/TestNormalizedRange.java | 357 ++++++++ 
.../TestVnodeRepairStateFactoryImpl.java | 815 ++++++++++++++++++ .../vnode/TestVnodeRepairStateSummarizer.java | 388 +++++++++ .../core/impl/state/TestRepairedAt.java | 95 ++ .../ecchronos/core/metadata/DriverNode.java | 9 + .../ecchronos/core/state/HostStates.java | 41 + .../ecchronos/core/state/PostUpdateHook.java | 29 + .../ecchronos/core/state/RepairEntry.java | 141 +++ .../ecchronos/core/state/RepairHistory.java | 81 ++ .../core/state/RepairHistoryProvider.java | 50 ++ .../ecchronos/core/state/RepairState.java | 35 + .../core/state/RepairStateFactory.java | 28 + .../ecchronos/core/state/RepairedAt.java | 117 +++ .../core/state/ReplicaRepairGroupFactory.java | 38 + .../core/state/TokenSubRangeUtil.java | 125 +++ .../core/state/VnodeRepairStateFactory.java | 50 ++ .../core/table/TableStorageStates.java | 38 + .../ecchronos/core/state/TestRepairEntry.java | 114 +++ data/pom.xml | 6 + .../repairhistory/RepairHistoryService.java | 312 ++++++- .../TestRepairHistoryService.java | 16 +- fault.manager.impl/pom.xml | 73 ++ .../fm/impl/LoggingFaultReporter.java | 55 ++ .../ecchronos/fm/impl/package-info.java | 18 + .../fm/impl/TestLoggingFaultReporter.java | 93 ++ fault.manager/pom.xml | 62 ++ .../ecchronos/fm/RepairFaultReporter.java | 45 + .../cassandra/ecchronos/fm/package-info.java | 18 + pom.xml | 2 + .../utils/enums/history/SessionState.java | 32 + .../utils/enums/history/package-info.java | 18 + .../utils/exceptions/InternalException.java | 29 + 63 files changed, 7344 insertions(+), 51 deletions(-) create mode 100644 core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/state/AlarmPostUpdateHook.java create mode 100644 core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/state/HostStatesImpl.java create mode 100644 core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/state/RepairStateFactoryImpl.java create mode 100644 
core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/state/RepairStateImpl.java create mode 100644 core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/vnode/NormalizedBaseRange.java create mode 100644 core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/vnode/NormalizedRange.java create mode 100644 core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/vnode/SubRangeRepairStates.java create mode 100644 core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/vnode/VnodeRepairGroupFactory.java create mode 100644 core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/vnode/VnodeRepairStateFactoryImpl.java create mode 100644 core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/vnode/VnodeRepairStateSummarizer.java create mode 100644 core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/vnode/VnodeRepairStatesImpl.java create mode 100644 core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/vnode/VnodeRepairTask.java create mode 100644 core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/vnode/package-info.java create mode 100644 core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/table/TableRepairJob.java create mode 100644 core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/table/TableStorageStatesImpl.java create mode 100644 core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/TestUtils.java create mode 100644 core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/state/TestAlarmPostUpdateHook.java create mode 100644 core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/vnode/TestNormalizedBaseRange.java create mode 100644 
core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/vnode/TestNormalizedRange.java create mode 100644 core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/vnode/TestVnodeRepairStateFactoryImpl.java create mode 100644 core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/vnode/TestVnodeRepairStateSummarizer.java create mode 100644 core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/state/TestRepairedAt.java create mode 100644 core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/state/HostStates.java create mode 100644 core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/state/PostUpdateHook.java create mode 100644 core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/state/RepairEntry.java create mode 100644 core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/state/RepairHistory.java create mode 100644 core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/state/RepairHistoryProvider.java create mode 100644 core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/state/RepairState.java create mode 100644 core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/state/RepairStateFactory.java create mode 100644 core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/state/RepairedAt.java create mode 100644 core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/state/ReplicaRepairGroupFactory.java create mode 100644 core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/state/TokenSubRangeUtil.java create mode 100644 core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/state/VnodeRepairStateFactory.java create mode 100644 core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/table/TableStorageStates.java create mode 100644 core/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/state/TestRepairEntry.java create mode 100644 fault.manager.impl/pom.xml create mode 100644 
fault.manager.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/fm/impl/LoggingFaultReporter.java create mode 100644 fault.manager.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/fm/impl/package-info.java create mode 100644 fault.manager.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/fm/impl/TestLoggingFaultReporter.java create mode 100644 fault.manager/pom.xml create mode 100644 fault.manager/src/main/java/com/ericsson/bss/cassandra/ecchronos/fm/RepairFaultReporter.java create mode 100644 fault.manager/src/main/java/com/ericsson/bss/cassandra/ecchronos/fm/package-info.java create mode 100644 utils/src/main/java/com/ericsson/bss/cassandra/ecchronos/utils/enums/history/SessionState.java create mode 100644 utils/src/main/java/com/ericsson/bss/cassandra/ecchronos/utils/enums/history/package-info.java create mode 100644 utils/src/main/java/com/ericsson/bss/cassandra/ecchronos/utils/exceptions/InternalException.java diff --git a/CHANGES.md b/CHANGES.md index b83e10c8..411d4188 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -2,6 +2,8 @@ ## Version 1.0.0 (Not yet Released) +* Create New Repair Type Called "VNODE" - Issue #755 +* Create ReplicaRepairGroup Class for Grouping Replicas and Token Ranges - Issue #721 * Hot Reload of Nodes List - Issue #699 * Investigate Creation of RepairScheduler and ScheduleManager #714 * Implement ScheduledJobQueue for Prioritized Job Management and Execution - Issue #740 diff --git a/application/pom.xml b/application/pom.xml index a461cf35..30e0eae7 100644 --- a/application/pom.xml +++ b/application/pom.xml @@ -62,6 +62,18 @@ ${project.version} + + com.ericsson.bss.cassandra.ecchronos + fault.manager + ${project.version} + + + + com.ericsson.bss.cassandra.ecchronos + fault.manager.impl + ${project.version} + + com.ericsson.bss.cassandra.ecchronos utils diff --git a/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/spring/BeanConfigurator.java 
b/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/spring/BeanConfigurator.java index 9e7c8e2d..83d3ac47 100644 --- a/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/spring/BeanConfigurator.java +++ b/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/spring/BeanConfigurator.java @@ -24,13 +24,17 @@ import com.ericsson.bss.cassandra.ecchronos.core.impl.repair.state.ReplicationStateImpl; import com.ericsson.bss.cassandra.ecchronos.core.metadata.NodeResolver; import com.ericsson.bss.cassandra.ecchronos.core.state.ReplicationState; +import com.ericsson.bss.cassandra.ecchronos.data.repairhistory.RepairHistoryService; import com.ericsson.bss.cassandra.ecchronos.data.sync.EccNodesSync; +import com.ericsson.bss.cassandra.ecchronos.fm.RepairFaultReporter; +import com.ericsson.bss.cassandra.ecchronos.fm.impl.LoggingFaultReporter; import com.ericsson.bss.cassandra.ecchronos.utils.exceptions.ConfigurationException; import com.ericsson.bss.cassandra.ecchronos.utils.exceptions.EcChronosException; import java.net.InetAddress; import java.io.IOException; import java.net.UnknownHostException; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicReference; import java.util.function.Consumer; import java.util.function.Supplier; @@ -172,6 +176,12 @@ public ConfigurableServletWebServerFactory webServerFactory(final Config config) return factory; } + @Bean + public RepairFaultReporter repairFaultReporter() throws ConfigurationException + { + return new LoggingFaultReporter(); + } + /** * Provides a {@link DistributedNativeConnectionProvider} bean to manage Cassandra native connections. 
* @@ -255,6 +265,17 @@ public ReplicationState replicationState( return new ReplicationStateImpl(nodeResolver, session); } + @Bean + public RepairHistoryService repairHistoryService( + final DistributedNativeConnectionProvider distributedNativeConnectionProvider, + final NodeResolver nodeResolver, + final ReplicationState replicationState, + final Config config + ) + { + return getRepairHistoryService(distributedNativeConnectionProvider, nodeResolver, replicationState, config); + } + private Security getSecurityConfig() throws ConfigurationException { return ConfigurationHelper.DEFAULT_INSTANCE.getConfiguration(SECURITY_FILE, Security.class); @@ -330,4 +351,21 @@ private EccNodesSync getEccNodesSync( LOG.info("Nodes acquired with success"); return myEccNodesSync; } + + private RepairHistoryService getRepairHistoryService( + final DistributedNativeConnectionProvider distributedNativeConnectionProvider, + final NodeResolver nodeResolver, + final ReplicationState replicationState, + final Config config + ) + { + long interval = config.getRepairConfig().getRepairHistoryLookback().getInterval(TimeUnit.MILLISECONDS); + long repairHistoryLookBack = TimeUnit.MILLISECONDS.convert(interval, TimeUnit.MILLISECONDS); + return new RepairHistoryService( + distributedNativeConnectionProvider.getCqlSession(), + replicationState, + nodeResolver, + repairHistoryLookBack + ); + } } diff --git a/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/spring/ECChronos.java b/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/spring/ECChronos.java index 9e2376e3..7466b097 100644 --- a/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/spring/ECChronos.java +++ b/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/spring/ECChronos.java @@ -20,12 +20,15 @@ import com.ericsson.bss.cassandra.ecchronos.connection.DistributedNativeConnectionProvider; import 
com.ericsson.bss.cassandra.ecchronos.core.impl.repair.DefaultRepairConfigurationProvider; import com.ericsson.bss.cassandra.ecchronos.core.impl.repair.scheduler.RepairSchedulerImpl; +import com.ericsson.bss.cassandra.ecchronos.core.impl.repair.state.RepairStateFactoryImpl; import com.ericsson.bss.cassandra.ecchronos.core.impl.table.TimeBasedRunPolicy; import com.ericsson.bss.cassandra.ecchronos.core.repair.scheduler.RepairScheduler; import com.ericsson.bss.cassandra.ecchronos.core.state.ReplicationState; import com.ericsson.bss.cassandra.ecchronos.core.table.ReplicatedTableProvider; import com.ericsson.bss.cassandra.ecchronos.core.table.TableReferenceFactory; +import com.ericsson.bss.cassandra.ecchronos.data.repairhistory.RepairHistoryService; import com.ericsson.bss.cassandra.ecchronos.data.sync.EccNodesSync; +import com.ericsson.bss.cassandra.ecchronos.fm.RepairFaultReporter; import com.ericsson.bss.cassandra.ecchronos.utils.exceptions.ConfigurationException; import java.io.Closeable; @@ -49,7 +52,9 @@ public ECChronos( final DistributedJmxConnectionProvider jmxConnectionProvider, final ReplicationState replicationState, final DefaultRepairConfigurationProvider defaultRepairConfigurationProvider, - final EccNodesSync eccNodesSync + final EccNodesSync eccNodesSync, + final RepairHistoryService repairHistoryService, + final RepairFaultReporter repairFaultReporter ) throws ConfigurationException { @@ -63,6 +68,13 @@ public ECChronos( .withKeyspaceName(configuration.getRunPolicy().getTimeBasedConfig().getKeyspaceName()) .build(); + RepairStateFactoryImpl repairStateFactoryImpl = RepairStateFactoryImpl.builder() + .withReplicationState(replicationState) + .withHostStates(myECChronosInternals.getHostStates()) + .withRepairHistoryProvider(repairHistoryService) + .withTableRepairMetrics(myECChronosInternals.getTableRepairMetrics()) + .build(); + myRepairSchedulerImpl = RepairSchedulerImpl.builder() .withJmxProxyFactory(myECChronosInternals.getJmxProxyFactory()) 
.withScheduleManager(myECChronosInternals.getScheduleManager()) @@ -71,6 +83,10 @@ public ECChronos( .withReplicationState(replicationState) .withRepairPolicies(Collections.singletonList(myTimeBasedRunPolicy)) .withCassandraMetrics(myECChronosInternals.getCassandraMetrics()) + .withRepairStateFactory(repairStateFactoryImpl) + .withRepairHistory(repairHistoryService) + .withFaultReporter(repairFaultReporter) + .withTableStorageStates(myECChronosInternals.getTableStorageStates()) .build(); AbstractRepairConfigurationProvider repairConfigurationProvider = new FileBasedRepairConfiguration(applicationContext); diff --git a/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/spring/ECChronosInternals.java b/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/spring/ECChronosInternals.java index 24cd990d..8c137f46 100644 --- a/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/spring/ECChronosInternals.java +++ b/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/spring/ECChronosInternals.java @@ -22,15 +22,19 @@ import com.ericsson.bss.cassandra.ecchronos.core.impl.jmx.DistributedJmxProxyFactoryImpl; import com.ericsson.bss.cassandra.ecchronos.core.impl.metrics.CassandraMetrics; import com.ericsson.bss.cassandra.ecchronos.core.impl.repair.scheduler.ScheduleManagerImpl; +import com.ericsson.bss.cassandra.ecchronos.core.impl.repair.state.HostStatesImpl; import com.ericsson.bss.cassandra.ecchronos.core.impl.table.ReplicatedTableProviderImpl; import com.ericsson.bss.cassandra.ecchronos.core.impl.table.TableReferenceFactoryImpl; +import com.ericsson.bss.cassandra.ecchronos.core.impl.table.TableStorageStatesImpl; import com.ericsson.bss.cassandra.ecchronos.core.jmx.DistributedJmxProxyFactory; import com.ericsson.bss.cassandra.ecchronos.core.repair.scheduler.RunPolicy; import com.ericsson.bss.cassandra.ecchronos.core.repair.scheduler.ScheduleManager; +import 
com.ericsson.bss.cassandra.ecchronos.core.state.HostStates; import com.ericsson.bss.cassandra.ecchronos.core.table.ReplicatedTableProvider; import com.ericsson.bss.cassandra.ecchronos.core.table.TableReference; import com.ericsson.bss.cassandra.ecchronos.core.table.TableReferenceFactory; import com.ericsson.bss.cassandra.ecchronos.core.table.TableRepairMetrics; +import com.ericsson.bss.cassandra.ecchronos.core.table.TableStorageStates; import com.ericsson.bss.cassandra.ecchronos.data.sync.EccNodesSync; import java.io.Closeable; import java.util.HashMap; @@ -51,6 +55,8 @@ public class ECChronosInternals implements Closeable private final TableReferenceFactory myTableReferenceFactory; private final DistributedJmxProxyFactory myJmxProxyFactory; private final CassandraMetrics myCassandraMetrics; + private final HostStatesImpl myHostStatesImpl; + private final TableStorageStatesImpl myTableStorageStatesImpl; public ECChronosInternals( final Config configuration, @@ -69,11 +75,21 @@ public ECChronosInternals( myTableReferenceFactory = new TableReferenceFactoryImpl(session); + myHostStatesImpl = HostStatesImpl.builder() + .withJmxProxyFactory(myJmxProxyFactory) + .build(); + myReplicatedTableProvider = new ReplicatedTableProviderImpl( session, myTableReferenceFactory, nativeConnectionProvider.getNodes()); + myTableStorageStatesImpl = TableStorageStatesImpl.builder() + .withReplicatedTableProvider(myReplicatedTableProvider) + .withJmxProxyFactory(myJmxProxyFactory) + .withConnectionProvider(nativeConnectionProvider) + .build(); + myCassandraMetrics = new CassandraMetrics(myJmxProxyFactory); myScheduleManagerImpl = ScheduleManagerImpl.builder() .withRunInterval(configuration.getSchedulerConfig().getFrequency().getInterval(TimeUnit.MILLISECONDS), @@ -112,6 +128,16 @@ public final TableRepairMetrics getTableRepairMetrics() return NO_OP_REPAIR_METRICS; } + public final HostStates getHostStates() + { + return myHostStatesImpl; + } + + public final TableStorageStates 
getTableStorageStates() + { + return myTableStorageStatesImpl; + } + public final boolean addRunPolicy(final RunPolicy runPolicy) { return myScheduleManagerImpl.addRunPolicy(runPolicy); diff --git a/application/src/main/resources/ecc.yml b/application/src/main/resources/ecc.yml index fe8a017e..253f496a 100644 --- a/application/src/main/resources/ecc.yml +++ b/application/src/main/resources/ecc.yml @@ -238,7 +238,7 @@ repair: ## parallel_vnode = repair vnodes in parallel, this will combine vnodes into a single repair session per repair group ## incremental = repair vnodes incrementally (incremental repair) ## - repair_type: incremental + repair_type: vnode run_policy: time_based: diff --git a/core.impl/pom.xml b/core.impl/pom.xml index d42db13e..9351dfeb 100644 --- a/core.impl/pom.xml +++ b/core.impl/pom.xml @@ -47,6 +47,12 @@ ${project.version} + + com.ericsson.bss.cassandra.ecchronos + fault.manager + ${project.version} + + com.datastax.oss diff --git a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/RepairGroup.java b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/RepairGroup.java index 520d86f2..78286cfa 100644 --- a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/RepairGroup.java +++ b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/RepairGroup.java @@ -14,19 +14,29 @@ */ package com.ericsson.bss.cassandra.ecchronos.core.impl.repair; +import com.datastax.oss.driver.api.core.metadata.Node; import com.ericsson.bss.cassandra.ecchronos.core.impl.repair.incremental.IncrementalRepairTask; +import com.ericsson.bss.cassandra.ecchronos.core.impl.repair.vnode.VnodeRepairTask; import com.ericsson.bss.cassandra.ecchronos.core.jmx.DistributedJmxProxyFactory; import com.ericsson.bss.cassandra.ecchronos.core.repair.config.RepairConfiguration; import com.ericsson.bss.cassandra.ecchronos.core.repair.scheduler.ScheduledTask; +import 
com.ericsson.bss.cassandra.ecchronos.core.state.LongTokenRange; +import com.ericsson.bss.cassandra.ecchronos.core.state.RepairHistory; import com.ericsson.bss.cassandra.ecchronos.core.state.ReplicaRepairGroup; +import com.ericsson.bss.cassandra.ecchronos.core.state.TokenSubRangeUtil; import com.ericsson.bss.cassandra.ecchronos.core.table.TableReference; import com.ericsson.bss.cassandra.ecchronos.core.table.TableRepairMetrics; import com.ericsson.bss.cassandra.ecchronos.core.table.TableRepairPolicy; +import com.ericsson.bss.cassandra.ecchronos.data.repairhistory.RepairHistoryService; +import com.ericsson.bss.cassandra.ecchronos.utils.enums.repair.RepairType; import com.ericsson.bss.cassandra.ecchronos.utils.exceptions.ScheduledJobException; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; +import java.math.BigInteger; import java.util.ArrayList; import java.util.Collection; +import java.util.Collections; +import java.util.HashSet; import java.util.List; import java.util.UUID; import org.slf4j.Logger; @@ -40,11 +50,15 @@ public class RepairGroup extends ScheduledTask private static final Logger LOG = LoggerFactory.getLogger(RepairGroup.class); private final TableReference myTableReference; + private RepairHistory myRepairHistory; private final RepairConfiguration myRepairConfiguration; private final ReplicaRepairGroup myReplicaRepairGroup; private final DistributedJmxProxyFactory myJmxProxyFactory; private final TableRepairMetrics myTableRepairMetrics; private final List myRepairPolicies; + private BigInteger myTokensPerRepair; + private final UUID myJobId; + private Node myNode; /** * Constructs an IncrementalRepairTask for a specific node and table. 
@@ -57,6 +71,7 @@ public RepairGroup(final int priority, final Builder builder) super(priority); myTableReference = Preconditions .checkNotNull(builder.myTableReference, "Table reference must be set"); + myRepairConfiguration = Preconditions .checkNotNull(builder.myRepairConfiguration, "Repair configuration must be set"); myReplicaRepairGroup = Preconditions @@ -67,6 +82,21 @@ public RepairGroup(final int priority, final Builder builder) .checkNotNull(builder.myTableRepairMetrics, "Table repair metrics must be set"); myRepairPolicies = new ArrayList<>(Preconditions .checkNotNull(builder.myRepairPolicies, "Repair policies must be set")); + + if (!RepairType.INCREMENTAL.equals(myRepairConfiguration.getRepairType())) + { + myRepairHistory = Preconditions + .checkNotNull(builder.myRepairHistory, "Repair History must be set"); + } + if (RepairType.VNODE.equals(myRepairConfiguration.getRepairType())) + { + myNode = Preconditions + .checkNotNull(builder.myNode, "Node must be set"); + myTokensPerRepair = Preconditions + .checkNotNull(builder.myTokensPerRepair, "Tokens per repair must be set"); + } + myJobId = Preconditions + .checkNotNull(builder.myJobId, "Job id must be set"); } /** @@ -139,12 +169,27 @@ public String toString() public Collection getRepairTasks(final UUID nodeID) { Collection tasks = new ArrayList<>(); - tasks.add(new IncrementalRepairTask( - nodeID, - myJmxProxyFactory, - myTableReference, - myRepairConfiguration, - myTableRepairMetrics)); + if (myRepairConfiguration.getRepairType().equals(RepairType.INCREMENTAL)) + { + tasks.add(new IncrementalRepairTask( + nodeID, + myJmxProxyFactory, + myTableReference, + myRepairConfiguration, + myTableRepairMetrics)); + } + else if (myRepairConfiguration.getRepairType().equals(RepairType.VNODE)) + { + for (LongTokenRange range : myReplicaRepairGroup) + { + for (LongTokenRange subRange : new TokenSubRangeUtil(range).generateSubRanges(myTokensPerRepair)) + { + tasks.add(new VnodeRepairTask(myNode, myJmxProxyFactory, 
myTableReference, myRepairConfiguration, + myTableRepairMetrics, myRepairHistory, Collections.singleton(subRange), + new HashSet<>(myReplicaRepairGroup.getReplicas()), myJobId)); + } + } + } return tasks; } @@ -170,6 +215,10 @@ public static class Builder private DistributedJmxProxyFactory myJmxProxyFactory; private TableRepairMetrics myTableRepairMetrics; private List myRepairPolicies = new ArrayList<>(); + private BigInteger myTokensPerRepair = LongTokenRange.FULL_RANGE; + private RepairHistoryService myRepairHistory; + private Node myNode; + private UUID myJobId; /** @@ -244,6 +293,53 @@ public Builder withRepairPolicies(final List repairPolicies) return this; } + /** + * Build with tokens per repair. + * + * @param tokensPerRepair Tokens per repair. + * @return Builder + */ + public Builder withTokensPerRepair(final BigInteger tokensPerRepair) + { + myTokensPerRepair = tokensPerRepair; + return this; + } + + /** + * Build with repair history. + * + * @param repairHistory Repair history. + * @return Builder + */ + public Builder withRepairHistory(final RepairHistoryService repairHistory) + { + myRepairHistory = repairHistory; + return this; + } + + /** + * Build with job id. + * + * @param jobId Job id. + * @return Builder + */ + public Builder withJobId(final UUID jobId) + { + myJobId = jobId; + return this; + } + + /** + * Build with node. + * + * @param node node. + * @return Builder + */ + public Builder withNode(final Node node) + { + myNode = node; + return this; + } /** * Build repair group. 
* diff --git a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/RepairTask.java b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/RepairTask.java index d7ea302f..3c53bac2 100644 --- a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/RepairTask.java +++ b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/RepairTask.java @@ -444,7 +444,7 @@ protected final Set getFailedRanges() } @VisibleForTesting - final Set getSuccessfulRanges() + protected final Set getSuccessfulRanges() { return mySuccessfulRanges; } diff --git a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/incremental/IncrementalRepairJob.java b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/incremental/IncrementalRepairJob.java index 8006f7a1..9277e230 100644 --- a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/incremental/IncrementalRepairJob.java +++ b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/incremental/IncrementalRepairJob.java @@ -132,7 +132,7 @@ public Iterator iterator() .withJmxProxyFactory(getJmxProxyFactory()) .withTableRepairMetrics(getTableRepairMetrics()) .withReplicaRepairGroup(replicaRepairGroup) - .withRepairPolicies(getRepairPolicies()); + .withRepairPolicies(getRepairPolicies()).withJobId(getId()); List taskList = new ArrayList<>(); taskList.add(builder.build(getRealPriority())); return taskList.iterator(); diff --git a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/scheduler/RepairSchedulerImpl.java b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/scheduler/RepairSchedulerImpl.java index ed764285..e7a34895 100644 --- a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/scheduler/RepairSchedulerImpl.java +++ 
b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/scheduler/RepairSchedulerImpl.java @@ -17,6 +17,8 @@ import com.datastax.oss.driver.api.core.metadata.Node; import com.ericsson.bss.cassandra.ecchronos.core.impl.metrics.CassandraMetrics; import com.ericsson.bss.cassandra.ecchronos.core.impl.repair.incremental.IncrementalRepairJob; +import com.ericsson.bss.cassandra.ecchronos.core.impl.repair.state.AlarmPostUpdateHook; +import com.ericsson.bss.cassandra.ecchronos.core.impl.table.TableRepairJob; import com.ericsson.bss.cassandra.ecchronos.core.jmx.DistributedJmxProxyFactory; import com.ericsson.bss.cassandra.ecchronos.core.repair.config.RepairConfiguration; import com.ericsson.bss.cassandra.ecchronos.core.repair.scheduler.RepairScheduler; @@ -24,10 +26,16 @@ import com.ericsson.bss.cassandra.ecchronos.core.repair.scheduler.ScheduledRepairJob; import com.ericsson.bss.cassandra.ecchronos.core.repair.scheduler.ScheduledRepairJobView; import com.ericsson.bss.cassandra.ecchronos.core.repair.scheduler.ScheduledJob; +import com.ericsson.bss.cassandra.ecchronos.core.state.RepairState; +import com.ericsson.bss.cassandra.ecchronos.core.state.RepairStateFactory; import com.ericsson.bss.cassandra.ecchronos.core.state.ReplicationState; import com.ericsson.bss.cassandra.ecchronos.core.table.TableReference; import com.ericsson.bss.cassandra.ecchronos.core.table.TableRepairMetrics; import com.ericsson.bss.cassandra.ecchronos.core.table.TableRepairPolicy; +import com.ericsson.bss.cassandra.ecchronos.core.table.TableStorageStates; +import com.ericsson.bss.cassandra.ecchronos.data.repairhistory.RepairHistoryService; +import com.ericsson.bss.cassandra.ecchronos.fm.RepairFaultReporter; +import com.ericsson.bss.cassandra.ecchronos.utils.enums.repair.RepairType; import com.google.common.util.concurrent.ThreadFactoryBuilder; import java.io.Closeable; @@ -63,12 +71,15 @@ public final class RepairSchedulerImpl implements RepairScheduler, Closeable private final 
ExecutorService myExecutor; private final TableRepairMetrics myTableRepairMetrics; - + private final RepairHistoryService myRepairHistoryService; + private final RepairFaultReporter myFaultReporter; private final DistributedJmxProxyFactory myJmxProxyFactory; private final ScheduleManager myScheduleManager; + private final RepairStateFactory myRepairStateFactory; private final ReplicationState myReplicationState; private final CassandraMetrics myCassandraMetrics; private final List myRepairPolicies; + private final TableStorageStates myTableStorageStates; private Set validateScheduleMap(final UUID nodeID, final TableReference tableReference) { @@ -86,12 +97,16 @@ private RepairSchedulerImpl(final Builder builder) { myExecutor = Executors.newSingleThreadScheduledExecutor( new ThreadFactoryBuilder().setNameFormat("RepairScheduler-%d").build()); + myFaultReporter = builder.myFaultReporter; myTableRepairMetrics = builder.myTableRepairMetrics; myJmxProxyFactory = builder.myJmxProxyFactory; myScheduleManager = builder.myScheduleManager; + myRepairStateFactory = builder.myRepairStateFactory; myReplicationState = builder.myReplicationState; myRepairPolicies = new ArrayList<>(builder.myRepairPolicies); myCassandraMetrics = builder.myCassandraMetrics; + myRepairHistoryService = builder.myRepairHistoryService; + myTableStorageStates = builder.myTableStorageStates; } @Override @@ -277,17 +292,42 @@ private ScheduledRepairJob createScheduledRepairJob( .withPriorityGranularity(repairConfiguration.getPriorityGranularityUnit()) .build(); ScheduledRepairJob job; - job = new IncrementalRepairJob.Builder() - .withConfiguration(configuration) - .withNode(node) - .withJmxProxyFactory(myJmxProxyFactory) - .withTableReference(tableReference) - .withRepairConfiguration(repairConfiguration) - .withTableRepairMetrics(myTableRepairMetrics) - .withCassandraMetrics(myCassandraMetrics) - .withReplicationState(myReplicationState) - .withRepairPolices(myRepairPolicies) - .build(); + if 
(repairConfiguration.getRepairType().equals(RepairType.INCREMENTAL)) + { + LOG.info("Creating IncrementalRepairJob for node {}", node.getHostId()); + job = new IncrementalRepairJob.Builder() + .withConfiguration(configuration) + .withNode(node) + .withJmxProxyFactory(myJmxProxyFactory) + .withTableReference(tableReference) + .withRepairConfiguration(repairConfiguration) + .withTableRepairMetrics(myTableRepairMetrics) + .withCassandraMetrics(myCassandraMetrics) + .withReplicationState(myReplicationState) + .withRepairPolices(myRepairPolicies) + .build(); + } + else + { + LOG.info("Creating TableRepairJob for table {}.{} in node {}", + tableReference.getKeyspace(), tableReference.getTable(), node.getHostId()); + AlarmPostUpdateHook alarmPostUpdateHook = new AlarmPostUpdateHook(tableReference, repairConfiguration, + myFaultReporter); + RepairState repairState = myRepairStateFactory.create(node, tableReference, repairConfiguration, + alarmPostUpdateHook); + job = new TableRepairJob.Builder() + .withConfiguration(configuration) + .withJmxProxyFactory(myJmxProxyFactory) + .withTableReference(tableReference) + .withRepairState(repairState) + .withTableRepairMetrics(myTableRepairMetrics) + .withRepairConfiguration(repairConfiguration) + .withTableStorageStates(myTableStorageStates) + .withRepairPolices(myRepairPolicies) + .withRepairHistory(myRepairHistoryService) + .withNode(node) + .build(); + } job.refreshState(); return job; } @@ -308,12 +348,51 @@ public static Builder builder() public static class Builder { private DistributedJmxProxyFactory myJmxProxyFactory; + private RepairFaultReporter myFaultReporter; + private RepairStateFactory myRepairStateFactory; private ScheduleManager myScheduleManager; private ReplicationState myReplicationState; private CassandraMetrics myCassandraMetrics; private final List myRepairPolicies = new ArrayList<>(); private TableRepairMetrics myTableRepairMetrics; + private RepairHistoryService myRepairHistoryService; + private 
TableStorageStates myTableStorageStates; + /** + * RepairSchedulerImpl build with fault reporter. + * + * @param repairFaultReporter Repair fault reporter. + * @return Builder + */ + public Builder withFaultReporter(final RepairFaultReporter repairFaultReporter) + { + myFaultReporter = repairFaultReporter; + return this; + } + + /** + * RepairSchedulerImpl build with repair history. + * + * @param repairHistory Repair history. + * @return Builder + */ + public Builder withRepairHistory(final RepairHistoryService repairHistory) + { + myRepairHistoryService = repairHistory; + return this; + } + + /** + * RepairSchedulerImpl build with table storage states. + * + * @param tableStorageStates Table storage states. + * @return Builder + */ + public Builder withTableStorageStates(final TableStorageStates tableStorageStates) + { + myTableStorageStates = tableStorageStates; + return this; + } /** * RepairSchedulerImpl build with JMX proxy factory. @@ -339,6 +418,18 @@ public Builder withScheduleManager(final ScheduleManager scheduleManager) return this; } + /** + * RepairSchedulerImpl build with repair state factory. + * + * @param repairStateFactory Repair state factory. + * @return Builder + */ + public Builder withRepairStateFactory(final RepairStateFactory repairStateFactory) + { + myRepairStateFactory = repairStateFactory; + return this; + } + /** * RepairSchedulerImpl build with replication state. 
* diff --git a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/state/AlarmPostUpdateHook.java b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/state/AlarmPostUpdateHook.java new file mode 100644 index 00000000..70023e8b --- /dev/null +++ b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/state/AlarmPostUpdateHook.java @@ -0,0 +1,103 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.ericsson.bss.cassandra.ecchronos.core.impl.repair.state; + +import com.ericsson.bss.cassandra.ecchronos.core.repair.config.RepairConfiguration; +import com.ericsson.bss.cassandra.ecchronos.core.state.PostUpdateHook; +import com.ericsson.bss.cassandra.ecchronos.core.state.RepairStateSnapshot; +import com.ericsson.bss.cassandra.ecchronos.core.state.VnodeRepairState; +import com.ericsson.bss.cassandra.ecchronos.core.table.TableReference; +import com.ericsson.bss.cassandra.ecchronos.fm.RepairFaultReporter; +import com.google.common.annotations.VisibleForTesting; +import java.time.Clock; +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.atomic.AtomicReference; + +/** + * An update hook that raises and ceases alarms based on the elapsed time without repair. 
+ */ +public class AlarmPostUpdateHook implements PostUpdateHook +{ + private final RepairFaultReporter myFaultReporter; + private final TableReference myTableReference; + private final RepairConfiguration myRepairConfiguration; + private final AtomicReference myClock = new AtomicReference<>(Clock.systemDefaultZone()); + + public AlarmPostUpdateHook(final TableReference tableReference, + final RepairConfiguration repairConfiguration, + final RepairFaultReporter faultReporter) + { + myFaultReporter = faultReporter; + myTableReference = tableReference; + myRepairConfiguration = repairConfiguration; + + } + + /** + * Post update. + * + * @param repairStateSnapshot The current repair state snapshot + */ + @Override + public void postUpdate(final RepairStateSnapshot repairStateSnapshot) + { + long lastRepaired = repairStateSnapshot.lastCompletedAt(); + + if (lastRepaired != VnodeRepairState.UNREPAIRED) + { + sendOrCeaseAlarm(lastRepaired); + } + } + + private void sendOrCeaseAlarm(final long lastRepairedAt) + { + long msSinceLastRepair = myClock.get().millis() - lastRepairedAt; + RepairFaultReporter.FaultCode faultCode = null; + + if (msSinceLastRepair >= myRepairConfiguration.getRepairErrorTimeInMs()) + { + faultCode = RepairFaultReporter.FaultCode.REPAIR_ERROR; + } + else if (msSinceLastRepair >= myRepairConfiguration.getRepairWarningTimeInMs()) + { + faultCode = RepairFaultReporter.FaultCode.REPAIR_WARNING; + } + Map data = new HashMap<>(); + data.put(RepairFaultReporter.FAULT_KEYSPACE, myTableReference.getKeyspace()); + data.put(RepairFaultReporter.FAULT_TABLE, myTableReference.getTable()); + + if (faultCode != null) + { + myFaultReporter.raise(faultCode, data); + } + else + { + myFaultReporter.cease(RepairFaultReporter.FaultCode.REPAIR_WARNING, data); + } + } + + /** + * Set clock. 
+ * + * @param clock The clock used to determine the current time. + */ + @VisibleForTesting + void setClock(final Clock clock) + { + myClock.set(clock); + } +} + diff --git a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/state/HostStatesImpl.java b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/state/HostStatesImpl.java new file mode 100644 index 00000000..585cd37c --- /dev/null +++ b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/state/HostStatesImpl.java @@ -0,0 +1,186 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.ericsson.bss.cassandra.ecchronos.core.impl.repair.state; + +import com.ericsson.bss.cassandra.ecchronos.core.impl.logging.ThrottlingLogger; +import com.ericsson.bss.cassandra.ecchronos.core.jmx.DistributedJmxProxy; +import com.ericsson.bss.cassandra.ecchronos.core.jmx.DistributedJmxProxyFactory; +import com.ericsson.bss.cassandra.ecchronos.core.metadata.DriverNode; +import com.ericsson.bss.cassandra.ecchronos.core.state.HostStates; +import java.io.Closeable; +import java.io.IOException; +import java.net.InetAddress; +import java.util.UUID; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.TimeUnit; + +import com.datastax.oss.driver.api.core.metadata.Node; + +import com.google.common.annotations.VisibleForTesting; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Implementation of the {@link HostStates} interface that uses JMX to retrieve node statuses and caches the retrieved + * statuses for some time. + */ +public final class HostStatesImpl implements HostStates, Closeable +{ + private static final Logger LOG = LoggerFactory.getLogger(HostStatesImpl.class); + private static final ThrottlingLogger THROTTLED_LOGGER = new ThrottlingLogger(LOG, 1, TimeUnit.MINUTES); + + private static final long DEFAULT_REFRESH_INTERVAL_IN_MS = TimeUnit.SECONDS.toMillis(10); + + private final ConcurrentHashMap myHostStates = new ConcurrentHashMap<>(); + private final Object myRefreshLock = new Object(); + private final long myRefreshIntervalInMs; + + private volatile long myLastRefresh = -1; + + private final DistributedJmxProxyFactory myJmxProxyFactory; + + private HostStatesImpl(final Builder builder) + { + myRefreshIntervalInMs = builder.myRefreshIntervalInMs; + myJmxProxyFactory = builder.myJmxProxyFactory; + } + + @Override + public boolean isUp(final Node node) + { + refreshNodeStatus(node.getHostId()); + Boolean status = myHostStates.get(node.getBroadcastAddress().get().getAddress()); + return status != null && 
status; + } + + @Override + public boolean isUp(final DriverNode node) + { + refreshNodeStatus(node.getId()); + Boolean status = myHostStates.get(node.getPublicAddress()); + return status != null && status; + } + + @Override + public void close() + { + myHostStates.clear(); + } + + private void refreshNodeStatus(final UUID nodeID) + { + if (shouldRefreshNodeStatus()) + { + synchronized (myRefreshLock) + { + if (shouldRefreshNodeStatus() && !tryRefreshHostStates(nodeID)) + { + myHostStates.clear(); + } + } + } + } + + @VisibleForTesting + void resetLastRefresh() + { + myLastRefresh = -1; + } + + private boolean shouldRefreshNodeStatus() + { + return myLastRefresh == -1 || myLastRefresh < (System.currentTimeMillis() - myRefreshIntervalInMs); + } + + private synchronized boolean tryRefreshHostStates(final UUID nodeID) + { + if (myJmxProxyFactory == null) + { + return false; + } + + try (DistributedJmxProxy proxy = myJmxProxyFactory.connect()) + { + for (String liveHost : proxy.getLiveNodes(nodeID)) + { + InetAddress host = InetAddress.getByName(liveHost); + + if (changeHostState(host, true)) + { + LOG.debug("Host {} marked as UP", host); + } + } + + for (String unreachableHost : proxy.getUnreachableNodes(nodeID)) + { + InetAddress host = InetAddress.getByName(unreachableHost); + + if (changeHostState(host, false)) + { + LOG.debug("Host {} marked as DOWN", host); + } + } + + myLastRefresh = System.currentTimeMillis(); + return true; + } + catch (IOException e) + { + THROTTLED_LOGGER.warn("Unable to retrieve host states", e); + } + + return false; + } + + private boolean changeHostState(final InetAddress host, final boolean newValue) + { + Boolean oldValue = myHostStates.put(host, newValue); + + return oldValue == null || oldValue != newValue; + } + + public static Builder builder() + { + return new Builder(); + } + + public static class Builder + { + private DistributedJmxProxyFactory myJmxProxyFactory; + private long myRefreshIntervalInMs = 
DEFAULT_REFRESH_INTERVAL_IN_MS; + + public final Builder withJmxProxyFactory(final DistributedJmxProxyFactory jmxProxyFactory) + { + myJmxProxyFactory = jmxProxyFactory; + return this; + } + + public final Builder withRefreshIntervalInMs(final long refreshIntervalInMs) + { + myRefreshIntervalInMs = refreshIntervalInMs; + return this; + } + + public final HostStatesImpl build() + { + if (myJmxProxyFactory == null) + { + throw new IllegalArgumentException("JMX Proxy Factory must be set"); + } + + return new HostStatesImpl(this); + } + } +} diff --git a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/state/RepairStateFactoryImpl.java b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/state/RepairStateFactoryImpl.java new file mode 100644 index 00000000..2b74f72c --- /dev/null +++ b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/state/RepairStateFactoryImpl.java @@ -0,0 +1,142 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.ericsson.bss.cassandra.ecchronos.core.impl.repair.state; + +import com.datastax.oss.driver.api.core.metadata.Node; +import com.ericsson.bss.cassandra.ecchronos.core.impl.repair.vnode.VnodeRepairGroupFactory; +import com.ericsson.bss.cassandra.ecchronos.core.impl.repair.vnode.VnodeRepairStateFactoryImpl; +import com.ericsson.bss.cassandra.ecchronos.core.repair.config.RepairConfiguration; + +import com.ericsson.bss.cassandra.ecchronos.core.state.HostStates; +import com.ericsson.bss.cassandra.ecchronos.core.state.PostUpdateHook; +import com.ericsson.bss.cassandra.ecchronos.core.state.RepairState; +import com.ericsson.bss.cassandra.ecchronos.core.state.RepairStateFactory; +import com.ericsson.bss.cassandra.ecchronos.core.state.ReplicaRepairGroupFactory; +import com.ericsson.bss.cassandra.ecchronos.core.state.ReplicationState; +import com.ericsson.bss.cassandra.ecchronos.core.state.VnodeRepairStateFactory; +import com.ericsson.bss.cassandra.ecchronos.core.table.TableReference; +import com.ericsson.bss.cassandra.ecchronos.core.table.TableRepairMetrics; +import com.ericsson.bss.cassandra.ecchronos.data.repairhistory.RepairHistoryService; + +public final class RepairStateFactoryImpl implements RepairStateFactory +{ + private final HostStates myHostStates; + private final TableRepairMetrics myTableRepairMetrics; + + private final VnodeRepairStateFactoryImpl myVnodeRepairStateFactory; + private final VnodeRepairStateFactoryImpl mySubRangeRepairStateFactory; + + private RepairStateFactoryImpl(final Builder builder) + { + myHostStates = builder.myHostStates; + myTableRepairMetrics = builder.myTableRepairMetrics; + + myVnodeRepairStateFactory = new VnodeRepairStateFactoryImpl(builder.myReplicationState, + builder.myRepairHistoryProvider, false); + mySubRangeRepairStateFactory = new VnodeRepairStateFactoryImpl(builder.myReplicationState, + builder.myRepairHistoryProvider, true); + } + + @Override + public RepairState create( + final Node node, + final 
TableReference tableReference, + final RepairConfiguration repairConfiguration, + final PostUpdateHook postUpdateHook) + { + ReplicaRepairGroupFactory replicaRepairGroupFactory = VnodeRepairGroupFactory.INSTANCE; + + VnodeRepairStateFactory vnodeRepairStateFactory = myVnodeRepairStateFactory; + if (repairConfiguration.getTargetRepairSizeInBytes() != RepairConfiguration.FULL_REPAIR_SIZE) + { + vnodeRepairStateFactory = mySubRangeRepairStateFactory; + } + + return new RepairStateImpl(node, tableReference, repairConfiguration, vnodeRepairStateFactory, myHostStates, + myTableRepairMetrics, replicaRepairGroupFactory, postUpdateHook); + } + + public static Builder builder() + { + return new Builder(); + } + + public static class Builder + { + private ReplicationState myReplicationState; + private HostStates myHostStates; + private RepairHistoryService myRepairHistoryProvider; + private TableRepairMetrics myTableRepairMetrics; + + /** + * Build repair state factory with replication state. + * + * @param replicationState Replication state. + * @return Builder + */ + public Builder withReplicationState(final ReplicationState replicationState) + { + myReplicationState = replicationState; + return this; + } + + /** + * Build repair state factory with host states. + * + * @param hostStates The host states. + * @return Builder + */ + public Builder withHostStates(final HostStates hostStates) + { + myHostStates = hostStates; + return this; + } + + /** + * Build repair state factory with repair history provider. + * + * @param repairHistoryProvider The repair history provider. + * @return Builder + */ + public Builder withRepairHistoryProvider(final RepairHistoryService repairHistoryProvider) + { + myRepairHistoryProvider = repairHistoryProvider; + return this; + } + + /** + * Build repair state factory with table repair metrics. + * + * @param tableRepairMetrics The table repair metrics. 
+ * @return Builder + */ + public Builder withTableRepairMetrics(final TableRepairMetrics tableRepairMetrics) + { + myTableRepairMetrics = tableRepairMetrics; + return this; + } + + /** + * Build repair state factory. + * + * @return RepairStateFactoryImpl + */ + public RepairStateFactoryImpl build() + { + return new RepairStateFactoryImpl(this); + } + } +} + diff --git a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/state/RepairStateImpl.java b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/state/RepairStateImpl.java new file mode 100644 index 00000000..3f29ba49 --- /dev/null +++ b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/state/RepairStateImpl.java @@ -0,0 +1,271 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.ericsson.bss.cassandra.ecchronos.core.impl.repair.state; + +import com.datastax.oss.driver.api.core.metadata.Node; +import com.ericsson.bss.cassandra.ecchronos.core.metadata.DriverNode; +import com.ericsson.bss.cassandra.ecchronos.core.repair.config.RepairConfiguration; +import com.ericsson.bss.cassandra.ecchronos.core.state.HostStates; +import com.ericsson.bss.cassandra.ecchronos.core.state.PostUpdateHook; +import com.ericsson.bss.cassandra.ecchronos.core.state.RepairState; +import com.ericsson.bss.cassandra.ecchronos.core.state.RepairStateSnapshot; +import com.ericsson.bss.cassandra.ecchronos.core.state.RepairedAt; +import com.ericsson.bss.cassandra.ecchronos.core.state.ReplicaRepairGroup; +import com.ericsson.bss.cassandra.ecchronos.core.state.ReplicaRepairGroupFactory; +import com.ericsson.bss.cassandra.ecchronos.core.state.VnodeRepairState; +import com.ericsson.bss.cassandra.ecchronos.core.state.VnodeRepairStateFactory; +import com.ericsson.bss.cassandra.ecchronos.core.state.VnodeRepairStates; +import com.ericsson.bss.cassandra.ecchronos.core.table.TableReference; +import com.ericsson.bss.cassandra.ecchronos.core.table.TableRepairMetrics; +import com.google.common.annotations.VisibleForTesting; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.text.SimpleDateFormat; +import java.util.Date; +import java.util.List; +import java.util.Locale; +import java.util.concurrent.atomic.AtomicReference; +import java.util.stream.Collectors; + +public class RepairStateImpl implements RepairState +{ + private static final Logger LOG = LoggerFactory.getLogger(RepairStateImpl.class); + + private static final ThreadLocal MY_DATE_FORMAT + = ThreadLocal.withInitial(() -> new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.US)); + + private final AtomicReference myRepairStateSnapshot = new AtomicReference<>(); + + private final TableReference myTableReference; + private final RepairConfiguration myRepairConfiguration; + private final 
VnodeRepairStateFactory myVnodeRepairStateFactory; + private final HostStates myHostStates; + private final TableRepairMetrics myTableRepairMetrics; + private final ReplicaRepairGroupFactory myReplicaRepairGroupFactory; + private final PostUpdateHook myPostUpdateHook; + private final Node myNode; + + @SuppressWarnings("PMD.ConstructorCallsOverridableMethod") + public RepairStateImpl( + final Node node, + final TableReference tableReference, + final RepairConfiguration repairConfiguration, + final VnodeRepairStateFactory vnodeRepairStateFactory, + final HostStates hostStates, + final TableRepairMetrics tableRepairMetrics, + final ReplicaRepairGroupFactory replicaRepairGroupFactory, + final PostUpdateHook postUpdateHook) + { + myNode = node; + myTableReference = tableReference; + myRepairConfiguration = repairConfiguration; + myVnodeRepairStateFactory = vnodeRepairStateFactory; + myHostStates = hostStates; + myTableRepairMetrics = tableRepairMetrics; + myReplicaRepairGroupFactory = replicaRepairGroupFactory; + myPostUpdateHook = postUpdateHook; + + update(); + } + + @Override + public final void update() + { + RepairStateSnapshot oldRepairStateSnapshot = myRepairStateSnapshot.get(); + long now = System.currentTimeMillis(); + if (oldRepairStateSnapshot == null + || isRepairNeeded(oldRepairStateSnapshot.lastCompletedAt(), + oldRepairStateSnapshot.getEstimatedRepairTime(), + now)) + { + RepairStateSnapshot newRepairStateSnapshot = generateNewRepairState(myNode, oldRepairStateSnapshot, now); + if (myRepairStateSnapshot.compareAndSet(oldRepairStateSnapshot, newRepairStateSnapshot)) + { + myTableRepairMetrics.lastRepairedAt(myTableReference, newRepairStateSnapshot.lastCompletedAt()); + + int nonRepairedRanges + = (int) newRepairStateSnapshot.getVnodeRepairStates().getVnodeRepairStates().stream() + .filter(v -> vnodeIsRepairable(v, newRepairStateSnapshot, System.currentTimeMillis())) + .count(); + + int repairedRanges + = 
newRepairStateSnapshot.getVnodeRepairStates().getVnodeRepairStates().size() + - nonRepairedRanges; + myTableRepairMetrics.repairState(myTableReference, repairedRanges, nonRepairedRanges); + myTableRepairMetrics.remainingRepairTime(myTableReference, + newRepairStateSnapshot.getRemainingRepairTime(System.currentTimeMillis(), + myRepairConfiguration.getRepairIntervalInMs())); + LOG.trace("Table {} switched to repair state {}", myTableReference, newRepairStateSnapshot); + } + } + else + { + LOG.trace("Table {} keeping repair state {}", myTableReference, oldRepairStateSnapshot); + } + myPostUpdateHook.postUpdate(myRepairStateSnapshot.get()); + } + + /** + * Returns the repair state snapshot. + * + * @return RepairStateSnapshot + */ + @Override + public RepairStateSnapshot getSnapshot() + { + return myRepairStateSnapshot.get(); + } + + private RepairStateSnapshot generateNewRepairState(final Node node, final RepairStateSnapshot old, final long now) + { + VnodeRepairStates vnodeRepairStates = myVnodeRepairStateFactory.calculateNewState(node, myTableReference, old, now); + + return generateSnapshotForVnode(vnodeRepairStates, old, now); + } + + private RepairStateSnapshot generateSnapshotForVnode(final VnodeRepairStates vnodeRepairStates, + final RepairStateSnapshot old, final long createdAt) + { + long repairedAt = calculateRepairedAt(vnodeRepairStates, old); + + VnodeRepairStates updatedVnodeRepairStates = vnodeRepairStates.combineWithRepairedAt(repairedAt); + + List repairableVnodes = updatedVnodeRepairStates.getVnodeRepairStates().stream() + .filter(this::replicasAreRepairable) + .filter(v -> vnodeIsRepairable(v, old, System.currentTimeMillis())) + .collect(Collectors.toList()); + + List replicaRepairGroups + = myReplicaRepairGroupFactory.generateReplicaRepairGroups(repairableVnodes); + + return RepairStateSnapshot.newBuilder() + .withLastCompletedAt(repairedAt) + .withVnodeRepairStates(updatedVnodeRepairStates) + .withCreatedAt(createdAt) + 
.withReplicaRepairGroups(replicaRepairGroups) + .build(); + } + + private long calculateRepairedAt(final VnodeRepairStates vnodeRepairStates, final RepairStateSnapshot old) + { + RepairedAt repairedAt = RepairedAt.generate(vnodeRepairStates); + LOG.trace("RepairedAt: {}, calculated from: {}", repairedAt, vnodeRepairStates); + + long calculatedRepairedAt; + + if (!repairedAt.isRepaired()) + { + if (repairedAt.isPartiallyRepaired()) + { + calculatedRepairedAt = partiallyRepairedTableRepairedAt(repairedAt.getMaxRepairedAt(), old); + } + else + { + calculatedRepairedAt = newTableRepairedAt(); + } + } + else + { + calculatedRepairedAt = repairedTableRepairedAt(repairedAt.getMinRepairedAt(), old); + } + return calculatedRepairedAt; + } + + private long repairedTableRepairedAt(final long repairedAt, final RepairStateSnapshot old) + { + if (LOG.isDebugEnabled()) + { + long next = repairedAt + myRepairConfiguration.getRepairIntervalInMs(); + if (old != null) + { + next -= old.getEstimatedRepairTime(); + } + LOG.debug("Table {} fully repaired at {}, next repair at/after {}", myTableReference, + MY_DATE_FORMAT.get().format(new Date(repairedAt)), MY_DATE_FORMAT.get().format(new Date(next))); + } + return repairedAt; + } + + private long partiallyRepairedTableRepairedAt(final long maxRepairedAt, final RepairStateSnapshot old) + { + long runIntervalInMs = myRepairConfiguration.getRepairIntervalInMs(); + long repairedAt = Math.min(System.currentTimeMillis() - runIntervalInMs, maxRepairedAt); + if (LOG.isDebugEnabled()) + { + long next = repairedAt + runIntervalInMs; + if (old != null) + { + next -= old.getEstimatedRepairTime(); + } + LOG.debug("Table {} partially repaired at {}, next repair at/after {}", myTableReference, + MY_DATE_FORMAT.get().format(new Date(repairedAt)), MY_DATE_FORMAT.get().format(new Date(next))); + } + + return repairedAt; + } + + @SuppressWarnings("PMD.GuardLogStatement") + private long newTableRepairedAt() + { + long runIntervalInMs = 
myRepairConfiguration.getRepairIntervalInMs(); + long initialDelayInMs = myRepairConfiguration.getInitialDelayInMs(); + long assumedRepairedAt = System.currentTimeMillis() - runIntervalInMs + initialDelayInMs; + LOG.info("Assuming the table {} is new. Next repair will occur at {}.", + myTableReference, + MY_DATE_FORMAT.get().format(new Date(assumedRepairedAt + runIntervalInMs))); + return assumedRepairedAt; + } + + private boolean replicasAreRepairable(final VnodeRepairState vnodeRepairState) + { + for (DriverNode node : vnodeRepairState.getReplicas()) + { + if (!myHostStates.isUp(node)) + { + LOG.trace("{} not repairable, host {} is NOT UP", vnodeRepairState, node); + return false; + } + } + return true; + } + + /** + * Returns if repair is needed. If the job's estimated finished time has passed, it is up for repair. + * + * @param lastRepairedAt Time when last repaired. + * @param estimatedRepairTime The estimated repair time. + * @param now Current time. + * @return boolean + */ + @VisibleForTesting + boolean isRepairNeeded(final long lastRepairedAt, final long estimatedRepairTime, final long now) + { + return lastRepairedAt + (myRepairConfiguration.getRepairIntervalInMs() - estimatedRepairTime) <= now; + } + + private boolean vnodeIsRepairable(final VnodeRepairState vnodeRepairState, + final RepairStateSnapshot snapshot, + final long now) + { + long estimatedRepairTime = 0L; + if (snapshot != null) + { + estimatedRepairTime = snapshot.getEstimatedRepairTime(); + } + return isRepairNeeded(vnodeRepairState.lastRepairedAt(), estimatedRepairTime, now); + } +} + diff --git a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/vnode/NormalizedBaseRange.java b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/vnode/NormalizedBaseRange.java new file mode 100644 index 00000000..6c6f0fc9 --- /dev/null +++ b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/vnode/NormalizedBaseRange.java @@ 
-0,0 +1,163 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.ericsson.bss.cassandra.ecchronos.core.impl.repair.vnode; + +import com.ericsson.bss.cassandra.ecchronos.core.state.LongTokenRange; +import com.ericsson.bss.cassandra.ecchronos.core.state.VnodeRepairState; +import java.math.BigInteger; +import java.util.Objects; + +/** + * A normalized base range (vnode) that can transform sub ranges + * between a normalized and traditional state. + * + * The normalized base range starts from 0 instead of the token. + * All normalized sub ranges are offset from 0 rather than the + * start of the vnode. + * This makes it so that the start of a sub range is strictly + * smaller than the end. + * This is useful to avoid dealing with token ranges wrapping around + * the end of the token range. + */ +@SuppressWarnings("VisibilityModifier") +public class NormalizedBaseRange +{ + private static final BigInteger NORMALIZED_RANGE_START = BigInteger.ZERO; + + private final VnodeRepairState baseVnode; + final BigInteger end; + + public NormalizedBaseRange(final VnodeRepairState aBaseVnode) + { + this.baseVnode = aBaseVnode; + this.end = baseVnode.getTokenRange().rangeSize(); + } + + /** + * Check if the provided token is in this normalized range. + * + * @param normalizedToken The normalized token. + * @return True if the token is in this range. 
+ */ + public boolean inRange(final BigInteger normalizedToken) + { + return normalizedToken.compareTo(NORMALIZED_RANGE_START) >= 0 && normalizedToken.compareTo(end) <= 0; + } + + /** + * Transform a traditional sub range of this vnode and align its start + * offset from 0 rather than the vnode start. + * + * @param subRange The sub range to transform. + * @return NormalizedRange + * @throws IllegalArgumentException Thrown in case the provided sub range is not covered by this vnode. + */ + public NormalizedRange transform(final VnodeRepairState subRange) + { + if (!baseVnode.getTokenRange().isCovering(subRange.getTokenRange())) + { + throw new IllegalArgumentException(baseVnode + " is not covering " + subRange); + } + + BigInteger baseStart = BigInteger.valueOf(baseVnode.getTokenRange().start); + + BigInteger normalizedStart = BigInteger.valueOf(subRange.getTokenRange().start).subtract(baseStart); + if (normalizedStart.compareTo(BigInteger.ZERO) < 0) + { + normalizedStart = normalizedStart.add(LongTokenRange.FULL_RANGE); + } + + BigInteger normalizedEnd = normalizedStart.add(subRange.getTokenRange().rangeSize()); + + return new NormalizedRange(this, normalizedStart, normalizedEnd, subRange.getStartedAt(), + subRange.getFinishedAt(), subRange.getRepairTime()); + } + + /** + * Transform a normalized sub range of this vnode back to its + * traditional counterpart. + * + * This resets the start offset back to the start of the vnode. + * + * @param range The normalized sub range to transform. + * @return The traditional sub range. 
+ */ + public VnodeRepairState transform(final NormalizedRange range) + { + BigInteger baseStart = BigInteger.valueOf(baseVnode.getTokenRange().start); + + BigInteger startOffset = range.start(); + BigInteger endOffset = range.end(); + + BigInteger realStart = baseStart.add(startOffset); + BigInteger realEnd = baseStart.add(endOffset); + + if (realStart.compareTo(LongTokenRange.RANGE_END) > 0) + { + realStart = realStart.subtract(LongTokenRange.FULL_RANGE); + } + if (realEnd.compareTo(LongTokenRange.RANGE_END) > 0) + { + realEnd = realEnd.subtract(LongTokenRange.FULL_RANGE); + } + + return new VnodeRepairState(new LongTokenRange(realStart.longValueExact(), realEnd.longValueExact()), + baseVnode.getReplicas(), range.getStartedAt(), range.getFinishedAt(), range.getRepairTime()); + } + + /** + * Checks equality. + * + * @param o Object to test equality with. + * @return Boolean + */ + @Override + public boolean equals(final Object o) + { + if (this == o) + { + return true; + } + if (o == null || getClass() != o.getClass()) + { + return false; + } + NormalizedBaseRange that = (NormalizedBaseRange) o; + return baseVnode.equals(that.baseVnode) && end.equals(that.end); + } + + /** + * Return a hash code representation. + * + * @return int + */ + @Override + public int hashCode() + { + return Objects.hash(baseVnode, end); + } + + /** + * Return a string representation. 
+ * + * @return String + */ + @Override + public String toString() + { + return String.format("(%d, %d]", NORMALIZED_RANGE_START, end); + } +} + diff --git a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/vnode/NormalizedRange.java b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/vnode/NormalizedRange.java new file mode 100644 index 00000000..2efc8367 --- /dev/null +++ b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/vnode/NormalizedRange.java @@ -0,0 +1,308 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.ericsson.bss.cassandra.ecchronos.core.impl.repair.vnode; + +import java.math.BigInteger; +import java.util.Objects; + +/** + * A normalized range based on a "base vnode". + * The range is normalized so that the token range in the base vnode + * starts at 0 and ends (at most) at 2^64. + * + * When comparing normalized ranges the ones with lowest start comes first. + * If two normalized ranges have the same start the one including more is sorted first. 
+ * E.g (5, 10], (5, 15] is ordered as (5, 15], (5, 10] + */ +public class NormalizedRange implements Comparable +{ + static final long UNKNOWN_REPAIR_TIME = 0L; + private final NormalizedBaseRange base; + private final BigInteger start; + private final BigInteger end; + + private final long startedAt; + private final long finishedAt; + private final long repairTime; + + NormalizedRange(final NormalizedBaseRange theBase, + final BigInteger theStart, + final BigInteger theEnd, + final long wasStartedAt, + final long wasFinishedAt, + final long theRepairTime) + { + this.base = theBase; + this.start = theStart; + this.end = theEnd; + this.startedAt = wasStartedAt; + this.finishedAt = wasFinishedAt; + this.repairTime = theRepairTime; + } + + NormalizedRange(final NormalizedBaseRange theBase, + final BigInteger theStart, + final BigInteger theEnd, + final long wasStartedAt, + final long wasFinishedAt) + { + this.base = theBase; + this.start = theStart; + this.end = theEnd; + this.startedAt = wasStartedAt; + this.finishedAt = wasFinishedAt; + long tempRepairTime = UNKNOWN_REPAIR_TIME; + if (finishedAt > 0) + { + tempRepairTime = finishedAt - startedAt; + } + this.repairTime = tempRepairTime; + } + + /** + * Get the normalized start token of this sub range. + * + * @return The normalized start token + */ + public BigInteger start() + { + return start; + } + + /** + * Get the normalized end token of this sub range. + * + * @return The normalized end token + */ + public BigInteger end() + { + return end; + } + + /** + * Get the repair timestamp of this sub range. + * + * @return The repair timestamp. + */ + public long getStartedAt() + { + return startedAt; + } + + /** + * Get the finished repair timestamp of this sub range. + * + * @return The finished repair timestamp or -1 if not finished. + */ + public long getFinishedAt() + { + return finishedAt; + } + + /** + * Get the repair time. + * + * @return The current repair time. 
+ */ + public long getRepairTime() + { + return repairTime; + } + + /** + * Create a new normalized range based on this sub range with the provided start + * and the current sub range end. + * + * @param newStart The new normalized start token to use. + * @return The new normalized range. + */ + public NormalizedRange mutateStart(final BigInteger newStart) + { + if (!base.inRange(newStart)) + { + throw new IllegalArgumentException("Token " + newStart + " not in range " + base); + } + + return new NormalizedRange(base, newStart, end, startedAt, finishedAt, 0); + } + + /** + * Create a new normalized range based on this sub range with the provided end + * and the current sub range start. + * + * @param newEnd The new normalized end token to use. + * @return The new normalized range. + */ + public NormalizedRange mutateEnd(final BigInteger newEnd) + { + if (!base.inRange(newEnd)) + { + throw new IllegalArgumentException("Token " + newEnd + " not in range " + base); + } + + return new NormalizedRange(base, start, newEnd, startedAt, finishedAt, 0); + } + + /** + * Create a new normalized range based on this sub range and the provided sub range. + * The new normalized sub range will span the range between the end of this and the + * start of the provided sub range. + * + * E.g. (5, 15] and (20, 30] generates a range (15, 20] + * + * @param other The new normalized start token to use. + * @param wasStartedAt The repair timestamp to use for the new normalized range. + * @param wasFinishedAt The repair finish timestamp to use for the new normalized range. + * @return The new normalized range. 
+ */ + public NormalizedRange between(final NormalizedRange other, final long wasStartedAt, final long wasFinishedAt) + { + verifySameBaseRange(other.base); + + if (end.compareTo(other.start) >= 0) + { + throw new IllegalArgumentException("Cannot create range between " + this + " -> " + other); + } + + return new NormalizedRange(base, end, other.start, wasStartedAt, wasFinishedAt, 0); + } + + /** + * Split an overlap between the start of the provided range and the end of this. + * + * E.g. (5, 15] and (8, 17] splits to a new range (8, 15] + * + * @param other The overlapping range. + * @return The new normalized range using the highest repair timestamp of the two. + */ + public NormalizedRange splitEnd(final NormalizedRange other) + { + verifySameBaseRange(other.base); + + if (start.compareTo(other.start) > 0 || other.start.compareTo(end) >= 0) + { + throw new IllegalArgumentException("Cannot split end of " + this + " with " + other); + } + + long maxStartedAt = Math.max(this.startedAt, other.startedAt); + long minFinishedAt = Math.min(this.finishedAt, other.finishedAt); + return new NormalizedRange(base, other.start, end, maxStartedAt, minFinishedAt, 0); + } + + /** + * Combine this normalized range with the provided range assuming + * they are adjacent. + * + * E.g. (5, 15] and (15, 30] becomes (5, 30] + * + * @param other The adjacent range. + * @return The new normalized range using the lowest repair timestamp of the two. 
+ */ + public NormalizedRange combine(final NormalizedRange other) + { + verifySameBaseRange(other.base); + + if (other.start.compareTo(end) != 0) + { + throw new IllegalArgumentException("Range " + other + " is not adjacent to " + this); + } + + long minStartedAt = Math.min(this.startedAt, other.startedAt); + long maxFinishedAt = Math.max(this.finishedAt, other.finishedAt); + return new NormalizedRange(base, start, other.end, minStartedAt, maxFinishedAt, + repairTime + other.repairTime); + } + + /** + * Check if this sub range covers the other sub range fully. + * + * @param other The sub range to compare + * @return True if this range covers the provided range. + */ + public boolean isCovering(final NormalizedRange other) + { + verifySameBaseRange(other.base); + + return start.compareTo(other.start) <= 0 && end.compareTo(other.end) >= 0; + } + + private void verifySameBaseRange(final NormalizedBaseRange other) + { + if (!base.equals(other)) + { + throw new IllegalArgumentException("Different bases" + base + ":" + other); + } + } + + /** + * Compares two ranges. + */ + @Override + public int compareTo(final NormalizedRange o) + { + verifySameBaseRange(o.base); + + int cmp = start.compareTo(o.start); + if (cmp != 0) + { + return cmp; + } + + return o.end.compareTo(end); + } + + /** + * Checks for equality. + */ + @Override + public boolean equals(final Object o) + { + if (this == o) + { + return true; + } + if (o == null || getClass() != o.getClass()) + { + return false; + } + NormalizedRange that = (NormalizedRange) o; + return startedAt == that.startedAt + && finishedAt == that.finishedAt + && repairTime == that.repairTime + && base.equals(that.base) + && start.equals(that.start) + && end.equals(that.end); + } + + /** + * Returns a hash representation. + */ + @Override + public int hashCode() + { + return Objects.hash(base, start, end, startedAt, finishedAt, repairTime); + } + + /** + * Returns a string representation. 
+ */ + @Override + public String toString() + { + return String.format("(%d, %d], %d-%d, repairtime: %d", start, end, startedAt, finishedAt, repairTime); + } + +} + diff --git a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/vnode/SubRangeRepairStates.java b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/vnode/SubRangeRepairStates.java new file mode 100644 index 00000000..4c2899f8 --- /dev/null +++ b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/vnode/SubRangeRepairStates.java @@ -0,0 +1,186 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.ericsson.bss.cassandra.ecchronos.core.impl.repair.vnode; + +import com.ericsson.bss.cassandra.ecchronos.core.state.LongTokenRange; +import com.ericsson.bss.cassandra.ecchronos.core.state.VnodeRepairState; +import com.ericsson.bss.cassandra.ecchronos.core.state.VnodeRepairStates; +import com.google.common.collect.ImmutableList; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; + +public final class SubRangeRepairStates implements VnodeRepairStates // CPD-OFF +{ + private final ImmutableList myVnodeRepairStatuses; + + private SubRangeRepairStates(final SubRangeRepairStates.Builder builder) + { + List baseVnodes = builder.myVnodeRepairStatesBase; + Collection partialVnodes = builder.myActualVnodeRepairStates.values(); + + List summarizedVnodes = VnodeRepairStateSummarizer.summarizePartialVnodes(baseVnodes, + partialVnodes); + + myVnodeRepairStatuses = ImmutableList.copyOf(summarizedVnodes); + } + + @Override + public Collection getVnodeRepairStates() + { + return myVnodeRepairStatuses; + } + + @Override + public SubRangeRepairStates combineWithRepairedAt(final long repairedAt) + { + Builder builder = newBuilder(getVnodeRepairStates()); + + for (VnodeRepairState vnodeRepairState : getVnodeRepairStates()) + { + VnodeRepairState vnodeRepairStateWithRepairedAt = new VnodeRepairState(vnodeRepairState.getTokenRange(), + vnodeRepairState.getReplicas(), repairedAt); + builder.updateVnodeRepairState(vnodeRepairStateWithRepairedAt); + } + + return builder.build(); + } + + @Override + public String toString() + { + return myVnodeRepairStatuses.toString(); + } + + public static Builder newBuilder(final Collection vnodeRepairStates) + { + return new Builder(vnodeRepairStates); + } + + @Override + public boolean equals(final Object o) + { + if (this == o) + { + return true; + } + if (o == null || getClass() != o.getClass()) + { + return false; + } + SubRangeRepairStates that = 
(SubRangeRepairStates) o; + return Objects.equals(myVnodeRepairStatuses, that.myVnodeRepairStatuses); + } + + @Override + public int hashCode() + { + return Objects.hash(myVnodeRepairStatuses); + } + + public static class Builder implements VnodeRepairStates.Builder + { + private final ImmutableList myVnodeRepairStatesBase; + private final Map myActualVnodeRepairStates = new HashMap<>(); + + public Builder(final Collection vnodeRepairStates) + { + ImmutableList.Builder builder = ImmutableList.builder(); + for (VnodeRepairState vnodeRepairState : vnodeRepairStates) + { + builder.add(vnodeRepairState); + } + myVnodeRepairStatesBase = builder.build(); + } + + /** + * Update vNode repair state. + * + * @param vnodeRepairState The vnode repair status to update. + * @return VnodeRepairStates.Builder + */ + @Override + public VnodeRepairStates.Builder updateVnodeRepairState(final VnodeRepairState vnodeRepairState) + { + for (VnodeRepairState baseVnode : myVnodeRepairStatesBase) + { + if (baseVnode.getTokenRange().isCovering(vnodeRepairState.getTokenRange())) + { + replaceIfNewer(baseVnode, vnodeRepairState); + break; + } + } + + return this; + } + + /** + * Build subrange repair states. 
+ * + * @return SubRangeRepairStates + */ + @Override + public SubRangeRepairStates build() + { + return new SubRangeRepairStates(this); + } + + private void replaceIfNewer(final VnodeRepairState baseVnode, final VnodeRepairState newVnode) + { + if (baseVnode.getTokenRange().equals(newVnode.getTokenRange())) // Original vnode + { + if (shouldReplace(baseVnode, newVnode)) + { + myActualVnodeRepairStates.put(newVnode.getTokenRange(), newVnode); + } + } + else if (partialVnodeIsNewer(baseVnode, newVnode)) // Partial vnode + { + myActualVnodeRepairStates.put(newVnode.getTokenRange(), newVnode); + } + } + + private boolean shouldReplace(final VnodeRepairState baseVnode, final VnodeRepairState newVnode) + { + if (!baseVnode.isSameVnode(newVnode)) + { + return false; + } + + return isNewer(baseVnode, newVnode); + } + + private boolean partialVnodeIsNewer(final VnodeRepairState baseVnode, final VnodeRepairState newVnode) + { + if (!baseVnode.getReplicas().equals(newVnode.getReplicas())) + { + return false; + } + + return isNewer(baseVnode, newVnode); + } + + private boolean isNewer(final VnodeRepairState baseVnode, final VnodeRepairState newVnode) + { + VnodeRepairState oldVnode = myActualVnodeRepairStates.getOrDefault(baseVnode.getTokenRange(), baseVnode); + + return oldVnode.lastRepairedAt() < newVnode.lastRepairedAt() + || oldVnode.getFinishedAt() < newVnode.getFinishedAt(); + } + } +} + diff --git a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/vnode/VnodeRepairGroupFactory.java b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/vnode/VnodeRepairGroupFactory.java new file mode 100644 index 00000000..2c79a6c9 --- /dev/null +++ b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/vnode/VnodeRepairGroupFactory.java @@ -0,0 +1,79 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file 
except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.ericsson.bss.cassandra.ecchronos.core.impl.repair.vnode; + +import com.ericsson.bss.cassandra.ecchronos.core.metadata.DriverNode; +import com.ericsson.bss.cassandra.ecchronos.core.state.LongTokenRange; +import com.ericsson.bss.cassandra.ecchronos.core.state.RepairedAt; +import com.ericsson.bss.cassandra.ecchronos.core.state.ReplicaRepairGroup; +import com.ericsson.bss.cassandra.ecchronos.core.state.ReplicaRepairGroupFactory; +import com.ericsson.bss.cassandra.ecchronos.core.state.VnodeRepairState; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; + +import java.util.ArrayList; +import java.util.Comparator; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; + +/** + * A factory for {@link ReplicaRepairGroup} that creates repair groups for all vnodes with common replicas. + * + * The generated list will contain the vnode groups in a sorted order so that the most urgent vnode to + * repair is first in the list. 
+ */ +public final class VnodeRepairGroupFactory implements ReplicaRepairGroupFactory +{ + public static final VnodeRepairGroupFactory INSTANCE = new VnodeRepairGroupFactory(); + + private VnodeRepairGroupFactory() + { + // Nothing to do here + } + + @Override + public List generateReplicaRepairGroups(final List availableVnodeRepairStates) + { + List sortedVnodeRepairStates = availableVnodeRepairStates.stream() + .sorted(Comparator.comparingLong(VnodeRepairState::lastRepairedAt)) + .collect(Collectors.toList()); + + List sortedRepairGroups = new ArrayList<>(); + Set> countedReplicaGroups = new HashSet<>(); + + for (VnodeRepairState vnodeRepairState : sortedVnodeRepairStates) + { + ImmutableSet replicas = vnodeRepairState.getReplicas(); + + if (countedReplicaGroups.add(replicas)) + { + List vnodesForReplicas = availableVnodeRepairStates.stream() + .filter(v -> v.getReplicas().equals(replicas)).collect(Collectors.toList()); + RepairedAt repairedAt = RepairedAt.generate(vnodesForReplicas); + List commonVnodes = vnodesForReplicas.stream() + .map(VnodeRepairState::getTokenRange) + .collect(Collectors.toList()); + + sortedRepairGroups.add(new ReplicaRepairGroup(replicas, ImmutableList.copyOf(commonVnodes), + repairedAt.getMinRepairedAt())); + } + } + + return sortedRepairGroups; + } +} + diff --git a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/vnode/VnodeRepairStateFactoryImpl.java b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/vnode/VnodeRepairStateFactoryImpl.java new file mode 100644 index 00000000..c02dd87a --- /dev/null +++ b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/vnode/VnodeRepairStateFactoryImpl.java @@ -0,0 +1,242 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.ericsson.bss.cassandra.ecchronos.core.impl.repair.vnode; + +import com.datastax.oss.driver.api.core.metadata.Node; +import com.ericsson.bss.cassandra.ecchronos.core.metadata.DriverNode; +import com.ericsson.bss.cassandra.ecchronos.core.state.LongTokenRange; +import com.ericsson.bss.cassandra.ecchronos.core.state.RepairEntry; +import com.ericsson.bss.cassandra.ecchronos.core.state.RepairHistoryProvider; +import com.ericsson.bss.cassandra.ecchronos.core.state.RepairStateSnapshot; +import com.ericsson.bss.cassandra.ecchronos.core.state.ReplicationState; +import com.ericsson.bss.cassandra.ecchronos.core.state.VnodeRepairState; +import com.ericsson.bss.cassandra.ecchronos.core.state.VnodeRepairStateFactory; +import com.ericsson.bss.cassandra.ecchronos.core.state.VnodeRepairStates; +import com.ericsson.bss.cassandra.ecchronos.core.table.TableReference; +import com.ericsson.bss.cassandra.ecchronos.utils.enums.repair.RepairStatus; +import com.google.common.collect.ImmutableSet; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; + +/** + * A repair state factory which uses a {@link RepairHistoryProvider} to determine repair state. 
+ */ +public class VnodeRepairStateFactoryImpl implements VnodeRepairStateFactory +{ + private static final Logger LOG = LoggerFactory.getLogger(VnodeRepairStateFactoryImpl.class); + + private final ReplicationState myReplicationState; + private final RepairHistoryProvider myRepairHistoryProvider; + private final boolean useSubRanges; + + public VnodeRepairStateFactoryImpl( + final ReplicationState replicationState, + final RepairHistoryProvider repairHistoryProvider, + final boolean toUseSubRanges) + { + myReplicationState = replicationState; + myRepairHistoryProvider = repairHistoryProvider; + this.useSubRanges = toUseSubRanges; + } + + /** + * {@inheritDoc} + */ + @Override + public VnodeRepairStates calculateNewState( + final Node node, + final TableReference tableReference, final RepairStateSnapshot previous, + final long iterateToTime) + { + Map> tokenRangeToReplicaMap + = myReplicationState.getTokenRangeToReplicas(tableReference, node); + long lastRepairedAt = previousLastRepairedAt(previous, tokenRangeToReplicaMap); + + Iterator repairEntryIterator; + + if (lastRepairedAt == VnodeRepairState.UNREPAIRED) + { + LOG.debug("No last repaired at found for {}, iterating over all repair entries", tableReference); + repairEntryIterator = myRepairHistoryProvider.iterate(node, tableReference, iterateToTime, + (repairEntry) -> acceptRepairEntries(repairEntry, tokenRangeToReplicaMap)); + } + else + { + LOG.debug("Table {} snapshot created at {}, iterating repair entries until that time", tableReference, + previous.getCreatedAt()); + repairEntryIterator = myRepairHistoryProvider.iterate(node, tableReference, iterateToTime, + previous.getCreatedAt(), (repairEntry) -> acceptRepairEntries(repairEntry, tokenRangeToReplicaMap)); + } + + return generateVnodeRepairStates(lastRepairedAt, previous, repairEntryIterator, tokenRangeToReplicaMap); + } + + /** + * {@inheritDoc} + */ + @Override + public VnodeRepairStates calculateClusterWideState( + final Node node, + final 
TableReference tableReference, + final long to, + final long from) + { + Map> tokenRanges = myReplicationState.getTokenRanges(tableReference, node); + Set allNodes = new HashSet<>(); + tokenRanges.values().forEach(n -> allNodes.addAll(n)); + List allRepairEntries = new ArrayList<>(); + for (DriverNode driverNode : allNodes) + { + Iterator repairEntryIterator = + myRepairHistoryProvider.iterate(driverNode.getNode(), + tableReference, to, from, (repairEntry) -> acceptRepairEntries(repairEntry, tokenRanges)); + while (repairEntryIterator.hasNext()) + { + RepairEntry repairEntry = repairEntryIterator.next(); + allRepairEntries.add(repairEntry); + } + } + return generateVnodeRepairStates(VnodeRepairState.UNREPAIRED, null, allRepairEntries.iterator(), tokenRanges); + } + + private VnodeRepairStates generateVnodeRepairStates(final long lastRepairedAt, + final RepairStateSnapshot previous, + final Iterator repairEntryIterator, + final Map> + tokenRangeToReplicaMap) + { + List vnodeRepairStatesBase = new ArrayList<>(); + + for (Map.Entry> entry : tokenRangeToReplicaMap.entrySet()) + { + LongTokenRange longTokenRange = entry.getKey(); + ImmutableSet replicas = entry.getValue(); + vnodeRepairStatesBase.add(new VnodeRepairState(longTokenRange, replicas, lastRepairedAt)); + } + + VnodeRepairStates.Builder vnodeRepairStatusesBuilder; + if (useSubRanges) + { + vnodeRepairStatusesBuilder = SubRangeRepairStates.newBuilder(vnodeRepairStatesBase); + } + else + { + vnodeRepairStatusesBuilder = VnodeRepairStatesImpl.newBuilder(vnodeRepairStatesBase); + } + + if (previous != null) + { + vnodeRepairStatusesBuilder.updateVnodeRepairStates(previous.getVnodeRepairStates().getVnodeRepairStates()); + } + + while (repairEntryIterator.hasNext()) + { + RepairEntry repairEntry = repairEntryIterator.next(); + LongTokenRange longTokenRange = repairEntry.getRange(); + ImmutableSet replicas = getReplicasForRange(longTokenRange, tokenRangeToReplicaMap); + + VnodeRepairState vnodeRepairState = new 
VnodeRepairState(longTokenRange, + replicas, repairEntry.getStartedAt(), repairEntry.getFinishedAt()); + + vnodeRepairStatusesBuilder.updateVnodeRepairState(vnodeRepairState); + } + + return vnodeRepairStatusesBuilder.build(); + } + + private long previousLastRepairedAt(final RepairStateSnapshot previous, + final Map> tokenToReplicaMap) + { + if (previous == null) + { + return VnodeRepairState.UNREPAIRED; + } + + long defaultUsedLastRepairedAt = previous.lastCompletedAt(); + + long lastRepairedAt = Long.MAX_VALUE; + + for (VnodeRepairState vnodeRepairState : previous.getVnodeRepairStates().getVnodeRepairStates()) + { + if (tokenToReplicaMap.containsKey(vnodeRepairState.getTokenRange()) + && lastRepairedAt > vnodeRepairState.lastRepairedAt()) + { + lastRepairedAt = vnodeRepairState.lastRepairedAt(); + } + } + + if (lastRepairedAt == VnodeRepairState.UNREPAIRED) + { + return defaultUsedLastRepairedAt; + } + + return lastRepairedAt == Long.MAX_VALUE ? VnodeRepairState.UNREPAIRED : lastRepairedAt; + } + + private boolean acceptRepairEntries(final RepairEntry repairEntry, + final Map> tokenRangeToReplicaMap) + { + if (RepairStatus.SUCCESS != repairEntry.getStatus()) + { + LOG.debug("Ignoring entry {}, repair was not successful", repairEntry); + return false; + } + + LongTokenRange repairedRange = repairEntry.getRange(); + + ImmutableSet nodes = getReplicasForRange(repairedRange, tokenRangeToReplicaMap); + if (nodes == null) + { + LOG.trace("Ignoring entry {}, replicas not present in tokenRangeToReplicas", repairEntry); + return false; + } + + if (!nodes.equals(repairEntry.getParticipants())) + { + LOG.debug("Ignoring entry {}, replicas {} not matching participants", repairEntry, nodes); + return false; + } + + return true; + } + + private ImmutableSet getReplicasForRange(final LongTokenRange range, + final Map> + tokenRangeToReplicaMap) + { + ImmutableSet nodes = tokenRangeToReplicaMap.get(range); + if (nodes == null && useSubRanges) + { + for (Map.Entry> vnode : 
tokenRangeToReplicaMap.entrySet()) + { + if (vnode.getKey().isCovering(range)) + { + nodes = vnode.getValue(); + break; + } + } + } + + return nodes; + } +} + diff --git a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/vnode/VnodeRepairStateSummarizer.java b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/vnode/VnodeRepairStateSummarizer.java new file mode 100644 index 00000000..e6b0da92 --- /dev/null +++ b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/vnode/VnodeRepairStateSummarizer.java @@ -0,0 +1,234 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.ericsson.bss.cassandra.ecchronos.core.impl.repair.vnode; + +import com.ericsson.bss.cassandra.ecchronos.core.state.VnodeRepairState; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; + +/** + * Utility class to handle partially repaired ranges and converting them back + * to full vnodes when possible in order to minimize memory usage. 
+ */ +public final class VnodeRepairStateSummarizer +{ + private static final long ONE_HOUR_IN_MS = TimeUnit.HOURS.toMillis(1); + + private final NormalizedBaseRange myBaseVnode; + private final List mySummarizedRanges; + private final MergeStrategy myMergeStrategy; + + private VnodeRepairStateSummarizer(final VnodeRepairState baseVnode, + final Collection subStates, + final MergeStrategy mergeStrategy) + { + this.myBaseVnode = new NormalizedBaseRange(baseVnode); + this.mySummarizedRanges = subStates.stream() + .map(myBaseVnode::transform) + .sorted() + .collect(Collectors.toCollection(ArrayList::new)); + this.myMergeStrategy = mergeStrategy; + + // Add the full range first so that we can split out any sub ranges that we are missing + mySummarizedRanges.add(0, myBaseVnode.transform(baseVnode)); + } + + /** + * Summarize vnode repair states based on actual vnode data. + *

+ * Generates virtual node repair states based on the partial vnodes repaired. + * If there are partial ranges not covered the base vnode repair state will + * be filled in there. + *

+ * In case of overlapping ranges the ranges will be split in three parts like:
+ * (5, 15], (8, 30] will become (5, 8], (8, 15], (15, 30].
+ * The middle section will retain the highest repaired at of the two. + *

+ * Adjacent ranges repaired within one hour will be merged together. + * + * @param baseVnodes The base vnode set retrieved from the keyspace replication. + * @param partialVnodes The repaired vnodes that can be sub-ranges of the base vnodes. + * @return The summarized virtual node states. + */ + public static List summarizePartialVnodes(final List baseVnodes, + final Collection partialVnodes) + { + return summarizePartialVnodes(baseVnodes, partialVnodes, VnodeRepairStateSummarizer::isCloseInTime); + } + + /** + * Summarize vnode repair states based on actual vnode data. + *

+ * Generates virtual node repair states based on the partial vnodes repaired. + * If there are partial ranges not covered the base vnode repair state will + * be filled in there. + *

+ * In case of overlapping ranges the ranges will be split in three parts like:
+ * (5, 15], (8, 30] will become (5, 8], (8, 15], (15, 30].
+ * The middle section will retain the highest repaired at of the two. + *

+ * Adjacent ranges will be merged based on the provided merge strategy. + * + * @param baseVnodes The base vnode set retrieved from the keyspace replication. + * @param partialVnodes The repaired vnodes that can be sub-ranges of the base vnodes. + * @param mergeStrategy The merge strategy to use. + * @return The summarized virtual node states. + */ + public static List summarizePartialVnodes(final List baseVnodes, + final Collection partialVnodes, + final MergeStrategy mergeStrategy) + { + List vnodeRepairStates = new ArrayList<>(partialVnodes); + + for (VnodeRepairState baseState : baseVnodes) + { + List covering = new ArrayList<>(); + for (VnodeRepairState actualState : vnodeRepairStates) + { + if (baseState.getTokenRange().isCovering(actualState.getTokenRange())) + { + covering.add(actualState); + } + } + if (covering.isEmpty()) + { + vnodeRepairStates.add(baseState); + } + else + { + List replacement = new VnodeRepairStateSummarizer(baseState, + covering, mergeStrategy).summarize(); + vnodeRepairStates.removeAll(covering); + vnodeRepairStates.addAll(replacement); + } + } + + return vnodeRepairStates; + } + + public List summarize() + { + splitOverlapping(); + + int i = 0; + for (; i < mySummarizedRanges.size() - 1; i++) + { + NormalizedRange current = mySummarizedRanges.get(i); + NormalizedRange next = mySummarizedRanges.get(i + 1); + + if (myMergeStrategy.shouldMerge(current, next)) + { + // If two vnodes are close in time we merge them together using + // the lowest timestamp of the two + mySummarizedRanges.add(i, current.combine(next)); + + mySummarizedRanges.remove(current); + mySummarizedRanges.remove(next); + + // Check the newly generated vnode since it might be possible + // to merge it again + i--; + } + } + + return mySummarizedRanges.stream() + .map(myBaseVnode::transform) + .collect(Collectors.toList()); + } + + private void splitOverlapping() + { + int i = 0; + for (; i < mySummarizedRanges.size() - 1; i++) + { + NormalizedRange current = 
mySummarizedRanges.get(i); + NormalizedRange next = mySummarizedRanges.get(i + 1); + + if (current.isCovering(next)) + { + splitCoveringRange(current, next); + i--; + } + else if (current.end().compareTo(next.start()) > 0) + { + // Replace e.g. "(5, 15], (8, 30]" with "(5, 8], (8, 15], (15, 30]" + // The middle section (8, 15] gets the highest "repaired at" of the two overlapping ranges + mySummarizedRanges.remove(current); + mySummarizedRanges.remove(next); + + insertSorted(current.mutateEnd(next.start()), mySummarizedRanges); + insertSorted(current.splitEnd(next), mySummarizedRanges); + insertSorted(next.mutateStart(current.end()), mySummarizedRanges); + i--; + } + } + } + + private void splitCoveringRange(final NormalizedRange covering, final NormalizedRange covered) + { + if (covering.getStartedAt() >= covered.getStartedAt()) + { + // We already cover the sub range with a later repaired at, remove it + mySummarizedRanges.remove(covered); + } + else + { + // Since the covering range is repaired earlier than the covered range + // we replace the covering range with smaller ranges around the covered + // range. The covered range is already in place in the list so there + // is no need to modify it. 
+ mySummarizedRanges.remove(covering); + + if (covering.start().compareTo(covered.start()) != 0) + { + insertSorted(covering.mutateEnd(covered.start()), mySummarizedRanges); + } + if (covering.end().compareTo(covered.end()) != 0) + { + insertSorted(covering.mutateStart(covered.end()), mySummarizedRanges); + } + } + } + + private static void insertSorted(final NormalizedRange toInsert, final List collection) + { + int index = Collections.binarySearch(collection, toInsert); + + if (index < 0) + { + index = (-index) - 1; + } + + collection.add(index, toInsert); + } + + private static boolean isCloseInTime(final NormalizedRange v1, final NormalizedRange v2) + { + return Math.abs(v1.getStartedAt() - v2.getStartedAt()) < ONE_HOUR_IN_MS; + } + + /** + * A merge strategy for adjacent sub ranges. + */ + public interface MergeStrategy + { + boolean shouldMerge(NormalizedRange range1, NormalizedRange range2); + } +} + diff --git a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/vnode/VnodeRepairStatesImpl.java b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/vnode/VnodeRepairStatesImpl.java new file mode 100644 index 00000000..1a80d715 --- /dev/null +++ b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/vnode/VnodeRepairStatesImpl.java @@ -0,0 +1,160 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.ericsson.bss.cassandra.ecchronos.core.impl.repair.vnode; + +import com.ericsson.bss.cassandra.ecchronos.core.state.LongTokenRange; +import com.ericsson.bss.cassandra.ecchronos.core.state.VnodeRepairState; +import com.ericsson.bss.cassandra.ecchronos.core.state.VnodeRepairStates; +import com.google.common.collect.ImmutableList; + +import java.util.Collection; +import java.util.LinkedHashMap; +import java.util.Map; +import java.util.Objects; + +public final class VnodeRepairStatesImpl implements VnodeRepairStates // CPD-OFF +{ + private final ImmutableList myVnodeRepairStatuses; + + private VnodeRepairStatesImpl(final Builder builder) + { + myVnodeRepairStatuses = ImmutableList.copyOf(builder.myVnodeRepairStates.values()); + } + + @Override + public Collection getVnodeRepairStates() + { + return myVnodeRepairStatuses; + } + + @Override + public VnodeRepairStatesImpl combineWithRepairedAt(final long repairedAt) + { + Builder builder = newBuilder(getVnodeRepairStates()); + + for (VnodeRepairState vnodeRepairState : getVnodeRepairStates()) + { + VnodeRepairState vnodeRepairStateWithRepairedAt = new VnodeRepairState(vnodeRepairState.getTokenRange(), + vnodeRepairState.getReplicas(), repairedAt); + builder.updateVnodeRepairState(vnodeRepairStateWithRepairedAt); + } + + return builder.build(); + } + + @Override + public String toString() + { + return myVnodeRepairStatuses.toString(); + } + + public static Builder newBuilder(final Collection vnodeRepairStates) + { + return new Builder(vnodeRepairStates); + } + + @Override + public boolean equals(final Object o) + { + if (this == o) + { + return true; + } + if (o == null || getClass() != o.getClass()) + { + return false; + } + VnodeRepairStatesImpl that = (VnodeRepairStatesImpl) o; + return Objects.equals(myVnodeRepairStatuses, that.myVnodeRepairStatuses); + } + + @Override + public int hashCode() + { + return Objects.hash(myVnodeRepairStatuses); + } + + public static class Builder implements 
VnodeRepairStates.Builder + { + private final Map myVnodeRepairStates = new LinkedHashMap<>(); + + public Builder(final Collection vnodeRepairStates) + { + for (VnodeRepairState vnodeRepairState : vnodeRepairStates) + { + myVnodeRepairStates.put(vnodeRepairState.getTokenRange(), vnodeRepairState); + } + } + + /** + * Update vNode repair states. + * + * @return Builder + */ + @Override + public Builder updateVnodeRepairStates(final Collection vnodeRepairStates) + { + for (VnodeRepairState vnodeRepairState : vnodeRepairStates) + { + updateVnodeRepairState(vnodeRepairState); + } + return this; + } + + /** + * Update vNode repair state. + * + * @return Builder + */ + @Override + public Builder updateVnodeRepairState(final VnodeRepairState vnodeRepairState) + { + VnodeRepairState oldVnode = myVnodeRepairStates.get(vnodeRepairState.getTokenRange()); + if (shouldReplace(oldVnode, vnodeRepairState)) + { + myVnodeRepairStates.put(vnodeRepairState.getTokenRange(), vnodeRepairState); + } + return this; + } + + /** + * Build vNode repair state. 
+ * + * @return VnodeRepairStatesImpl + */ + @Override + public VnodeRepairStatesImpl build() + { + return new VnodeRepairStatesImpl(this); + } + + private boolean shouldReplace(final VnodeRepairState oldVnode, final VnodeRepairState newVnode) + { + if (oldVnode == null) + { + return false; + } + + if (!oldVnode.isSameVnode(newVnode)) + { + return false; + } + + return oldVnode.lastRepairedAt() < newVnode.lastRepairedAt() + || oldVnode.getFinishedAt() < newVnode.getFinishedAt(); + } + } +} + diff --git a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/vnode/VnodeRepairTask.java b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/vnode/VnodeRepairTask.java new file mode 100644 index 00000000..97a92e2b --- /dev/null +++ b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/vnode/VnodeRepairTask.java @@ -0,0 +1,181 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.ericsson.bss.cassandra.ecchronos.core.impl.repair.vnode; + +import com.datastax.oss.driver.api.core.metadata.Node; +import com.ericsson.bss.cassandra.ecchronos.core.impl.repair.RepairTask; +import com.ericsson.bss.cassandra.ecchronos.core.jmx.DistributedJmxProxy; +import com.ericsson.bss.cassandra.ecchronos.core.jmx.DistributedJmxProxyFactory; +import com.ericsson.bss.cassandra.ecchronos.core.metadata.DriverNode; +import com.ericsson.bss.cassandra.ecchronos.core.repair.config.RepairConfiguration; +import com.ericsson.bss.cassandra.ecchronos.core.repair.config.RepairOptions; +import com.ericsson.bss.cassandra.ecchronos.core.state.LongTokenRange; +import com.ericsson.bss.cassandra.ecchronos.core.state.RepairHistory; +import com.ericsson.bss.cassandra.ecchronos.core.table.TableReference; +import com.ericsson.bss.cassandra.ecchronos.core.table.TableRepairMetrics; +import com.ericsson.bss.cassandra.ecchronos.data.repairhistory.RepairHistoryService; +import com.ericsson.bss.cassandra.ecchronos.utils.enums.repair.RepairStatus; +import com.ericsson.bss.cassandra.ecchronos.utils.exceptions.ScheduledJobException; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; +import com.google.common.collect.Sets; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.UUID; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; +import java.util.stream.Collectors; + +public class VnodeRepairTask extends RepairTask +{ + private static final Logger LOG = LoggerFactory.getLogger(VnodeRepairTask.class); + private final ConcurrentMap myRepairSessions = + new ConcurrentHashMap<>(); + private final Set myTokenRanges; + private final Set myReplicas; + private volatile Set myUnknownRanges; + + public 
VnodeRepairTask(final Node currentNode, final DistributedJmxProxyFactory jmxProxyFactory, + final TableReference tableReference, + final RepairConfiguration repairConfiguration, final TableRepairMetrics tableRepairMetrics, + final RepairHistory repairHistory, final Set tokenRanges, final Set replicas, + final UUID jobId) + { + super(currentNode.getHostId(), jmxProxyFactory, tableReference, repairConfiguration, tableRepairMetrics); + myTokenRanges = Preconditions.checkNotNull(tokenRanges, "Token ranges must be set"); + myReplicas = Preconditions.checkNotNull(replicas, "Replicas must be set"); + for (LongTokenRange range : myTokenRanges) + { + myRepairSessions.put(range, repairHistory.newSession(currentNode, tableReference, jobId, range, + myReplicas)); + } + } + + @Override + protected final void onExecute() + { + myRepairSessions.values().forEach(RepairHistoryService.RepairSession::start); + } + + @Override + protected final void onFinish(final RepairStatus repairStatus) + { + if (repairStatus.equals(RepairStatus.FAILED)) + { + Set unrepairedRanges = new HashSet<>(); + if (myUnknownRanges != null) + { + unrepairedRanges.addAll(myUnknownRanges); + } + unrepairedRanges.addAll(getFailedRanges()); + LOG.warn("Unable to repair '{}', affected ranges: '{}'", this, unrepairedRanges); + } + myRepairSessions.values().forEach(rs -> rs.finish(repairStatus)); + myRepairSessions.clear(); + } + + @Override + protected final void verifyRepair(final DistributedJmxProxy proxy) throws ScheduledJobException + { + Set completedRanges = Sets.union(getFailedRanges(), getSuccessfulRanges()); + Set unknownRanges = Sets.difference(myTokenRanges, completedRanges); + if (!unknownRanges.isEmpty()) + { + LOG.debug("Unknown ranges: {}", unknownRanges); + LOG.debug("Completed ranges: {}", completedRanges); + myUnknownRanges = Collections.unmodifiableSet(unknownRanges); + proxy.forceTerminateAllRepairSessions(); + throw new ScheduledJobException(String.format("Unknown status of some ranges for 
%s", this)); + } + super.verifyRepair(proxy); + } + + @Override + protected final Map getOptions() + { + Map options = new HashMap<>(); + options.put(RepairOptions.PARALLELISM_KEY, getRepairConfiguration().getRepairParallelism().getName()); + options.put(RepairOptions.PRIMARY_RANGE_KEY, Boolean.toString(false)); + options.put(RepairOptions.COLUMNFAMILIES_KEY, getTableReference().getTable()); + options.put(RepairOptions.INCREMENTAL_KEY, Boolean.toString(false)); + + StringBuilder rangesStringBuilder = new StringBuilder(); + for (LongTokenRange range : myTokenRanges) + { + rangesStringBuilder.append(range.start).append(':').append(range.end).append(','); + } + options.put(RepairOptions.RANGES_KEY, rangesStringBuilder.toString()); + String replicasString = myReplicas.stream().map(host -> host.getPublicAddress().getHostAddress()) + .collect(Collectors.joining(",")); + options.put(RepairOptions.HOSTS_KEY, replicasString); + return options; + } + + @Override + protected final void onRangeFinished(final LongTokenRange range, final RepairStatus repairStatus) + { + super.onRangeFinished(range, repairStatus); + RepairHistoryService.RepairSession repairSession = myRepairSessions.remove(range); + if (repairSession == null) + { + LOG.error("{}: Finished range {} - but not included in the known repair sessions {}, all ranges are {}", + this, + range, + myRepairSessions.keySet(), + myTokenRanges); + } + else + { + repairSession.finish(repairStatus); + } + } + + /** + * String representation. 
+ * + * @return String + */ + @Override + public String toString() + { + return String.format("Vnode repairTask of %s", getTableReference()); + } + + @VisibleForTesting + final Collection getUnknownRanges() + { + return myUnknownRanges; + } + + @VisibleForTesting + final Set getTokenRanges() + { + return Sets.newLinkedHashSet(myTokenRanges); + } + + @VisibleForTesting + final Set getReplicas() + { + return Sets.newHashSet(myReplicas); + } +} + diff --git a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/vnode/package-info.java b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/vnode/package-info.java new file mode 100644 index 00000000..2fbd63bc --- /dev/null +++ b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/vnode/package-info.java @@ -0,0 +1,18 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * Contains the implementations and resources for vnode repairs. 
+ */ +package com.ericsson.bss.cassandra.ecchronos.core.impl.repair.vnode; diff --git a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/table/TableRepairJob.java b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/table/TableRepairJob.java new file mode 100644 index 00000000..af26e341 --- /dev/null +++ b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/table/TableRepairJob.java @@ -0,0 +1,502 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.ericsson.bss.cassandra.ecchronos.core.impl.table; + +import com.datastax.oss.driver.api.core.metadata.Node; +import com.ericsson.bss.cassandra.ecchronos.core.impl.repair.RepairGroup; +import com.ericsson.bss.cassandra.ecchronos.core.jmx.DistributedJmxProxyFactory; +import com.ericsson.bss.cassandra.ecchronos.core.repair.config.RepairConfiguration; +import com.ericsson.bss.cassandra.ecchronos.core.repair.scheduler.ScheduledRepairJob; +import com.ericsson.bss.cassandra.ecchronos.core.repair.scheduler.ScheduledRepairJobView; +import com.ericsson.bss.cassandra.ecchronos.core.repair.scheduler.ScheduledTask; +import com.ericsson.bss.cassandra.ecchronos.core.state.LongTokenRange; +import com.ericsson.bss.cassandra.ecchronos.core.state.RepairState; +import com.ericsson.bss.cassandra.ecchronos.core.state.RepairStateSnapshot; +import com.ericsson.bss.cassandra.ecchronos.core.state.ReplicaRepairGroup; +import com.ericsson.bss.cassandra.ecchronos.core.state.VnodeRepairState; +import com.ericsson.bss.cassandra.ecchronos.core.state.VnodeRepairStates; +import com.ericsson.bss.cassandra.ecchronos.core.table.TableReference; +import com.ericsson.bss.cassandra.ecchronos.core.table.TableRepairMetrics; +import com.ericsson.bss.cassandra.ecchronos.core.table.TableRepairPolicy; +import com.ericsson.bss.cassandra.ecchronos.core.table.TableStorageStates; +import com.ericsson.bss.cassandra.ecchronos.data.repairhistory.RepairHistoryService; +import com.google.common.base.Preconditions; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.math.BigInteger; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.Objects; +import java.util.concurrent.TimeUnit; +import java.util.function.Predicate; + +/** + * A scheduled job that keeps track of the repair status of a single table. 
The table is considered repaired for this + * node if all the ranges this node is responsible for are repaired within the minimum run interval. + *

+ * When run this job will create {@link com.ericsson.bss.cassandra.ecchronos.core.impl.repair.RepairTask RepairTasks} that repairs the table. + */ +public class TableRepairJob extends ScheduledRepairJob +{ + private static final Logger LOG = LoggerFactory.getLogger(TableRepairJob.class); + private static final int DAYS_IN_A_WEEK = 7; + private final Node myNode; + private final RepairState myRepairState; + private final TableStorageStates myTableStorageStates; + private final RepairHistoryService myRepairHistory; + + TableRepairJob(final Builder builder) + { + super(builder.configuration, builder.tableReference.getId(), builder.tableReference, builder.jmxProxyFactory, + builder.repairConfiguration, builder.repairPolicies, + builder.tableRepairMetrics); + myNode = Preconditions.checkNotNull(builder.myNode, + "Node must be set"); + myRepairState = Preconditions.checkNotNull(builder.repairState, + "Repair state must be set"); + myTableStorageStates = builder.tableStorageStates; + myRepairHistory = Preconditions.checkNotNull(builder.repairHistory, + "Repair history must be set"); + } + + /** + * Get scheduled repair job view. + * + * @return ScheduledRepairJobView + */ + @Override + public ScheduledRepairJobView getView() + { + long now = System.currentTimeMillis(); + return new ScheduledRepairJobView(getId(), getTableReference(), getRepairConfiguration(), + myRepairState.getSnapshot(), + getStatus(now), getProgress(now), getNextRunInMs(), getRepairConfiguration().getRepairType()); + } + + private long getNextRunInMs() + { + return (getLastSuccessfulRun() + getRepairConfiguration().getRepairIntervalInMs()) - getRunOffset(); + } + + private double getProgress(final long timestamp) + { + long interval = getRepairConfiguration().getRepairIntervalInMs(); + Collection states = myRepairState.getSnapshot().getVnodeRepairStates().getVnodeRepairStates(); + + long nRepaired = states.stream() + .filter(isRepaired(timestamp, interval)) + .count(); + + return states.isEmpty() + ? 
0 + : (double) nRepaired / states.size(); + } + + private Predicate isRepaired(final long timestamp, final long interval) + { + return state -> timestamp - state.lastRepairedAt() <= interval; + } + + private ScheduledRepairJobView.Status getStatus(final long timestamp) + { + if (getRealPriority() != -1 && !super.runnable()) + { + return ScheduledRepairJobView.Status.BLOCKED; + } + long repairedAt = myRepairState.getSnapshot().lastCompletedAt(); + long msSinceLastRepair = timestamp - repairedAt; + RepairConfiguration config = getRepairConfiguration(); + + if (msSinceLastRepair >= config.getRepairErrorTimeInMs()) + { + return ScheduledRepairJobView.Status.OVERDUE; + } + if (msSinceLastRepair >= config.getRepairWarningTimeInMs()) + { + return ScheduledRepairJobView.Status.LATE; + } + if (msSinceLastRepair >= (config.getRepairIntervalInMs() - getRunOffset())) + { + return ScheduledRepairJobView.Status.ON_TIME; + } + return ScheduledRepairJobView.Status.COMPLETED; + } + + /** + * Iterator for scheduled tasks built up by repair groups. 
+ * + * @return Scheduled task iterator + */ + @Override + public Iterator iterator() + { + RepairStateSnapshot repairStateSnapshot = myRepairState.getSnapshot(); + if (repairStateSnapshot.canRepair()) + { + List taskList = new ArrayList<>(); + + BigInteger tokensPerRepair = getTokensPerRepair(repairStateSnapshot.getVnodeRepairStates()); + + for (ReplicaRepairGroup replicaRepairGroup : repairStateSnapshot.getRepairGroups()) + { + RepairGroup.Builder builder = RepairGroup.newBuilder() + .withTableReference(getTableReference()) + .withRepairConfiguration(getRepairConfiguration()) + .withReplicaRepairGroup(replicaRepairGroup) + .withJmxProxyFactory(getJmxProxyFactory()) + .withTableRepairMetrics(getTableRepairMetrics()) + .withTokensPerRepair(tokensPerRepair) + .withRepairPolicies(getRepairPolicies()) + .withRepairHistory(myRepairHistory) + .withJobId(getId()) + .withNode(myNode); + + taskList.add(builder.build(getRealPriority(replicaRepairGroup.getLastCompletedAt()))); + } + + return taskList.iterator(); + } + else + { + return Collections.emptyIterator(); + } + } + + /** + * Update the state and set if the task was successful. + * + * @param successful + * If the job ran successfully. + */ + @Override + public void postExecute(final boolean successful) + { + try + { + myRepairState.update(); + } + catch (Exception e) + { + LOG.warn("Unable to check repair history, {}", this, e); + } + + super.postExecute(successful); + } + + /** + * Get last successful run. + * + * @return long + */ + @Override + public long getLastSuccessfulRun() + { + return myRepairState.getSnapshot().lastCompletedAt(); + } + + /** + * Get run offset. + * + * @return long + */ + @Override + public long getRunOffset() + { + return myRepairState.getSnapshot().getEstimatedRepairTime(); + } + + /** + * Runnable. + * + * @return boolean + */ + @Override + public boolean runnable() + { + return myRepairState.getSnapshot().canRepair() && super.runnable(); + } + + /** + * Refresh the repair state. 
+ */ + @Override + public void refreshState() + { + try + { + myRepairState.update(); + } + catch (Exception e) + { + LOG.warn("Unable to check repair history, {}", this, e); + } + } + + /** + * Calculate real priority based on available tasks. + * @return priority + */ + @Override + public final int getRealPriority() + { + RepairStateSnapshot repairStateSnapshot = myRepairState.getSnapshot(); + int priority = -1; + if (repairStateSnapshot.canRepair()) + { + long minRepairedAt = System.currentTimeMillis(); + for (ReplicaRepairGroup replicaRepairGroup : repairStateSnapshot.getRepairGroups()) + { + long replicaGroupCompletedAt = replicaRepairGroup.getLastCompletedAt(); + if (replicaGroupCompletedAt < minRepairedAt) + { + minRepairedAt = replicaGroupCompletedAt; + } + } + priority = getRealPriority(minRepairedAt); + } + return priority; + } + + /** + * String representation. + * + * @return String + */ + @Override + public String toString() + { + return String.format("Repair job of %s", getTableReference()); + } + + private BigInteger getTokensPerRepair(final VnodeRepairStates vnodeRepairStates) + { + BigInteger tokensPerRepair = LongTokenRange.FULL_RANGE; + + if (getRepairConfiguration().getTargetRepairSizeInBytes() != RepairConfiguration.FULL_REPAIR_SIZE) + { + BigInteger tableSizeInBytes = BigInteger.valueOf(myTableStorageStates.getDataSize(myNode.getHostId(), getTableReference())); + + if (!BigInteger.ZERO.equals(tableSizeInBytes)) + { + BigInteger fullRangeSize = vnodeRepairStates.getVnodeRepairStates().stream() + .map(VnodeRepairState::getTokenRange) + .map(LongTokenRange::rangeSize) + .reduce(BigInteger.ZERO, BigInteger::add); + + BigInteger targetSizeInBytes = BigInteger.valueOf( + getRepairConfiguration().getTargetRepairSizeInBytes()); + + if (tableSizeInBytes.compareTo(targetSizeInBytes) > 0) + { + BigInteger targetRepairs = tableSizeInBytes.divide(targetSizeInBytes); + tokensPerRepair = fullRangeSize.divide(targetRepairs); + } + } + } + + return 
tokensPerRepair; + } + + @Override + public final boolean equals(final Object o) + { + if (this == o) + { + return true; + } + if (o == null || getClass() != o.getClass()) + { + return false; + } + if (!super.equals(o)) + { + return false; + } + TableRepairJob that = (TableRepairJob) o; + return Objects.equals(myRepairState, that.myRepairState) && Objects.equals(myTableStorageStates, + that.myTableStorageStates) && Objects.equals(myRepairHistory, that.myRepairHistory); + } + + @Override + public final int hashCode() + { + return Objects.hash(super.hashCode(), myRepairState, myTableStorageStates, myRepairHistory); + } + + @SuppressWarnings("VisibilityModifier") + public static class Builder + { + Configuration configuration = new ConfigurationBuilder() + .withPriority(Priority.LOW) + .withRunInterval(DAYS_IN_A_WEEK, TimeUnit.DAYS) + .build(); + private Node myNode; + private TableReference tableReference; + private DistributedJmxProxyFactory jmxProxyFactory; + private RepairState repairState; + private TableRepairMetrics tableRepairMetrics = null; + private RepairConfiguration repairConfiguration = RepairConfiguration.DEFAULT; + private TableStorageStates tableStorageStates; + private final List repairPolicies = new ArrayList<>(); + private RepairHistoryService repairHistory; + + /** + * Build table repair job with configuration. + * + * @param theConfiguration + * Configuration. + * @return Builder + */ + public Builder withConfiguration(final Configuration theConfiguration) + { + this.configuration = theConfiguration; + return this; + } + + /** + * Build table repair job with table reference. + * + * @param theTableReference + * Table reference. + * @return Builder + */ + public Builder withTableReference(final TableReference theTableReference) + { + this.tableReference = theTableReference; + return this; + } + + /** + * Build with configuration. + * + * @param node + * Node. 
+ * @return Builder + */ + public Builder withNode(final Node node) + { + myNode = node; + return this; + } + + /** + * Build table repair job with JMX proxy factory. + * + * @param aJMXProxyFactory + * JMX proxy factory. + * @return Builder + */ + public Builder withJmxProxyFactory(final DistributedJmxProxyFactory aJMXProxyFactory) + { + this.jmxProxyFactory = aJMXProxyFactory; + return this; + } + + /** + * Build table repair job with repair state. + * + * @param theRepairState + * Repair state. + * @return Builder + */ + public Builder withRepairState(final RepairState theRepairState) + { + this.repairState = theRepairState; + return this; + } + + /** + * Build table repair job with table repair metrics. + * + * @param theTableRepairMetrics + * Table repair metrics. + * @return Builder + */ + public Builder withTableRepairMetrics(final TableRepairMetrics theTableRepairMetrics) + { + this.tableRepairMetrics = theTableRepairMetrics; + return this; + } + + /** + * Build table repair job with repair configuration. + * + * @param theRepairConfiguration + * The repair confiuration. + * @return Builder + */ + public Builder withRepairConfiguration(final RepairConfiguration theRepairConfiguration) + { + this.repairConfiguration = theRepairConfiguration; + return this; + } + + /** + * Build table repair job with table storage states. + * + * @param theTableStorageStates + * Table storage states. + * @return Builder + */ + public Builder withTableStorageStates(final TableStorageStates theTableStorageStates) + { + this.tableStorageStates = theTableStorageStates; + return this; + } + + /** + * Build table repair job with repair policies. + * + * @param tableRepairPolicies + * The table repair policies. + * @return Builder + */ + public Builder withRepairPolices(final Collection tableRepairPolicies) + { + this.repairPolicies.addAll(tableRepairPolicies); + return this; + } + + /** + * Build table repair job with repair history. 
+ * + * @param aRepairHistory + * Repair history. + * @return Builder + */ + public Builder withRepairHistory(final RepairHistoryService aRepairHistory) + { + this.repairHistory = aRepairHistory; + return this; + } + + /** + * Build table repair job. + * + * @return TableRepairJob + */ + public TableRepairJob build() + { + Preconditions.checkNotNull(tableReference, "Table reference must be set"); + + return new TableRepairJob(this); + } + } +} +
diff --git a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/table/TableStorageStatesImpl.java b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/table/TableStorageStatesImpl.java
new file mode 100644
index 00000000..5c53d26c
--- /dev/null
+++ b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/table/TableStorageStatesImpl.java
@@ -0,0 +1,272 @@
+/*
+ * Copyright 2024 Telefonaktiebolaget LM Ericsson
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.ericsson.bss.cassandra.ecchronos.core.impl.table;
+
+import com.datastax.oss.driver.api.core.metadata.Node;
+import com.ericsson.bss.cassandra.ecchronos.connection.DistributedNativeConnectionProvider;
+import com.ericsson.bss.cassandra.ecchronos.core.jmx.DistributedJmxProxy;
+import com.ericsson.bss.cassandra.ecchronos.core.jmx.DistributedJmxProxyFactory;
+import com.ericsson.bss.cassandra.ecchronos.core.table.ReplicatedTableProvider;
+import com.ericsson.bss.cassandra.ecchronos.core.table.TableReference;
+import com.ericsson.bss.cassandra.ecchronos.core.table.TableStorageStates;
+import java.io.Closeable;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.UUID;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicReference;
+
+import com.google.common.util.concurrent.ThreadFactoryBuilder;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.collect.ImmutableMap;
+
+/**
+ * Serves the live disk space used per node and table from an in-memory snapshot that a background
+ * thread refreshes over JMX at a fixed rate.
+ *
+ * The snapshot is copy-on-write: a published map is never mutated, every change installs a new map
+ * through the atomic reference, so entries of other nodes are preserved and readers never see a
+ * half-updated state.
+ */
+public final class TableStorageStatesImpl implements TableStorageStates, Closeable
+{
+    private static final Logger LOG = LoggerFactory.getLogger(TableStorageStatesImpl.class);
+
+    private static final long DEFAULT_UPDATE_DELAY_IN_MS = TimeUnit.SECONDS.toMillis(60);
+
+    // Never null and never mutated in place, only replaced by a modified copy.
+    private final AtomicReference<Map<UUID, ImmutableMap<TableReference, Long>>> myTableSizes =
+            new AtomicReference<>(new HashMap<>());
+    private final ScheduledExecutorService myScheduledExecutorService;
+
+    private final ReplicatedTableProvider myReplicatedTableProvider;
+    private final DistributedJmxProxyFactory myJmxProxyFactory;
+    private final DistributedNativeConnectionProvider myNativeConnectionProvider;
+
+    private TableStorageStatesImpl(final Builder builder)
+    {
+        myReplicatedTableProvider = builder.myReplicatedTableProvider;
+        myJmxProxyFactory = builder.myJmxProxyFactory;
+        myNativeConnectionProvider = builder.myNativeConnectionProvider;
+
+        initializeEmptyTableSizeMap();
+
+        myScheduledExecutorService = Executors.newSingleThreadScheduledExecutor(
+                new ThreadFactoryBuilder().setNameFormat("TableStateUpdater-%d").build());
+        myScheduledExecutorService.scheduleAtFixedRate(this::updateTableStates,
+                builder.myInitialDelayInMs,
+                builder.myUpdateDelayInMs,
+                TimeUnit.MILLISECONDS);
+    }
+
+    private void initializeEmptyTableSizeMap()
+    {
+        for (Node node : myNativeConnectionProvider.getNodes())
+        {
+            initializeEmptyNodeMap(node.getHostId());
+        }
+    }
+
+    /**
+     * Start tracking a node with no known table sizes; nodes that are already tracked keep their sizes.
+     */
+    private void initializeEmptyNodeMap(final UUID nodeID)
+    {
+        myTableSizes.updateAndGet(current -> current.containsKey(nodeID)
+                ? current
+                : withNodeEntry(current, nodeID, ImmutableMap.of()));
+    }
+
+    /**
+     * Copy the given snapshot and set the table sizes of one node in the copy.
+     */
+    private static Map<UUID, ImmutableMap<TableReference, Long>> withNodeEntry(
+            final Map<UUID, ImmutableMap<TableReference, Long>> current,
+            final UUID nodeID,
+            final ImmutableMap<TableReference, Long> tableSizes)
+    {
+        Map<UUID, ImmutableMap<TableReference, Long>> updated = new HashMap<>(current);
+        updated.put(nodeID, tableSizes);
+        return updated;
+    }
+
+    /**
+     * Get the last collected size of a table on a node.
+     *
+     * @return The collected size, or 0 when nothing has been collected for the node or the table.
+     */
+    @Override
+    public long getDataSize(final UUID nodeID, final TableReference tableReference)
+    {
+        ImmutableMap<TableReference, Long> dataSizes = myTableSizes.get().get(nodeID);
+
+        if (dataSizes == null)
+        {
+            // Unknown node: start tracking it so that the next update collects its sizes.
+            initializeEmptyNodeMap(nodeID);
+            return 0;
+        }
+
+        Long dataSize = dataSizes.get(tableReference);
+        return dataSize == null ? 0 : dataSize;
+    }
+
+    /**
+     * Get the sum of the last collected table sizes on a node.
+     *
+     * @return The summed size, or 0 when nothing has been collected for the node.
+     */
+    @Override
+    public long getDataSize(final UUID nodeID)
+    {
+        ImmutableMap<TableReference, Long> dataSizes = myTableSizes.get().get(nodeID);
+
+        if (dataSizes == null)
+        {
+            initializeEmptyNodeMap(nodeID);
+            return 0;
+        }
+
+        return dataSizes.values().stream().mapToLong(Long::longValue).sum();
+    }
+
+    /**
+     * Stop scheduling further updates and drop the collected sizes.
+     */
+    @Override
+    public void close()
+    {
+        myScheduledExecutorService.shutdown();
+
+        // Keep an empty snapshot instead of null so that late readers get 0 rather than an exception.
+        myTableSizes.set(new HashMap<>());
+    }
+
+    public static Builder builder()
+    {
+        return new Builder();
+    }
+
+    public static class Builder
+    {
+        private ReplicatedTableProvider myReplicatedTableProvider;
+        private DistributedJmxProxyFactory myJmxProxyFactory;
+        private DistributedNativeConnectionProvider myNativeConnectionProvider;
+
+        private long myInitialDelayInMs = 0;
+        private long myUpdateDelayInMs = DEFAULT_UPDATE_DELAY_IN_MS;
+
+        public final Builder withReplicatedTableProvider(final ReplicatedTableProvider replicatedTableProvider)
+        {
+            myReplicatedTableProvider = replicatedTableProvider;
+            return this;
+        }
+
+        public final Builder withJmxProxyFactory(final DistributedJmxProxyFactory jmxProxyFactory)
+        {
+            myJmxProxyFactory = jmxProxyFactory;
+            return this;
+        }
+
+        public final Builder withConnectionProvider(final DistributedNativeConnectionProvider nativeConnectionProvider)
+        {
+            myNativeConnectionProvider = nativeConnectionProvider;
+            return this;
+        }
+
+        public final Builder withInitialDelay(final long initialDelay, final TimeUnit timeUnit)
+        {
+            myInitialDelayInMs = timeUnit.toMillis(initialDelay);
+            return this;
+        }
+
+        public final Builder withUpdateDelay(final long updateDelay, final TimeUnit timeUnit)
+        {
+            myUpdateDelayInMs = timeUnit.toMillis(updateDelay);
+            return this;
+        }
+
+        public final TableStorageStatesImpl build()
+        {
+            if (myReplicatedTableProvider == null)
+            {
+                throw new IllegalArgumentException("Replicated table provider cannot be null");
+            }
+
+            if (myJmxProxyFactory == null)
+            {
+                throw new IllegalArgumentException("JMX proxy factory cannot be null");
+            }
+
+            if (myNativeConnectionProvider == null)
+            {
+                throw new IllegalArgumentException("Native connection provider cannot be null");
+            }
+
+            return new TableStorageStatesImpl(this);
+        }
+    }
+
+    /**
+     * Collect the table sizes of every tracked node. A node whose sizes cannot be collected keeps its
+     * previous, possibly stale, sizes.
+     */
+    @VisibleForTesting
+    void updateTableStates()
+    {
+        if (myJmxProxyFactory != null)
+        {
+            // Published snapshots are never mutated, so this key set is stable while entries are replaced.
+            for (UUID nodeID : myTableSizes.get().keySet())
+            {
+                try (DistributedJmxProxy jmxProxy = myJmxProxyFactory.connect())
+                {
+                    ImmutableMap<TableReference, Long> tableSize = getTableSizes(nodeID, jmxProxy);
+
+                    myTableSizes.updateAndGet(current -> withNodeEntry(current, nodeID, tableSize));
+                }
+                catch (IOException e)
+                {
+                    LOG.error("Unable to update table sizes, future metrics might contain stale data", e);
+                }
+            }
+        }
+    }
+
+    private ImmutableMap<TableReference, Long> getTableSizes(final UUID nodeID, final DistributedJmxProxy jmxProxy)
+    {
+        Map<TableReference, Long> dataSizes = new HashMap<>();
+
+        if (myReplicatedTableProvider != null)
+        {
+            for (TableReference tableReference : myReplicatedTableProvider.getAll())
+            {
+                long diskSpaceUsed = jmxProxy.liveDiskSpaceUsed(nodeID, tableReference);
+
+                LOG.debug("{} -> {}", tableReference, diskSpaceUsed);
+                dataSizes.put(tableReference, diskSpaceUsed);
+            }
+        }
+
+        return ImmutableMap.copyOf(dataSizes);
+    }
+}
+
diff --git a/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/TestRepairGroup.java b/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/TestRepairGroup.java index a8812d5f..15c995bb 100644 --- a/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/TestRepairGroup.java +++ b/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/TestRepairGroup.java @@ -23,16 +23,20 @@ import static org.mockito.Mockito.spy; import static org.mockito.Mockito.verifyNoMoreInteractions; import static org.mockito.Mockito.when; +import static org.mockito.ArgumentMatchers.any; +import com.datastax.oss.driver.api.core.metadata.Node; import com.ericsson.bss.cassandra.ecchronos.core.impl.repair.incremental.IncrementalRepairTask; import com.ericsson.bss.cassandra.ecchronos.core.jmx.DistributedJmxProxyFactory; import com.ericsson.bss.cassandra.ecchronos.core.metadata.DriverNode; import com.ericsson.bss.cassandra.ecchronos.core.repair.config.RepairConfiguration; import com.ericsson.bss.cassandra.ecchronos.core.state.LongTokenRange; +import com.ericsson.bss.cassandra.ecchronos.core.state.RepairHistory; import com.ericsson.bss.cassandra.ecchronos.core.state.ReplicaRepairGroup; import com.ericsson.bss.cassandra.ecchronos.core.table.TableReference; import com.ericsson.bss.cassandra.ecchronos.core.table.TableRepairMetrics; +import
com.ericsson.bss.cassandra.ecchronos.data.repairhistory.RepairHistoryService; import com.ericsson.bss.cassandra.ecchronos.utils.enums.repair.RepairParallelism; import com.ericsson.bss.cassandra.ecchronos.utils.enums.repair.RepairType; import com.ericsson.bss.cassandra.ecchronos.utils.exceptions.ScheduledJobException; @@ -68,13 +72,26 @@ public class TestRepairGroup @Mock private TableRepairMetrics myTableRepairMetrics; + @Mock + private RepairHistoryService myRepairHistoryService; + + @Mock + private RepairHistory.RepairSession myRepairSession; + + @Mock + private Node mockNode; + private final UUID myNodeID = UUID.randomUUID(); + private final UUID myJobId = UUID.randomUUID(); + private RepairConfiguration myRepairConfiguration; @Before public void init() { + when(mockNode.getHostId()).thenReturn(myNodeID); + when(myRepairHistoryService.newSession(any(), any(), any(), any(), any())).thenReturn(myRepairSession); myRepairConfiguration = RepairConfiguration.newBuilder() .withParallelism(RepairParallelism.PARALLEL) .withRepairWarningTime(RUN_INTERVAL_IN_DAYS * 2, TimeUnit.DAYS) @@ -125,7 +142,7 @@ public void testExecuteAllTasksSuccessful() throws ScheduledJobException ImmutableSet nodes = ImmutableSet.of(node); ReplicaRepairGroup replicaRepairGroup = new ReplicaRepairGroup(nodes, ImmutableList.of(range), System.currentTimeMillis()); - RepairGroup repairGroup = spy(builderFor(replicaRepairGroup).build(PRIORITY)); + RepairGroup repairGroup = spy(builderFor(replicaRepairGroup).withNode(mockNode).build(PRIORITY)); RepairTask repairTask1 = mock(RepairTask.class); RepairTask repairTask2 = mock(RepairTask.class); RepairTask repairTask3 = mock(RepairTask.class); @@ -151,7 +168,7 @@ public void testExecuteAllTasksFailed() throws ScheduledJobException ImmutableSet nodes = ImmutableSet.of(node); ReplicaRepairGroup replicaRepairGroup = new ReplicaRepairGroup(nodes, ImmutableList.of(range), System.currentTimeMillis()); - RepairGroup repairGroup = 
spy(builderFor(replicaRepairGroup).build(PRIORITY)); + RepairGroup repairGroup = spy(builderFor(replicaRepairGroup).withNode(mockNode).build(PRIORITY)); RepairTask repairTask1 = mock(RepairTask.class); RepairTask repairTask2 = mock(RepairTask.class); RepairTask repairTask3 = mock(RepairTask.class); @@ -177,7 +194,7 @@ public void testExecuteSomeTasksFailed() throws ScheduledJobException ImmutableSet nodes = ImmutableSet.of(node); ReplicaRepairGroup replicaRepairGroup = new ReplicaRepairGroup(nodes, ImmutableList.of(range), System.currentTimeMillis()); - RepairGroup repairGroup = spy(builderFor(replicaRepairGroup).build(PRIORITY)); + RepairGroup repairGroup = spy(builderFor(replicaRepairGroup).withNode(mockNode).build(PRIORITY)); RepairTask repairTask1 = mock(RepairTask.class); RepairTask repairTask2 = mock(RepairTask.class); RepairTask repairTask3 = mock(RepairTask.class); @@ -201,7 +218,9 @@ private RepairGroup.Builder builderFor(ReplicaRepairGroup replicaRepairGroup) .withRepairConfiguration(myRepairConfiguration) .withReplicaRepairGroup(replicaRepairGroup) .withJmxProxyFactory(myJmxProxyFactory) - .withTableRepairMetrics(myTableRepairMetrics); + .withTableRepairMetrics(myTableRepairMetrics) + .withRepairHistory(myRepairHistoryService) + .withJobId(myJobId); } private DriverNode mockNode(String dataCenter)
diff --git a/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/TestUtils.java b/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/TestUtils.java
new file mode 100644
index 00000000..e6d76bbc
--- /dev/null
+++ b/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/TestUtils.java
@@ -0,0 +1,240 @@
+/*
+ * Copyright 2024 Telefonaktiebolaget LM Ericsson
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.ericsson.bss.cassandra.ecchronos.core.impl.repair;
+
+import com.ericsson.bss.cassandra.ecchronos.core.impl.repair.vnode.VnodeRepairStatesImpl;
+import com.ericsson.bss.cassandra.ecchronos.core.metadata.DriverNode;
+import com.ericsson.bss.cassandra.ecchronos.core.repair.config.RepairConfiguration;
+import com.ericsson.bss.cassandra.ecchronos.core.repair.scheduler.ScheduledRepairJobView;
+import com.ericsson.bss.cassandra.ecchronos.core.state.LongTokenRange;
+import com.ericsson.bss.cassandra.ecchronos.core.state.RepairStateSnapshot;
+import com.ericsson.bss.cassandra.ecchronos.core.state.VnodeRepairState;
+import com.ericsson.bss.cassandra.ecchronos.core.state.VnodeRepairStates;
+import com.ericsson.bss.cassandra.ecchronos.utils.enums.repair.RepairParallelism;
+import com.ericsson.bss.cassandra.ecchronos.utils.enums.repair.RepairType;
+import com.google.common.collect.ImmutableSet;
+import org.assertj.core.util.Preconditions;
+import org.mockito.internal.util.collections.Sets;
+
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.UUID;
+import java.util.concurrent.TimeUnit;
+
+import static com.ericsson.bss.cassandra.ecchronos.core.impl.table.MockTableReferenceFactory.tableReference;
+
+
+/**
+ * Factory helpers for repair related test fixtures.
+ */
+public class TestUtils
+{
+    /**
+     * Create a snapshot with the given completion time and vnode states and without replica repair groups.
+     */
+    public static RepairStateSnapshot generateRepairStateSnapshot(long lastRepairedAt, VnodeRepairStates vnodeRepairStates)
+    {
+        return RepairStateSnapshot.newBuilder()
+                .withLastCompletedAt(lastRepairedAt)
+                .withVnodeRepairStates(vnodeRepairStates)
+                .withReplicaRepairGroups(Collections.emptyList())
+                .build();
+    }
+
+    /**
+     * Create a repair configuration where only the repair interval, given in milliseconds, is set.
+     */
+    public static RepairConfiguration generateRepairConfiguration(long repairIntervalInMs)
+    {
+        return RepairConfiguration.newBuilder().withRepairInterval(repairIntervalInMs, TimeUnit.MILLISECONDS).build();
+    }
+
+    /**
+     * Create a parallel repair configuration; interval, warning time and error time are in milliseconds.
+     */
+    public static RepairConfiguration createRepairConfiguration(long interval, double unwindRatio, int warningTime, int errorTime)
+    {
+        return RepairConfiguration.newBuilder()
+                .withRepairInterval(interval, TimeUnit.MILLISECONDS)
+                .withParallelism(RepairParallelism.PARALLEL)
+                .withRepairUnwindRatio(unwindRatio)
+                .withRepairWarningTime(warningTime, TimeUnit.MILLISECONDS)
+                .withRepairErrorTime(errorTime, TimeUnit.MILLISECONDS)
+                .build();
+    }
+
+    /**
+     * Builder for {@link ScheduledRepairJobView} test fixtures.
+     */
+    public static class ScheduledRepairJobBuilder
+    {
+        private UUID id = UUID.randomUUID();
+        private String keyspace;
+        private String table;
+        private long lastRepairedAt = 0;
+        private long repairInterval = 0;
+        private ImmutableSet<DriverNode> replicas = ImmutableSet.of();
+        private LongTokenRange longTokenRange = new LongTokenRange(1, 2);
+        private Collection<VnodeRepairState> vnodeRepairStateSet;
+        private RepairConfiguration repairConfiguration;
+        private double progress = 0;
+        private ScheduledRepairJobView.Status status = ScheduledRepairJobView.Status.ON_TIME;
+        private RepairType repairType = RepairType.VNODE;
+
+        public ScheduledRepairJobBuilder withId(UUID id)
+        {
+            this.id = id;
+            return this;
+        }
+
+        public ScheduledRepairJobBuilder withKeyspace(String keyspace)
+        {
+            this.keyspace = keyspace;
+            return this;
+        }
+
+        public ScheduledRepairJobBuilder withTable(String table)
+        {
+            this.table = table;
+            return this;
+        }
+
+        public ScheduledRepairJobBuilder withLastRepairedAt(long lastRepairedAt)
+        {
+            this.lastRepairedAt = lastRepairedAt;
+            return this;
+        }
+
+        public ScheduledRepairJobBuilder withRepairInterval(long repairInterval)
+        {
+            this.repairInterval = repairInterval;
+            return this;
+        }
+
+        public ScheduledRepairJobBuilder withVnodeRepairStateSet(Collection<VnodeRepairState> vnodeRepairStateSet)
+        {
+            this.vnodeRepairStateSet = vnodeRepairStateSet;
+            return this;
+        }
+
+        public ScheduledRepairJobBuilder withStatus(ScheduledRepairJobView.Status status)
+        {
+            this.status = status;
+            return this;
+        }
+
+        public ScheduledRepairJobBuilder withProgress(double progress)
+        {
+            this.progress = progress;
+            return this;
+        }
+
+        public ScheduledRepairJobBuilder withRepairConfiguration(RepairConfiguration repairConfiguration)
+        {
+            this.repairConfiguration = repairConfiguration;
+            return this;
+        }
+
+        public ScheduledRepairJobBuilder withRepairType(RepairType repairType)
+        {
+            this.repairType = repairType;
+            return this;
+        }
+
+
+        /**
+         * Build the view. Keyspace, table, a positive last repaired time and a positive repair interval are
+         * required; without explicit vnode states a single state for the default range and replicas is used.
+         */
+        public ScheduledRepairJobView build()
+        {
+            Preconditions.checkNotNull(keyspace, "Keyspace cannot be null");
+            Preconditions.checkNotNull(table, "Table cannot be null");
+            Preconditions.checkArgument(lastRepairedAt > 0, "Last repaired not set");
+            Preconditions.checkArgument(repairInterval > 0, "Repair interval not set");
+            VnodeRepairStates vnodeRepairStates;
+            if (vnodeRepairStateSet != null)
+            {
+                vnodeRepairStates = VnodeRepairStatesImpl.newBuilder(vnodeRepairStateSet).build();
+            }
+            else
+            {
+                VnodeRepairState vnodeRepairState = createVnodeRepairState(longTokenRange, replicas, lastRepairedAt);
+                vnodeRepairStates = VnodeRepairStatesImpl.newBuilder(Sets.newSet(vnodeRepairState)).build();
+            }
+
+            if (repairConfiguration == null)
+            {
+                this.repairConfiguration = generateRepairConfiguration(repairInterval);
+            }
+            return new ScheduledRepairJobView(id, tableReference(keyspace, table), repairConfiguration,
+                    generateRepairStateSnapshot(lastRepairedAt, vnodeRepairStates), status, progress, lastRepairedAt + repairInterval, repairType);
+        }
+    }
+
+
+
+    /**
+     * Create a vnode repair state for the token range between the given start and end tokens.
+     */
+    public static VnodeRepairState createVnodeRepairState(long startToken, long endToken, ImmutableSet<DriverNode> replicas,
+            long lastRepairedAt)
+    {
+        return createVnodeRepairState(new LongTokenRange(startToken, endToken), replicas, lastRepairedAt);
+    }
+
+    /**
+     * Create a vnode repair state for the given token range, replicas and last repaired time.
+     */
+    public static VnodeRepairState createVnodeRepairState(LongTokenRange longTokenRange, ImmutableSet<DriverNode> replicas,
+            long lastRepairedAt)
+    {
+        return new VnodeRepairState(longTokenRange, replicas, lastRepairedAt);
+    }
+
+    /**
+     * Format the message of a failed repair session covering the given ranges.
+     */
+    public static String getFailedRepairMessage(LongTokenRange... ranges)
+    {
+        Collection<LongTokenRange> rangeCollection = Arrays.asList(ranges);
+        return String.format("Repair session RepairSession for range %s failed with error ...", rangeCollection);
+    }
+
+    /**
+     * Format the message of a finished repair session covering the given ranges.
+     */
+    public static String getRepairMessage(LongTokenRange... ranges)
+    {
+        Collection<LongTokenRange> rangeCollection = Arrays.asList(ranges);
+        return String.format("Repair session RepairSession for range %s finished", rangeCollection);
+    }
+
+    /**
+     * Create notification data holding the keys "type", "progressCount" and "total".
+     */
+    public static Map<String, Integer> getNotificationData(int type, int progressCount, int total)
+    {
+        Map<String, Integer> data = new HashMap<>();
+        data.put("type", type);
+        data.put("progressCount", progressCount);
+        data.put("total", total);
+        return data;
+    }
+}
+
diff --git a/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/scheduler/TestRepairSchedulerImpl.java b/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/scheduler/TestRepairSchedulerImpl.java index a708c5e7..8b2120a9 100644 --- a/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/scheduler/TestRepairSchedulerImpl.java +++ b/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/scheduler/TestRepairSchedulerImpl.java @@ -16,17 +16,27 @@ import com.datastax.oss.driver.api.core.metadata.Node; import com.ericsson.bss.cassandra.ecchronos.core.impl.metrics.CassandraMetrics; +import com.ericsson.bss.cassandra.ecchronos.core.impl.repair.TestUtils; import com.ericsson.bss.cassandra.ecchronos.core.impl.repair.incremental.IncrementalRepairJob; +import com.ericsson.bss.cassandra.ecchronos.core.impl.repair.vnode.VnodeRepairStatesImpl; +import com.ericsson.bss.cassandra.ecchronos.core.impl.table.TableRepairJob; import
com.ericsson.bss.cassandra.ecchronos.core.jmx.DistributedJmxProxyFactory; import com.ericsson.bss.cassandra.ecchronos.core.repair.config.RepairConfiguration; import com.ericsson.bss.cassandra.ecchronos.core.repair.scheduler.RepairScheduler; import com.ericsson.bss.cassandra.ecchronos.core.repair.scheduler.ScheduleManager; import com.ericsson.bss.cassandra.ecchronos.core.repair.scheduler.ScheduledJob; import com.ericsson.bss.cassandra.ecchronos.core.repair.scheduler.ScheduledRepairJobView; +import com.ericsson.bss.cassandra.ecchronos.core.state.RepairState; +import com.ericsson.bss.cassandra.ecchronos.core.state.RepairStateFactory; +import com.ericsson.bss.cassandra.ecchronos.core.state.RepairStateSnapshot; import com.ericsson.bss.cassandra.ecchronos.core.state.ReplicationState; +import com.ericsson.bss.cassandra.ecchronos.core.state.VnodeRepairState; import com.ericsson.bss.cassandra.ecchronos.core.table.TableReference; import com.ericsson.bss.cassandra.ecchronos.core.table.TableRepairMetrics; +import com.ericsson.bss.cassandra.ecchronos.core.table.TableStorageStates; +import com.ericsson.bss.cassandra.ecchronos.data.repairhistory.RepairHistoryService; import com.ericsson.bss.cassandra.ecchronos.utils.enums.repair.RepairType; +import com.google.common.collect.ImmutableSet; import java.util.*; import org.junit.Before; import org.junit.Test; @@ -45,6 +55,8 @@ public class TestRepairSchedulerImpl { private static final TableReference TABLE_REFERENCE1 = tableReference("keyspace", "table1"); private static final TableReference TABLE_REFERENCE2 = tableReference("keyspace", "table2"); + private static final VnodeRepairState VNODE_REPAIR_STATE = TestUtils.createVnodeRepairState(1, 2, ImmutableSet.of(), System.currentTimeMillis()); + @Mock private DistributedJmxProxyFactory jmxProxyFactory; @@ -64,63 +76,89 @@ public class TestRepairSchedulerImpl @Mock private Node mockNode; + @Mock + private RepairStateFactory myRepairStateFactory; + + @Mock + private RepairState 
myRepairState; + + @Mock + private RepairStateSnapshot myRepairStateSnapshot; + + @Mock + private TableStorageStates myTableStorageStates; + + @Mock + private RepairHistoryService myRepairHistory; + private final UUID mockNodeID = UUID.randomUUID(); @Before public void setup() { when(mockNode.getHostId()).thenReturn(mockNodeID); + when(myRepairState.getSnapshot()).thenReturn(myRepairStateSnapshot); + when(myRepairStateFactory.create(eq(mockNode), eq(TABLE_REFERENCE1), any(), any())).thenReturn(myRepairState); + when(myRepairStateFactory.create(eq(mockNode), eq(TABLE_REFERENCE2), any(), any())).thenReturn(myRepairState); + VnodeRepairStatesImpl vnodeRepairStates = VnodeRepairStatesImpl.newBuilder(Arrays.asList(VNODE_REPAIR_STATE)).build(); + when(myRepairStateSnapshot.getVnodeRepairStates()).thenReturn(vnodeRepairStates); } @Test public void testConfigureNewTable() { - RepairSchedulerImpl repairSchedulerImpl = defaultRepairSchedulerImplBuilder() - .withReplicationState(myReplicationState).build(); + RepairSchedulerImpl repairSchedulerImpl = defaultRepairSchedulerImplBuilder().build(); repairSchedulerImpl.putConfigurations(mockNode, TABLE_REFERENCE1, Collections.singleton(RepairConfiguration.DEFAULT)); verify(scheduleManager, timeout(1000)).schedule(eq(mockNodeID), any(ScheduledJob.class)); verify(scheduleManager, never()).deschedule(eq(mockNodeID), any(ScheduledJob.class)); + verify(myRepairStateFactory).create(eq(mockNode), eq(TABLE_REFERENCE1), eq(RepairConfiguration.DEFAULT), any()); + verify(myRepairState, atLeastOnce()).update(); assertOneTableViewExist(repairSchedulerImpl, TABLE_REFERENCE1, RepairConfiguration.DEFAULT); repairSchedulerImpl.close(); verify(scheduleManager).deschedule(eq(mockNodeID), any(ScheduledJob.class)); verifyNoMoreInteractions(ignoreStubs(myTableRepairMetrics)); + verifyNoMoreInteractions(myRepairStateFactory); verifyNoMoreInteractions(scheduleManager); } @Test public void testConfigureTwoTables() { - RepairSchedulerImpl 
repairSchedulerImpl = defaultRepairSchedulerImplBuilder() - .withReplicationState(myReplicationState).build(); + RepairSchedulerImpl repairSchedulerImpl = defaultRepairSchedulerImplBuilder().build(); repairSchedulerImpl.putConfigurations(mockNode, TABLE_REFERENCE1, Collections.singleton(RepairConfiguration.DEFAULT)); repairSchedulerImpl.putConfigurations(mockNode, TABLE_REFERENCE2, Collections.singleton(RepairConfiguration.DEFAULT)); verify(scheduleManager, timeout(1000).times(2)).schedule(eq(mockNodeID), any(ScheduledJob.class)); verify(scheduleManager, never()).deschedule(eq(mockNodeID), any(ScheduledJob.class)); + verify(myRepairStateFactory).create(eq(mockNode), eq(TABLE_REFERENCE1), eq(RepairConfiguration.DEFAULT), any()); + verify(myRepairStateFactory).create(eq(mockNode), eq(TABLE_REFERENCE2), eq(RepairConfiguration.DEFAULT), any()); + verify(myRepairState, atLeastOnce()).update(); repairSchedulerImpl.close(); verify(scheduleManager, times(1)).deschedule(eq(mockNodeID), any(ScheduledJob.class)); verifyNoMoreInteractions(ignoreStubs(myTableRepairMetrics)); + verifyNoMoreInteractions(myRepairStateFactory); verifyNoMoreInteractions(scheduleManager); } @Test public void testRemoveTableConfiguration() { - RepairSchedulerImpl repairSchedulerImpl = defaultRepairSchedulerImplBuilder() - .withReplicationState(myReplicationState).build(); + RepairSchedulerImpl repairSchedulerImpl = defaultRepairSchedulerImplBuilder().build(); repairSchedulerImpl.putConfigurations(mockNode, TABLE_REFERENCE1, Collections.singleton(RepairConfiguration.DEFAULT)); verify(scheduleManager, timeout(1000)).schedule(eq(mockNodeID), any(ScheduledJob.class)); verify(scheduleManager, never()).deschedule(eq(mockNodeID), any(ScheduledJob.class)); + verify(myRepairStateFactory).create(eq(mockNode), eq(TABLE_REFERENCE1), eq(RepairConfiguration.DEFAULT), any()); + verify(myRepairState, atLeastOnce()).update(); assertOneTableViewExist(repairSchedulerImpl, TABLE_REFERENCE1, RepairConfiguration.DEFAULT); 
repairSchedulerImpl.removeConfiguration(mockNode, TABLE_REFERENCE1); @@ -129,14 +167,14 @@ public void testRemoveTableConfiguration() repairSchedulerImpl.close(); verifyNoMoreInteractions(ignoreStubs(myTableRepairMetrics)); + verifyNoMoreInteractions(myRepairStateFactory); verifyNoMoreInteractions(scheduleManager); } @Test public void testUpdateTableConfiguration() { - RepairSchedulerImpl repairSchedulerImpl = defaultRepairSchedulerImplBuilder() - .withReplicationState(myReplicationState).build(); + RepairSchedulerImpl repairSchedulerImpl = defaultRepairSchedulerImplBuilder().build(); long expectedUpdatedRepairInterval = TimeUnit.DAYS.toMillis(1); @@ -148,12 +186,17 @@ public void testUpdateTableConfiguration() verify(scheduleManager, timeout(1000)).schedule(eq(mockNodeID), any(ScheduledJob.class)); verify(scheduleManager, never()).deschedule(eq(mockNodeID), any(ScheduledJob.class)); + verify(myRepairStateFactory).create(eq(mockNode), eq(TABLE_REFERENCE1), eq(RepairConfiguration.DEFAULT), any()); + verify(myRepairState, atLeastOnce()).update(); assertOneTableViewExist(repairSchedulerImpl, TABLE_REFERENCE1, RepairConfiguration.DEFAULT); - repairSchedulerImpl.putConfigurations(mockNode, TABLE_REFERENCE1, Collections.singleton(updatedRepairConfiguration)); + repairSchedulerImpl.putConfigurations(mockNode, TABLE_REFERENCE1, + Collections.singleton(updatedRepairConfiguration)); verify(scheduleManager, timeout(1000).times(2)).schedule(eq(mockNodeID), any(ScheduledJob.class)); verify(scheduleManager, timeout(1000)).deschedule(eq(mockNodeID), any(ScheduledJob.class)); + verify(myRepairStateFactory).create(eq(mockNode), eq(TABLE_REFERENCE1), eq(updatedRepairConfiguration), any()); + verify(myRepairState, atLeastOnce()).update(); assertOneTableViewExist(repairSchedulerImpl, TABLE_REFERENCE1, updatedRepairConfiguration); repairSchedulerImpl.close(); @@ -161,19 +204,21 @@ public void testUpdateTableConfiguration() 
assertThat(repairSchedulerImpl.getCurrentRepairJobs()).isEmpty(); verifyNoMoreInteractions(ignoreStubs(myTableRepairMetrics)); + verifyNoMoreInteractions(myRepairStateFactory); verifyNoMoreInteractions(scheduleManager); } @Test public void testUpdateTableConfigurationToSame() { - RepairSchedulerImpl repairSchedulerImpl = defaultRepairSchedulerImplBuilder() - .withReplicationState(myReplicationState).build(); + RepairSchedulerImpl repairSchedulerImpl = defaultRepairSchedulerImplBuilder().build(); repairSchedulerImpl.putConfigurations(mockNode, TABLE_REFERENCE1, Collections.singleton(RepairConfiguration.DEFAULT)); verify(scheduleManager, timeout(1000)).schedule(eq(mockNodeID), any(ScheduledJob.class)); verify(scheduleManager, never()).deschedule(eq(mockNodeID), any(ScheduledJob.class)); + verify(myRepairStateFactory).create(eq(mockNode), eq(TABLE_REFERENCE1), eq(RepairConfiguration.DEFAULT), any()); + verify(myRepairState, atLeastOnce()).update(); assertOneTableViewExist(repairSchedulerImpl, TABLE_REFERENCE1, RepairConfiguration.DEFAULT); repairSchedulerImpl.putConfigurations(mockNode, TABLE_REFERENCE1, Collections.singleton(RepairConfiguration.DEFAULT)); @@ -185,6 +230,7 @@ public void testUpdateTableConfigurationToSame() assertThat(repairSchedulerImpl.getCurrentRepairJobs()).isEmpty(); verifyNoMoreInteractions(ignoreStubs(myTableRepairMetrics)); + verifyNoMoreInteractions(myRepairStateFactory); verifyNoMoreInteractions(scheduleManager); } @@ -200,16 +246,22 @@ public void testConfigureTwoSchedulesForOneTable() repairConfigurations.add(incrementalRepairConfiguration); repairSchedulerImpl.putConfigurations(mockNode, TABLE_REFERENCE1, repairConfigurations); - verify(scheduleManager, timeout(1000).times(2)).schedule(eq(mockNodeID), any(IncrementalRepairJob.class)); + verify(scheduleManager, timeout(1000)).schedule(eq(mockNodeID), any(TableRepairJob.class)); + verify(scheduleManager, timeout(1000)).schedule(eq(mockNodeID), any(IncrementalRepairJob.class)); 
verify(scheduleManager, never()).deschedule(eq(mockNodeID), any(ScheduledJob.class)); + verify(myRepairStateFactory).create(eq(mockNode), eq(TABLE_REFERENCE1), eq(RepairConfiguration.DEFAULT), + any()); + verify(myRepairState, atLeastOnce()).update(); - assertTableViewsExist(repairSchedulerImpl, TABLE_REFERENCE1, RepairConfiguration.DEFAULT, incrementalRepairConfiguration); + assertTableViewsExist(repairSchedulerImpl, TABLE_REFERENCE1, RepairConfiguration.DEFAULT, + incrementalRepairConfiguration); repairSchedulerImpl.close(); verify(scheduleManager, times(2)).deschedule(eq(mockNodeID), any(ScheduledJob.class)); assertThat(repairSchedulerImpl.getCurrentRepairJobs()).isEmpty(); verifyNoMoreInteractions(ignoreStubs(myTableRepairMetrics)); + verifyNoMoreInteractions(myRepairStateFactory); verifyNoMoreInteractions(scheduleManager); } @@ -217,27 +269,32 @@ public void testConfigureTwoSchedulesForOneTable() public void testScheduleChangesToIncremental() { RepairSchedulerImpl repairSchedulerImpl = defaultRepairSchedulerImplBuilder().withReplicationState(myReplicationState).build(); - repairSchedulerImpl.putConfigurations(mockNode, TABLE_REFERENCE1, Collections.singleton(RepairConfiguration.DEFAULT)); + repairSchedulerImpl.putConfigurations(mockNode, TABLE_REFERENCE1, + Collections.singleton(RepairConfiguration.DEFAULT)); - // Should change to TableRepairJob.class when implemented - verify(scheduleManager, timeout(1000)).schedule(eq(mockNodeID), any(IncrementalRepairJob.class)); + verify(scheduleManager, timeout(1000)).schedule(eq(mockNodeID), any(TableRepairJob.class)); verify(scheduleManager, never()).deschedule(eq(mockNodeID), any(ScheduledJob.class)); + verify(myRepairStateFactory).create(eq(mockNode), eq(TABLE_REFERENCE1), eq(RepairConfiguration.DEFAULT), any()); + verify(myRepairState, atLeastOnce()).update(); assertTableViewsExist(repairSchedulerImpl, TABLE_REFERENCE1, RepairConfiguration.DEFAULT); RepairConfiguration incrementalRepairConfiguration = 
RepairConfiguration.newBuilder().withRepairType( RepairType.INCREMENTAL).build(); - repairSchedulerImpl.putConfigurations(mockNode, TABLE_REFERENCE1, Collections.singleton(incrementalRepairConfiguration)); + repairSchedulerImpl.putConfigurations(mockNode, TABLE_REFERENCE1, + Collections.singleton(incrementalRepairConfiguration)); - verify(scheduleManager, timeout(1000).times(2)).schedule(eq(mockNodeID), any(IncrementalRepairJob.class)); + verify(scheduleManager, timeout(1000)).schedule(eq(mockNodeID), any(IncrementalRepairJob.class)); + verify(scheduleManager).deschedule(eq(mockNodeID), any(TableRepairJob.class)); assertTableViewsExist(repairSchedulerImpl, TABLE_REFERENCE1, incrementalRepairConfiguration); repairSchedulerImpl.close(); - verify(scheduleManager, times(2)).deschedule(eq(mockNodeID), any(IncrementalRepairJob.class)); + verify(scheduleManager).deschedule(eq(mockNodeID), any(IncrementalRepairJob.class)); assertThat(repairSchedulerImpl.getCurrentRepairJobs()).isEmpty(); verifyNoMoreInteractions(ignoreStubs(myTableRepairMetrics)); + verifyNoMoreInteractions(myRepairStateFactory); verifyNoMoreInteractions(scheduleManager); } @@ -278,6 +335,9 @@ private RepairSchedulerImpl.Builder defaultRepairSchedulerImplBuilder() .withJmxProxyFactory(jmxProxyFactory) .withTableRepairMetrics(myTableRepairMetrics) .withScheduleManager(scheduleManager) - .withCassandraMetrics(myCassandraMetrics); + .withRepairStateFactory(myRepairStateFactory) + .withTableStorageStates(myTableStorageStates) + .withCassandraMetrics(myCassandraMetrics) + .withRepairHistory(myRepairHistory); } } diff --git a/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/state/TestAlarmPostUpdateHook.java b/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/state/TestAlarmPostUpdateHook.java new file mode 100644 index 00000000..479a612f --- /dev/null +++ 
b/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/state/TestAlarmPostUpdateHook.java @@ -0,0 +1,145 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.ericsson.bss.cassandra.ecchronos.core.impl.repair.state; + +import com.ericsson.bss.cassandra.ecchronos.core.repair.config.RepairConfiguration; +import com.ericsson.bss.cassandra.ecchronos.core.state.RepairStateSnapshot; +import com.ericsson.bss.cassandra.ecchronos.core.table.TableReference; +import com.ericsson.bss.cassandra.ecchronos.fm.RepairFaultReporter; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.Mock; +import org.mockito.junit.MockitoJUnitRunner; + +import java.time.Clock; +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.TimeUnit; + +import static com.ericsson.bss.cassandra.ecchronos.core.impl.table.MockTableReferenceFactory.tableReference; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.*; + +@RunWith(MockitoJUnitRunner.class) +public class TestAlarmPostUpdateHook +{ + private static final String keyspaceName = "keyspace"; + private static final String tableName = "table"; + + private static final long RUN_INTERVAL_IN_DAYS = 1; + private static final long GC_GRACE_DAYS = 10; + + @Mock + private RepairStateSnapshot myRepairStateSnapshot; + + @Mock + private RepairFaultReporter 
myFaultReporter; + + @Mock + private Clock myClock; + + private AlarmPostUpdateHook myPostUpdateHook; + + private final TableReference myTableReference = tableReference(keyspaceName, tableName); + + @Before + public void startup() + { + RepairConfiguration repairConfiguration = RepairConfiguration.newBuilder() + .withRepairWarningTime(RUN_INTERVAL_IN_DAYS * 2, TimeUnit.DAYS) + .withRepairErrorTime(GC_GRACE_DAYS, TimeUnit.DAYS) + .build(); + myPostUpdateHook = new AlarmPostUpdateHook(myTableReference, repairConfiguration, myFaultReporter); + + myPostUpdateHook.setClock(myClock); + } + + @Test + public void testThatWarningAlarmIsSentAndCeased() + { + // setup - not repaired + long daysSinceLastRepair = 2; + long start = System.currentTimeMillis(); + long lastRepaired = start - TimeUnit.DAYS.toMillis(daysSinceLastRepair); + + Map<String, Object> expectedData = new HashMap<>(); + expectedData.put(RepairFaultReporter.FAULT_KEYSPACE, keyspaceName); + expectedData.put(RepairFaultReporter.FAULT_TABLE, tableName); + + // mock - not repaired + doReturn(lastRepaired).when(myRepairStateSnapshot).lastCompletedAt(); + when(myClock.millis()).thenReturn(start); + + myPostUpdateHook.postUpdate(myRepairStateSnapshot); + + // verify - not repaired + verify(myFaultReporter).raise(eq(RepairFaultReporter.FaultCode.REPAIR_WARNING), eq(expectedData)); + + // setup - repaired + lastRepaired = start; + start = System.currentTimeMillis(); + + // mock - repaired + doReturn(lastRepaired).when(myRepairStateSnapshot).lastCompletedAt(); + when(myClock.millis()).thenReturn(start); + + myPostUpdateHook.postUpdate(myRepairStateSnapshot); + + // verify alarm ceased in postUpdate + verify(myFaultReporter).cease(eq(RepairFaultReporter.FaultCode.REPAIR_WARNING), eq(expectedData)); + reset(myFaultReporter); + + myPostUpdateHook.postUpdate(myRepairStateSnapshot); + + // verify - repaired + verify(myFaultReporter).cease(eq(RepairFaultReporter.FaultCode.REPAIR_WARNING), eq(expectedData)); + } + + @Test + public void
testThatErrorAlarmIsSentAndCeased() + { + // setup - not repaired + long daysSinceLastRepair = GC_GRACE_DAYS; + long start = System.currentTimeMillis(); + long lastRepaired = start - TimeUnit.DAYS.toMillis(daysSinceLastRepair); + + Map<String, Object> expectedData = new HashMap<>(); + expectedData.put(RepairFaultReporter.FAULT_KEYSPACE, keyspaceName); + expectedData.put(RepairFaultReporter.FAULT_TABLE, tableName); + + // mock - not repaired + doReturn(lastRepaired).when(myRepairStateSnapshot).lastCompletedAt(); + when(myClock.millis()).thenReturn(start); + + myPostUpdateHook.postUpdate(myRepairStateSnapshot); + + // verify - not repaired + verify(myFaultReporter).raise(eq(RepairFaultReporter.FaultCode.REPAIR_ERROR), eq(expectedData)); + + // setup - repaired + lastRepaired = start; + start = System.currentTimeMillis(); + + // mock - repaired + doReturn(lastRepaired).when(myRepairStateSnapshot).lastCompletedAt(); + when(myClock.millis()).thenReturn(start); + + myPostUpdateHook.postUpdate(myRepairStateSnapshot); + + // verify - repaired + verify(myFaultReporter).cease(eq(RepairFaultReporter.FaultCode.REPAIR_WARNING), eq(expectedData)); + } +} diff --git a/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/vnode/TestNormalizedBaseRange.java b/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/vnode/TestNormalizedBaseRange.java new file mode 100644 index 00000000..ccb63477 --- /dev/null +++ b/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/vnode/TestNormalizedBaseRange.java @@ -0,0 +1,242 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.ericsson.bss.cassandra.ecchronos.core.impl.repair.vnode; + +import com.ericsson.bss.cassandra.ecchronos.core.metadata.DriverNode; +import com.ericsson.bss.cassandra.ecchronos.core.state.LongTokenRange; +import com.ericsson.bss.cassandra.ecchronos.core.state.VnodeRepairState; +import com.google.common.collect.ImmutableSet; +import nl.jqno.equalsverifier.EqualsVerifier; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.Mock; +import org.mockito.junit.MockitoJUnitRunner; + +import java.math.BigInteger; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatExceptionOfType; + +@RunWith(MockitoJUnitRunner.class) +public class TestNormalizedBaseRange +{ + private static final BigInteger START = BigInteger.ZERO; + + @Mock + DriverNode mockNode; + + @Test + public void testTransformBaseRange() + { + VnodeRepairState vnodeRepairState = withVnode(1L, 10L, 1234L, 1235L); + NormalizedBaseRange normalizedBaseRange = new NormalizedBaseRange(vnodeRepairState); + + NormalizedRange expectedRange = new NormalizedRange(normalizedBaseRange, START, bi(9L), 1234L, 1235L); + + NormalizedRange actualRange = normalizedBaseRange.transform(vnodeRepairState); + + assertThat(actualRange).isEqualTo(expectedRange); + + VnodeRepairState actualVnode = normalizedBaseRange.transform(actualRange); + assertThat(actualVnode).isEqualTo(vnodeRepairState); + } + + @Test + public void testTransformMaxTokenAsStart() + { + VnodeRepairState vnodeRepairState = withVnode(Long.MAX_VALUE, 
Long.MIN_VALUE, 1234L, 1235L); + NormalizedBaseRange normalizedBaseRange = new NormalizedBaseRange(vnodeRepairState); + + NormalizedRange expectedRange = new NormalizedRange(normalizedBaseRange, START, bi(1L), 1234L, 1235L); + + NormalizedRange actualRange = normalizedBaseRange.transform(vnodeRepairState); + + assertThat(actualRange).isEqualTo(expectedRange); + + VnodeRepairState actualVnode = normalizedBaseRange.transform(actualRange); + assertThat(actualVnode).isEqualTo(vnodeRepairState); + } + + @Test + public void testTransformMaxTokenAsEnd() + { + VnodeRepairState vnodeRepairState = withVnode(-5L, Long.MAX_VALUE, 1234L, 1235L); + NormalizedBaseRange normalizedBaseRange = new NormalizedBaseRange(vnodeRepairState); + + BigInteger end = bi(Long.MAX_VALUE).add(BigInteger.valueOf(5)); + NormalizedRange expectedRange = new NormalizedRange(normalizedBaseRange, START, end, 1234L, 1235L); + + NormalizedRange actualRange = normalizedBaseRange.transform(vnodeRepairState); + + assertThat(actualRange).isEqualTo(expectedRange); + + VnodeRepairState actualVnode = normalizedBaseRange.transform(actualRange); + assertThat(actualVnode).isEqualTo(vnodeRepairState); + } + + @Test + public void testTransformBaseRangeFullRange() + { + VnodeRepairState vnodeRepairState = withVnode(Long.MIN_VALUE, Long.MIN_VALUE, 1234L, 1235L); + NormalizedBaseRange normalizedBaseRange = new NormalizedBaseRange(vnodeRepairState); + + assertThat(normalizedBaseRange.end).isEqualTo(LongTokenRange.FULL_RANGE); + + NormalizedRange expectedRange = new NormalizedRange(normalizedBaseRange, START, LongTokenRange.FULL_RANGE, 1234L, 1235L); + + NormalizedRange actualRange = normalizedBaseRange.transform(vnodeRepairState); + + assertThat(actualRange).isEqualTo(expectedRange); + + VnodeRepairState actualVnode = normalizedBaseRange.transform(actualRange); + assertThat(actualVnode).isEqualTo(vnodeRepairState); + } + + @Test + public void testTransformSimpleRange() + { + NormalizedBaseRange normalizedBaseRange = new 
NormalizedBaseRange(withVnode(1L, 10L, 1234L, 1235L)); + VnodeRepairState subRange = withVnode(2L, 5L, 1235L, 1236L); + + NormalizedRange expectedRange = new NormalizedRange(normalizedBaseRange, bi(1L), bi(4L), 1235L, 1236L); + + NormalizedRange actualRange = normalizedBaseRange.transform(subRange); + + assertThat(actualRange).isEqualTo(expectedRange); + + VnodeRepairState actualVnode = normalizedBaseRange.transform(actualRange); + assertThat(actualVnode).isEqualTo(subRange); + } + + @Test + public void testTransformWraparoundRangeBeforeWraparound() + { + NormalizedBaseRange normalizedBaseRange = new NormalizedBaseRange(withVnode(-5L, -6L, 1234L, 1235L)); + VnodeRepairState subRange = withVnode(55L, 1000L, 1234L, 1235L); + + NormalizedRange expectedRange = new NormalizedRange(normalizedBaseRange, bi(60L), bi(1005L), 1234L, 1235L); + + NormalizedRange actualRange = normalizedBaseRange.transform(subRange); + + assertThat(actualRange).isEqualTo(expectedRange); + + VnodeRepairState actualVnode = normalizedBaseRange.transform(actualRange); + assertThat(actualVnode).isEqualTo(subRange); + } + + @Test + public void testTransformWraparoundRangeAfterWraparound() + { + NormalizedBaseRange normalizedBaseRange = new NormalizedBaseRange(withVnode(-5L, -6L, 1234L, 1235L)); + VnodeRepairState subRange = withVnode(-20L, -10L, 1234L, 1235L); + + BigInteger start = LongTokenRange.FULL_RANGE.subtract(bi(15L)); + BigInteger end = LongTokenRange.FULL_RANGE.subtract(bi(5L)); + + NormalizedRange expectedRange = new NormalizedRange(normalizedBaseRange, start, end, 1234L, 1235L); + + NormalizedRange actualRange = normalizedBaseRange.transform(subRange); + + assertThat(actualRange).isEqualTo(expectedRange); + + VnodeRepairState actualVnode = normalizedBaseRange.transform(actualRange); + assertThat(actualVnode).isEqualTo(subRange); + } + + @Test + public void testTransformWraparoundRangeInTheMiddle() + { + NormalizedBaseRange normalizedBaseRange = new NormalizedBaseRange(withVnode(-5L, -6L, 
1234L, 1235L)); + VnodeRepairState subRange = withVnode(Long.MAX_VALUE - 5L, Long.MAX_VALUE - 4L, 1234L, 1235L); + + BigInteger start = bi(Long.MAX_VALUE); + BigInteger end = start.add(bi(1L)); + + NormalizedRange expectedRange = new NormalizedRange(normalizedBaseRange, start, end, 1234L, 1235L); + + NormalizedRange actualRange = normalizedBaseRange.transform(subRange); + + assertThat(actualRange).isEqualTo(expectedRange); + + VnodeRepairState actualVnode = normalizedBaseRange.transform(actualRange); + assertThat(actualVnode).isEqualTo(subRange); + } + + @Test + public void testTransformRangeIntersectingEnd() + { + NormalizedBaseRange normalizedBaseRange = new NormalizedBaseRange(withVnode(45L, 100L, 1234L, 1235L)); + VnodeRepairState subRange = withVnode(90L, 120L, 1234L, 1235L); + + assertThatExceptionOfType(IllegalArgumentException.class).isThrownBy(() -> normalizedBaseRange.transform(subRange)); + } + + @Test + public void testTransformRangeIntersectingStart() + { + NormalizedBaseRange normalizedBaseRange = new NormalizedBaseRange(withVnode(-50L, 15L, 1234L, 1235L)); + VnodeRepairState subRange = withVnode(-100L, -30L, 1234L, 1235L); + + assertThatExceptionOfType(IllegalArgumentException.class).isThrownBy(() -> normalizedBaseRange.transform(subRange)); + } + + @Test + public void testTransformRangeOutsideBoundary() + { + NormalizedBaseRange normalizedBaseRange = new NormalizedBaseRange(withVnode(100L, 200L, 1234L, 1235L)); + VnodeRepairState subRange = withVnode(300L, 400L, 1234L, 1235L); + + assertThatExceptionOfType(IllegalArgumentException.class).isThrownBy(() -> normalizedBaseRange.transform(subRange)); + } + + @Test + public void testInRangeBoundary() + { + NormalizedBaseRange normalizedBaseRange = new NormalizedBaseRange(withVnode(100L, 150L, 1234L, 1235L)); + + assertThat(normalizedBaseRange.inRange(bi(0L))).isTrue(); + assertThat(normalizedBaseRange.inRange(bi(26L))).isTrue(); + assertThat(normalizedBaseRange.inRange(bi(50L))).isTrue(); + } + + @Test + 
public void testOutsideBoundary() + { + NormalizedBaseRange normalizedBaseRange = new NormalizedBaseRange(withVnode(100L, 150L, 1234L, 1235L)); + + assertThat(normalizedBaseRange.inRange(bi(-1L))).isFalse(); + assertThat(normalizedBaseRange.inRange(bi(51L))).isFalse(); + } + + private BigInteger bi(long token) + { + return BigInteger.valueOf(token); + } + + @Test + public void testEqualsContract() + { + EqualsVerifier.forClass(NormalizedBaseRange.class).usingGetClass() + .withPrefabValues( + VnodeRepairState.class, withVnode(0L, 0L, 1234L, 1235L), withVnode(0L, 1L, 1234L, 1235L)) + .withNonnullFields("baseVnode", "end") + .verify(); + } + + private VnodeRepairState withVnode(long start, long end, long startedAt, long finishedAt) + { + return new VnodeRepairState(new LongTokenRange(start, end), ImmutableSet.of(mockNode), startedAt, finishedAt); + } +} diff --git a/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/vnode/TestNormalizedRange.java b/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/vnode/TestNormalizedRange.java new file mode 100644 index 00000000..4f1a5a57 --- /dev/null +++ b/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/vnode/TestNormalizedRange.java @@ -0,0 +1,357 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.ericsson.bss.cassandra.ecchronos.core.impl.repair.vnode; + +import com.ericsson.bss.cassandra.ecchronos.core.metadata.DriverNode; +import com.ericsson.bss.cassandra.ecchronos.core.state.LongTokenRange; +import com.ericsson.bss.cassandra.ecchronos.core.state.VnodeRepairState; +import com.google.common.collect.ImmutableSet; +import nl.jqno.equalsverifier.EqualsVerifier; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.Mock; +import org.mockito.junit.MockitoJUnitRunner; + +import java.math.BigInteger; + + +import static com.ericsson.bss.cassandra.ecchronos.core.impl.repair.vnode.NormalizedRange.UNKNOWN_REPAIR_TIME; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatExceptionOfType; + +@RunWith(MockitoJUnitRunner.class) +public class TestNormalizedRange +{ + private static final BigInteger START = BigInteger.ZERO; + + @Mock + DriverNode mockNode; + + @Test + public void testMutateStart() + { + NormalizedBaseRange normalizedBaseRange = new NormalizedBaseRange(withVnode(100L, 109L, 1234L, 1235L)); + NormalizedRange normalizedRange = new NormalizedRange(normalizedBaseRange, START, bi(9L), 1234L, 1235L); + + NormalizedRange withNewStart = normalizedRange.mutateStart(bi(8L)); + assertThat(withNewStart.start()).isEqualTo(bi(8L)); + assertThat(withNewStart.end()).isEqualTo(bi(9L)); + assertThat(withNewStart.getStartedAt()).isEqualTo(1234L); + assertThat(withNewStart.getFinishedAt()).isEqualTo(1235L); + assertThat(withNewStart.getRepairTime()).isEqualTo(UNKNOWN_REPAIR_TIME); + } + + @Test + public void testMutateStartOutsideBaseRange() + { + NormalizedBaseRange normalizedBaseRange = new NormalizedBaseRange(withVnode(100L, 109L, 1234L, 1235L)); + NormalizedRange normalizedRange = new NormalizedRange(normalizedBaseRange, START, bi(9L), 1234L, 1235L); + + assertThatExceptionOfType(IllegalArgumentException.class).isThrownBy(() -> normalizedRange.mutateStart(bi(13L))); + 
} + + @Test + public void testMutateEnd() + { + NormalizedBaseRange normalizedBaseRange = new NormalizedBaseRange(withVnode(100L, 109L, 1234L, 1235L)); + NormalizedRange normalizedRange = new NormalizedRange(normalizedBaseRange, START, bi(9L), 1234L, 1235L); + + NormalizedRange withNewEnd = normalizedRange.mutateEnd(bi(8L)); + assertThat(withNewEnd.start()).isEqualTo(START); + assertThat(withNewEnd.end()).isEqualTo(bi(8L)); + assertThat(withNewEnd.getStartedAt()).isEqualTo(1234L); + assertThat(withNewEnd.getFinishedAt()).isEqualTo(1235L); + assertThat(withNewEnd.getRepairTime()).isEqualTo(UNKNOWN_REPAIR_TIME); + } + + @Test + public void testMutateEndOutsideBaseRange() + { + NormalizedBaseRange normalizedBaseRange = new NormalizedBaseRange(withVnode(100L, 109L, 1234L, 1235L)); + NormalizedRange normalizedRange = new NormalizedRange(normalizedBaseRange, START, bi(9L), 1234L, 1235L); + + assertThatExceptionOfType(IllegalArgumentException.class).isThrownBy(() -> normalizedRange.mutateEnd(bi(13))); + } + + @Test + public void testBetween() + { + NormalizedBaseRange normalizedBaseRange = new NormalizedBaseRange(withVnode(100L, 115L, 1234L, 1235L)); + NormalizedRange firstRange = new NormalizedRange(normalizedBaseRange, START, bi(9L), 1234L, 1235L); + NormalizedRange secondRange = new NormalizedRange(normalizedBaseRange, bi(13L), bi(15L), 1235L, 1236L); + + NormalizedRange between = firstRange.between(secondRange, 1236L, 1237L); + assertThat(between.start()).isEqualTo(bi(9L)); + assertThat(between.end()).isEqualTo(bi(13L)); + assertThat(between.getStartedAt()).isEqualTo(1236L); + assertThat(between.getFinishedAt()).isEqualTo(1237L); + assertThat(between.getRepairTime()).isEqualTo(UNKNOWN_REPAIR_TIME); + } + + @Test + public void testBetweenDifferentBaseRange() + { + NormalizedBaseRange normalizedBaseRange = new NormalizedBaseRange(withVnode(100L, 130L, 1234L, 1235L)); + NormalizedBaseRange normalizedBaseRange2 = new NormalizedBaseRange(withVnode(0L, 31L, 1234L, 1235L)); 
+ NormalizedRange firstRange = new NormalizedRange(normalizedBaseRange, START, bi(9L), 1234L, 1235L); + NormalizedRange secondRange = new NormalizedRange(normalizedBaseRange2, bi(13L), bi(15L), 1235L, 1236L); + + assertThatExceptionOfType(IllegalArgumentException.class).isThrownBy(() -> firstRange.between(secondRange, 1236L, 1237L)); + } + + @Test + public void testBetweenWrongOrder() + { + NormalizedBaseRange normalizedBaseRange = new NormalizedBaseRange(withVnode(100L, 115L, 1234L, 1235L)); + NormalizedRange firstRange = new NormalizedRange(normalizedBaseRange, bi(13L), bi(15L), 1235L, 1236L); + NormalizedRange secondRange = new NormalizedRange(normalizedBaseRange, START, bi(9L), 1234L, 1235L); + + assertThatExceptionOfType(IllegalArgumentException.class).isThrownBy(() -> firstRange.between(secondRange, 1236L, 1237L)); + } + + @Test + public void testBetweenAdjacent() + { + NormalizedBaseRange normalizedBaseRange = new NormalizedBaseRange(withVnode(100L, 115L, 1234L, 1235L)); + NormalizedRange firstRange = new NormalizedRange(normalizedBaseRange, START, bi(9L), 1235L, 1236L); + NormalizedRange secondRange = new NormalizedRange(normalizedBaseRange, bi(9L), bi(13L), 1234L, 1235L); + + assertThatExceptionOfType(IllegalArgumentException.class).isThrownBy(() -> firstRange.between(secondRange, 1236L, 1237L)); + } + + @Test + public void testSplitEnd() + { + NormalizedBaseRange normalizedBaseRange = new NormalizedBaseRange(withVnode(100L, 115L, 1234L, 1235L)); + NormalizedRange firstRange = new NormalizedRange(normalizedBaseRange, START, bi(13L), 1234L, 1235L); + NormalizedRange secondRange = new NormalizedRange(normalizedBaseRange, bi(9L), bi(15L), 1235L, 1236L); + + NormalizedRange splitted = firstRange.splitEnd(secondRange); + assertThat(splitted.start()).isEqualTo(bi(9L)); + assertThat(splitted.end()).isEqualTo(bi(13L)); + assertThat(splitted.getStartedAt()).isEqualTo(1235L); + assertThat(splitted.getFinishedAt()).isEqualTo(1235L); + 
assertThat(splitted.getRepairTime()).isEqualTo(UNKNOWN_REPAIR_TIME); + } + + @Test + public void testSplitEndDifferentBaseRange() + { + NormalizedBaseRange normalizedBaseRange = new NormalizedBaseRange(withVnode(100L, 115L, 1234L, 1235L)); + NormalizedBaseRange normalizedBaseRange2 = new NormalizedBaseRange(withVnode(100L, 116L, 1234L, 1235L)); + NormalizedRange firstRange = new NormalizedRange(normalizedBaseRange, START, bi(13L), 1234L, 1235L); + NormalizedRange secondRange = new NormalizedRange(normalizedBaseRange2, bi(9L), bi(15L), 1235L, 1236L); + + assertThatExceptionOfType(IllegalArgumentException.class).isThrownBy(() -> firstRange.splitEnd(secondRange)); + } + + @Test + public void testSplitEndWrongOrder() + { + NormalizedBaseRange normalizedBaseRange = new NormalizedBaseRange(withVnode(100L, 115L, 1234L, 1235L)); + NormalizedRange firstRange = new NormalizedRange(normalizedBaseRange, bi(9L), bi(15L), 1235L, 1236L); + NormalizedRange secondRange = new NormalizedRange(normalizedBaseRange, START, bi(13L), 1234L, 1235L); + + assertThatExceptionOfType(IllegalArgumentException.class).isThrownBy(() -> firstRange.splitEnd(secondRange)); + } + + @Test + public void testSplitEndAdjacent() + { + NormalizedBaseRange normalizedBaseRange = new NormalizedBaseRange(withVnode(100L, 115L, 1234L, 1235L)); + NormalizedRange firstRange = new NormalizedRange(normalizedBaseRange, START, bi(9L), 1235L, 1236L); + NormalizedRange secondRange = new NormalizedRange(normalizedBaseRange, bi(9L), bi(13L), 1234L, 1235L); + + assertThatExceptionOfType(IllegalArgumentException.class).isThrownBy(() -> firstRange.splitEnd(secondRange)); + } + + @Test + public void testCombine() + { + NormalizedBaseRange normalizedBaseRange = new NormalizedBaseRange(withVnode(100L, 115L, 1234L, 1235L)); + NormalizedRange firstRange = new NormalizedRange(normalizedBaseRange, START, bi(13L), 1234L, 1235L); + NormalizedRange secondRange = new NormalizedRange(normalizedBaseRange, bi(13L), bi(15L), 1235L, 1236L); + 
+ NormalizedRange combined = firstRange.combine(secondRange); + assertThat(combined.start()).isEqualTo(START); + assertThat(combined.end()).isEqualTo(bi(15L)); + assertThat(combined.getStartedAt()).isEqualTo(1234L); + assertThat(combined.getFinishedAt()).isEqualTo(1236L); + assertThat(combined.getRepairTime()).isEqualTo(2L); + } + + @Test + public void testCombineDifferentBaseRange() + { + NormalizedBaseRange normalizedBaseRange = new NormalizedBaseRange(withVnode(100L, 115L, 1234L, 1235L)); + NormalizedBaseRange normalizedBaseRange2 = new NormalizedBaseRange(withVnode(100L, 116L, 1234L, 1235L)); + NormalizedRange firstRange = new NormalizedRange(normalizedBaseRange, START, bi(13L), 1234L, 1235L); + NormalizedRange secondRange = new NormalizedRange(normalizedBaseRange2, bi(13L), bi(15L), 1235L, 1236L); + + assertThatExceptionOfType(IllegalArgumentException.class).isThrownBy(() -> firstRange.combine(secondRange)); + } + + @Test + public void testCombineWrongOrder() + { + NormalizedBaseRange normalizedBaseRange = new NormalizedBaseRange(withVnode(100L, 115L, 1234L, 1235L)); + NormalizedRange secondRange = new NormalizedRange(normalizedBaseRange, START, bi(13L), 1234L, 1235L); + NormalizedRange firstRange = new NormalizedRange(normalizedBaseRange, bi(13L), bi(15L), 1235L, 1236L); + + assertThatExceptionOfType(IllegalArgumentException.class).isThrownBy(() -> firstRange.combine(secondRange)); + } + + @Test + public void testIsCovering() + { + NormalizedBaseRange normalizedBaseRange = new NormalizedBaseRange(withVnode(100L, 115L, 1234L, 1235L)); + NormalizedRange firstRange = new NormalizedRange(normalizedBaseRange, START, bi(13L), 1234L, 1235L); + NormalizedRange secondRange = new NormalizedRange(normalizedBaseRange, bi(10L), bi(12L), 1235L, 1236L); + + assertThat(firstRange.isCovering(secondRange)).isTrue(); + } + + @Test + public void testIsCoveringSameStart() + { + NormalizedBaseRange normalizedBaseRange = new NormalizedBaseRange(withVnode(100L, 115L, 1234L, 1235L)); 
+ NormalizedRange firstRange = new NormalizedRange(normalizedBaseRange, START, bi(13L), 1234L, 1235L); + NormalizedRange secondRange = new NormalizedRange(normalizedBaseRange, START, bi(12L), 1235L, 1236L); + + assertThat(firstRange.isCovering(secondRange)).isTrue(); + } + + @Test + public void testIsCoveringSameEnd() + { + NormalizedBaseRange normalizedBaseRange = new NormalizedBaseRange(withVnode(100L, 115L, 1234L, 1235L)); + NormalizedRange firstRange = new NormalizedRange(normalizedBaseRange, START, bi(13L), 1234L, 1235L); + NormalizedRange secondRange = new NormalizedRange(normalizedBaseRange, bi(10L), bi(13L), 1235L, 1236L); + + assertThat(firstRange.isCovering(secondRange)).isTrue(); + } + + @Test + public void testIsCoveringOutsideStart() + { + NormalizedBaseRange normalizedBaseRange = new NormalizedBaseRange(withVnode(100L, 115L, 1234L, 1235L)); + NormalizedRange firstRange = new NormalizedRange(normalizedBaseRange, bi(4L), bi(13L), 1234L, 1235L); + NormalizedRange secondRange = new NormalizedRange(normalizedBaseRange, bi(3L), bi(12L), 1235L, 1236L); + + assertThat(firstRange.isCovering(secondRange)).isFalse(); + } + + @Test + public void testIsCoveringOutsideEnd() + { + NormalizedBaseRange normalizedBaseRange = new NormalizedBaseRange(withVnode(100L, 115L, 1234L, 1235L)); + NormalizedRange firstRange = new NormalizedRange(normalizedBaseRange, bi(4L), bi(13L), 1234L, 1235L); + NormalizedRange secondRange = new NormalizedRange(normalizedBaseRange, bi(5L), bi(15L), 1235L, 1236L); + + assertThat(firstRange.isCovering(secondRange)).isFalse(); + } + + @Test + public void testIsCoveringDifferentBaseRange() + { + NormalizedBaseRange normalizedBaseRange = new NormalizedBaseRange(withVnode(100L, 115L, 1234L, 1235L)); + NormalizedBaseRange normalizedBaseRange2 = new NormalizedBaseRange(withVnode(100L, 116L, 1234L, 1235L)); + NormalizedRange firstRange = new NormalizedRange(normalizedBaseRange, START, bi(13L), 1234L, 1235L); + NormalizedRange secondRange = new 
NormalizedRange(normalizedBaseRange2, bi(10L), bi(12L), 1235L, 1236L); + + assertThatExceptionOfType(IllegalArgumentException.class).isThrownBy(() -> firstRange.isCovering(secondRange)); + } + + @Test + public void testIsCoveringReverse() + { + NormalizedBaseRange normalizedBaseRange = new NormalizedBaseRange(withVnode(100L, 115L, 1234L, 1235L)); + NormalizedRange firstRange = new NormalizedRange(normalizedBaseRange, bi(10L), bi(12L), 1235L, 1235L); + NormalizedRange secondRange = new NormalizedRange(normalizedBaseRange, START, bi(13L), 1234L, 1235L); + + assertThat(firstRange.isCovering(secondRange)).isFalse(); + } + + @Test + public void testCompareSimpleFirst() + { + NormalizedBaseRange normalizedBaseRange = new NormalizedBaseRange(withVnode(100L, 99L, 1234L, 1235L)); + NormalizedRange range1 = new NormalizedRange(normalizedBaseRange, bi(1L), bi(5L), 1234L, 1235L); + NormalizedRange range2 = new NormalizedRange(normalizedBaseRange, bi(5L), bi(10L), 1234L, 1235L); + + assertThat(range1.compareTo(range2)).isLessThan(0); + } + + @Test + public void testCompareSimpleAfter() + { + NormalizedBaseRange normalizedBaseRange = new NormalizedBaseRange(withVnode(100L, 99L, 1234L, 1235L)); + NormalizedRange range1 = new NormalizedRange(normalizedBaseRange, bi(1L), bi(5L), 1234L, 1235L); + NormalizedRange range2 = new NormalizedRange(normalizedBaseRange, bi(5L), bi(10L), 1234L, 1235L); + + assertThat(range2.compareTo(range1)).isGreaterThan(0); + } + + @Test + public void testCompareSame() + { + NormalizedBaseRange normalizedBaseRange = new NormalizedBaseRange(withVnode(100L, 99L, 1234L, 1235L)); + NormalizedRange range1 = new NormalizedRange(normalizedBaseRange, bi(1L), bi(5L), 1234L, 1235L); + NormalizedRange range2 = new NormalizedRange(normalizedBaseRange, bi(1L), bi(5L), 1234L, 1235L); + + assertThat(range2.compareTo(range1)).isEqualTo(0); + } + + @Test + public void testCompareSameStart() + { + NormalizedBaseRange normalizedBaseRange = new 
NormalizedBaseRange(withVnode(100L, 99L, 1234L, 1235L)); + NormalizedRange range1 = new NormalizedRange(normalizedBaseRange, bi(1L), bi(5L), 1234L, 1235L); + NormalizedRange range2 = new NormalizedRange(normalizedBaseRange, bi(1L), bi(10L), 1234L, 1235L); + + assertThat(range2.compareTo(range1)).isLessThan(0); + } + + @Test + public void testCompareDifferentBaseRanges() + { + NormalizedBaseRange normalizedBaseRange = new NormalizedBaseRange(withVnode(100L, 99L, 1234L, 1235L)); + NormalizedBaseRange normalizedBaseRange2 = new NormalizedBaseRange(withVnode(100L, Long.MAX_VALUE, 1234L, 1235L)); + NormalizedRange range1 = new NormalizedRange(normalizedBaseRange, bi(1L), bi(5L), 1234L, 1235L); + NormalizedRange range2 = new NormalizedRange(normalizedBaseRange2, bi(1L), bi(10L), 1234L, 1235L); + + assertThatExceptionOfType(IllegalArgumentException.class).isThrownBy(() -> range1.compareTo(range2)); + } + + @Test + public void testEqualsContract() + { + EqualsVerifier.forClass(NormalizedRange.class).usingGetClass() + .withPrefabValues( + VnodeRepairState.class, withVnode(0L, 0L, 1234L, 1235L), withVnode(0L, 1L, 1234L, 1236L)) + .withNonnullFields("base", "start", "end") + .verify(); + } + + private BigInteger bi(long token) + { + return BigInteger.valueOf(token); + } + + private VnodeRepairState withVnode(long start, long end, long startedAt, long finishedAt) + { + return new VnodeRepairState(new LongTokenRange(start, end), ImmutableSet.of(mockNode), startedAt, finishedAt); + } +} diff --git a/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/vnode/TestVnodeRepairStateFactoryImpl.java b/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/vnode/TestVnodeRepairStateFactoryImpl.java new file mode 100644 index 00000000..b8ca5bfe --- /dev/null +++ b/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/vnode/TestVnodeRepairStateFactoryImpl.java @@ -0,0 +1,815 @@ +/* + * Copyright 2024 
Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.ericsson.bss.cassandra.ecchronos.core.impl.repair.vnode; + +import static com.ericsson.bss.cassandra.ecchronos.core.impl.table.MockTableReferenceFactory.tableReference; +import static org.assertj.core.api.Assertions.assertThat; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + + +import com.datastax.oss.driver.api.core.metadata.Node; +import com.ericsson.bss.cassandra.ecchronos.core.metadata.DriverNode; +import com.ericsson.bss.cassandra.ecchronos.core.state.LongTokenRange; +import com.ericsson.bss.cassandra.ecchronos.core.state.RepairEntry; +import com.ericsson.bss.cassandra.ecchronos.core.state.RepairHistoryProvider; +import com.ericsson.bss.cassandra.ecchronos.core.state.RepairStateSnapshot; +import com.ericsson.bss.cassandra.ecchronos.core.state.ReplicationState; +import com.ericsson.bss.cassandra.ecchronos.core.state.VnodeRepairState; +import com.ericsson.bss.cassandra.ecchronos.core.state.VnodeRepairStateFactory; +import com.ericsson.bss.cassandra.ecchronos.core.state.VnodeRepairStates; +import com.ericsson.bss.cassandra.ecchronos.core.table.TableReference; +import com.google.common.collect.AbstractIterator; +import java.net.InetAddress; +import java.net.UnknownHostException; +import java.util.ArrayList; +import java.util.Arrays; 
+import java.util.Collection; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; + +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.TreeMap; +import java.util.concurrent.TimeUnit; + +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.Mock; +import org.mockito.junit.MockitoJUnitRunner; + +import com.google.common.base.Predicate; +import com.google.common.collect.ImmutableSet; + +@RunWith(MockitoJUnitRunner.Silent.class) +public class TestVnodeRepairStateFactoryImpl +{ + private static final TableReference TABLE_REFERENCE = tableReference("ks", "tb"); + + @Mock + private ReplicationState mockReplicationState; + + private Map<LongTokenRange, ImmutableSet<DriverNode>> tokenToNodeMap = new TreeMap<>( + Comparator.comparingLong(l -> l.start)); + + @Mock + private Node myMockNode; + + private RepairHistoryProvider repairHistoryProvider; + + private List<RepairEntry> repairHistory = new ArrayList<>(); + + @Before + public void setup() + { + repairHistoryProvider = new MockedRepairHistoryProvider(myMockNode, TABLE_REFERENCE); + when(mockReplicationState.getTokenRangeToReplicas(eq(TABLE_REFERENCE), eq(myMockNode))).thenReturn(tokenToNodeMap); + when(mockReplicationState.getTokenRanges(eq(TABLE_REFERENCE), eq(myMockNode))).thenReturn(tokenToNodeMap); + } + + @Test + public void testEmptyHistoryNoPreviousIsUnrepaired() throws UnknownHostException + { + DriverNode node1 = withNode("127.0.0.1"); + DriverNode node2 = withNode("127.0.0.2"); + + withRange(range(1, 2), node1, node2); + withRange(range(2, 3), node1, node2); + + assertNewStateSameForVnodeAndSubrange(newUnrepairedState(range(1, 2)), + newUnrepairedState(range(2, 3))); + } + + @Test + public void testCalculateStateEmptyHistoryIsUnrepaired() throws UnknownHostException + { + DriverNode node1 = withNode("127.0.0.1"); + DriverNode node2 = withNode("127.0.0.2"); + + withRange(range(1, 2), node1, node2); + withRange(range(2, 3), node1, node2); + +
assertStateSameForVnodeAndSubrange(1234L, 1235L, newUnrepairedState(range(1, 2)), + newUnrepairedState(range(2, 3))); + } + + @Test + public void testCalculateStateClusterWideEmptyHistoryIsUnrepaired() throws UnknownHostException + { + DriverNode node1 = withNode("127.0.0.1"); + DriverNode node2 = withNode("127.0.0.2"); + + withRange(range(1, 2), node1, node2); + withRange(range(2, 3), node1, node2); + + assertClusterWideStateSameForVnodeAndSubrange(1234L, 1235L, newUnrepairedState(range(1, 2)), + newUnrepairedState(range(2, 3))); + } + + @Test + public void testEmptyHistoryWithPreviousKeepsRepairedAt() throws UnknownHostException + { + DriverNode node1 = withNode("127.0.0.1"); + DriverNode node2 = withNode("127.0.0.2"); + + withRange(range(1, 2), node1, node2); + withRange(range(2, 3), node1, node2); + + RepairStateSnapshot previousSnapshot = snapshot(1234L, + newState(range(1, 2), 1234L, 1235L), + newState(range(2, 3), 2345L, 2346L)); + + assertNewVnodeStates(previousSnapshot, + newState(range(1, 2), 1234L, 1235L), + newState(range(2, 3), 2345L, 2346L)); + assertNewSubRangeStates(previousSnapshot, newState(range(1, 2), 1234L, -1L), + newState(range(2, 3), 2345L, 2346L)); + } + + @Test + public void testWithHistoryNoPreviousIsRepaired() throws UnknownHostException + { + DriverNode node1 = withNode("127.0.0.1"); + DriverNode node2 = withNode("127.0.0.2"); + + withRange(range(1, 2), node1, node2); + withRange(range(2, 3), node1, node2); + + withSuccessfulRepairHistory(range(1, 2), 1234L, 1235L); + withSuccessfulRepairHistory(range(2, 3), 2345L, 2346L); + + assertNewStateSameForVnodeAndSubrange(newState(range(1, 2), 1234L, 1235L), + newState(range(2, 3), 2345L, 2346L)); + } + + @Test + public void testCalculateStateWithHistoryIsRepaired() throws UnknownHostException + { + DriverNode node1 = withNode("127.0.0.1"); + DriverNode node2 = withNode("127.0.0.2"); + + withRange(range(1, 2), node1, node2); + withRange(range(2, 3), node1, node2); + + 
withSuccessfulRepairHistory(range(1, 2), 1234L, 1235L); + withSuccessfulRepairHistory(range(2, 3), 2345L, 2346L); + + assertStateSameForVnodeAndSubrange(2346L, 1234L, + newState(range(1, 2), 1234L, 1235L), + newState(range(2, 3), 2345L, 2346L)); + } + + @Test + public void testCalculateClusterWideStateWithHistoryIsRepaired() throws UnknownHostException + { + DriverNode node1 = withNode("127.0.0.1"); + DriverNode node2 = withNode("127.0.0.2"); + + withRange(range(1, 2), node1, node2); + withRange(range(2, 3), node1, node2); + + withSuccessfulRepairHistory(range(1, 2), 1234L, 1235L); + withSuccessfulRepairHistory(range(2, 3), 2345L, 2346L); + + assertClusterWideStateSameForVnodeAndSubrange(2346L, 1234L, + newState(range(1, 2), 1234L, 1235L), + newState(range(2, 3), 2345L, 2346L)); + } + + @Test + public void testWithSubRangeHistoryNoPreviousIsRepaired() throws UnknownHostException + { + DriverNode node1 = withNode("127.0.0.1"); + DriverNode node2 = withNode("127.0.0.2"); + + withRange(range(1, 5), node1, node2); + withRange(range(5, 10), node1, node2); + + long range1StartedAt = TimeUnit.DAYS.toMillis(10); + long range1FinishedAt = TimeUnit.DAYS.toMillis(11); + long range2StartedAt = TimeUnit.DAYS.toMillis(11); + long range2FinishedAt = TimeUnit.DAYS.toMillis(12); + + withSubRangeSuccessfulRepairHistory(range(1, 3), range1StartedAt, range1FinishedAt); + withSubRangeSuccessfulRepairHistory(range(3, 5), range1StartedAt, range1FinishedAt); + + withSubRangeSuccessfulRepairHistory(range(5, 8), range2StartedAt, range2FinishedAt); + withSubRangeSuccessfulRepairHistory(range(8, 10), range2StartedAt, range2FinishedAt); + + assertNewVnodeStates(newUnrepairedState(range(1, 5)), + newUnrepairedState(range(5, 10))); + assertNewSubRangeStates(newState(range(1, 5), range1StartedAt, range1FinishedAt, (range1FinishedAt-range1StartedAt) + (range1FinishedAt-range1StartedAt)), + newState(range(5, 10), range2StartedAt, range2FinishedAt, (range2FinishedAt-range2StartedAt) + 
(range2FinishedAt-range2StartedAt))); + } + + @Test + public void testWithSubRangeHistoryNoPreviousIsPartiallyRepaired() throws UnknownHostException + { + DriverNode node1 = withNode("127.0.0.1"); + DriverNode node2 = withNode("127.0.0.2"); + + withRange(range(1, 5), node1, node2); + withRange(range(5, 10), node1, node2); + + long range1StartedAt = TimeUnit.DAYS.toMillis(10); + long range1FinishedAt = TimeUnit.DAYS.toMillis(10); + long range2StartedAt = TimeUnit.DAYS.toMillis(11); + long range2FinishedAt = TimeUnit.DAYS.toMillis(11); + + withSubRangeSuccessfulRepairHistory(range(1, 3), range1StartedAt, range1FinishedAt); + + withSubRangeSuccessfulRepairHistory(range(5, 8), range2StartedAt, range2FinishedAt); + withSubRangeSuccessfulRepairHistory(range(8, 10), range2StartedAt, range2FinishedAt); + + assertNewVnodeStates(newUnrepairedState(range(1, 5)), + newUnrepairedState(range(5, 10))); + assertNewSubRangeStates(newSubRangeState(range(1, 3), range1StartedAt, range1FinishedAt), + newSubRangeUnrepairedState(range(3, 5)), + newState(range(5, 10), range2StartedAt, range2FinishedAt)); + } + + @Test + public void testCalculateStateWithSubRangeHistoryIsPartiallyRepaired() throws UnknownHostException + { + DriverNode node1 = withNode("127.0.0.1"); + DriverNode node2 = withNode("127.0.0.2"); + + withRange(range(1, 5), node1, node2); + withRange(range(5, 10), node1, node2); + + long range1StartedAt = TimeUnit.DAYS.toMillis(10); + long range1FinishedAt = TimeUnit.DAYS.toMillis(10); + long range2StartedAt = TimeUnit.DAYS.toMillis(11); + long range2FinishedAt = TimeUnit.DAYS.toMillis(11); + + withSubRangeSuccessfulRepairHistory(range(1, 3), range1StartedAt, range1FinishedAt); + + withSubRangeSuccessfulRepairHistory(range(5, 8), range2StartedAt, range2FinishedAt); + withSubRangeSuccessfulRepairHistory(range(8, 10), range2StartedAt, range2FinishedAt); + + assertVnodeStates(range1StartedAt, range2FinishedAt, newUnrepairedState(range(1, 5)), + newUnrepairedState(range(5, 10))); + 
assertSubRangeStates(range2FinishedAt, range1StartedAt, + newSubRangeState(range(1, 3), range1StartedAt, range1FinishedAt), + newSubRangeUnrepairedState(range(3, 5)), + newState(range(5, 10), range2StartedAt, range2FinishedAt)); + } + + @Test + public void testCalculateClusterWideStateWithSubRangeHistoryIsPartiallyRepaired() throws UnknownHostException + { + DriverNode node1 = withNode("127.0.0.1"); + DriverNode node2 = withNode("127.0.0.2"); + + withRange(range(1, 5), node1, node2); + withRange(range(5, 10), node1, node2); + + long range1StartedAt = TimeUnit.DAYS.toMillis(10); + long range1FinishedAt = TimeUnit.DAYS.toMillis(10); + long range2StartedAt = TimeUnit.DAYS.toMillis(11); + long range2FinishedAt = TimeUnit.DAYS.toMillis(11); + + withSubRangeSuccessfulRepairHistory(range(1, 3), range1StartedAt, range1FinishedAt); + + withSubRangeSuccessfulRepairHistory(range(5, 8), range2StartedAt, range2FinishedAt); + withSubRangeSuccessfulRepairHistory(range(8, 10), range2StartedAt, range2FinishedAt); + + assertClusterWideVnodeStates(range1StartedAt, range2FinishedAt, newUnrepairedState(range(1, 5)), + newUnrepairedState(range(5, 10))); + assertClusterWideSubRangeStates(range2FinishedAt, range1StartedAt, + newSubRangeState(range(1, 3), range1StartedAt, range1FinishedAt), + newSubRangeUnrepairedState(range(3, 5)), + newState(range(5, 10), range2StartedAt, range2FinishedAt)); + } + + @Test + public void testWithSubRangeHistoryAndPreviousIsPartiallyRepaired() throws UnknownHostException + { + DriverNode node1 = withNode("127.0.0.1"); + DriverNode node2 = withNode("127.0.0.2"); + + long firstStartedAt = TimeUnit.DAYS.toMillis(8); + long firstFinishedAt = TimeUnit.DAYS.toMillis(8); + long range1StartedAt = TimeUnit.DAYS.toMillis(10); + long range1FinishedAt = TimeUnit.DAYS.toMillis(10); + long range2StartedAt = TimeUnit.DAYS.toMillis(11); + long range2FinishedAt = TimeUnit.DAYS.toMillis(11); + + withRange(range(1, 5), node1, node2); + withRange(range(5, 10), node1, node2); + 
+ withSubRangeSuccessfulRepairHistory(range(1, 3), range1StartedAt, range1FinishedAt); + + withSubRangeSuccessfulRepairHistory(range(5, 8), range2StartedAt, range2FinishedAt); + withSubRangeSuccessfulRepairHistory(range(8, 10), range2StartedAt, range2FinishedAt); + + RepairStateSnapshot previousSnapshot = snapshot(firstStartedAt, + newState(range(1, 5), firstStartedAt, firstFinishedAt), + newState(range(5, 10), firstStartedAt, firstFinishedAt)); + + assertNewVnodeStates(previousSnapshot, + newState(range(1, 5), firstStartedAt, firstFinishedAt), + newState(range(5, 10), firstStartedAt, firstFinishedAt)); + + assertNewSubRangeStates(previousSnapshot, + newSubRangeState(range(1, 3), range1StartedAt, range1FinishedAt), + newSubRangeState(range(3, 5), firstStartedAt, VnodeRepairState.UNREPAIRED), + newSubRangeState(range(5, 10), range2StartedAt, range2FinishedAt)); + } + + @Test + public void testWithHistoryNoPreviousIsPartiallyRepaired() throws UnknownHostException + { + DriverNode node1 = withNode("127.0.0.1"); + DriverNode node2 = withNode("127.0.0.2"); + + withRange(range(1, 2), node1, node2); + withRange(range(2, 3), node1, node2); + + long range1StartedAt = 1; + long range1FinishedAt = 2; + long range2StartedAt = 3; + + withSuccessfulRepairHistory(range(1, 2), range1StartedAt, range1FinishedAt); + withFailedRepairHistory(range(2, 3), range2StartedAt); + + assertNewStateSameForVnodeAndSubrange(newState(range(1, 2), range1StartedAt, range1FinishedAt), + newUnrepairedState(range(2, 3))); + } + + @Test + public void testCalculateStateWithHistoryIsPartiallyRepaired() throws UnknownHostException + { + DriverNode node1 = withNode("127.0.0.1"); + DriverNode node2 = withNode("127.0.0.2"); + + withRange(range(1, 2), node1, node2); + withRange(range(2, 3), node1, node2); + + long range1StartedAt = 1; + long range1FinishedAt = 2; + long range2StartedAt = 3; + + withSuccessfulRepairHistory(range(1, 2), range1StartedAt, range1FinishedAt); + withFailedRepairHistory(range(2, 3), 
range2StartedAt); + + assertStateSameForVnodeAndSubrange(range2StartedAt, range1StartedAt, newState(range(1, 2), range1StartedAt, range1FinishedAt), + newUnrepairedState(range(2, 3))); + } + + @Test + public void testCalculateClusterWideStateWithHistoryIsPartiallyRepaired() throws UnknownHostException + { + DriverNode node1 = withNode("127.0.0.1"); + DriverNode node2 = withNode("127.0.0.2"); + + withRange(range(1, 2), node1, node2); + withRange(range(2, 3), node1, node2); + + long range1StartedAt = 1; + long range1FinishedAt = 2; + long range2StartedAt = 3; + + withSuccessfulRepairHistory(range(1, 2), range1StartedAt, range1FinishedAt); + withFailedRepairHistory(range(2, 3), range2StartedAt); + + assertClusterWideStateSameForVnodeAndSubrange(range2StartedAt, range1StartedAt, newState(range(1, 2), + range1StartedAt, range1FinishedAt), newUnrepairedState(range(2, 3))); + } + + @Test + public void testWithOldHistoryNoPreviousIsPartiallyRepaired() throws UnknownHostException + { + DriverNode node1 = withNode("127.0.0.1"); + DriverNode node2 = withNode("127.0.0.2"); + DriverNode node3 = withNode("127.0.0.3"); + + long range1StartedAt = 1; + long range1FinishedAt = 2; + long range2StartedAt = 3; + long range2FinishedAt = 4; + + withRange(range(1, 2), node1, node2); + withRange(range(2, 3), node1, node3); + withSuccessfulRepairHistory(range(2, 3), range2StartedAt, range2FinishedAt); // Previous replication + + replaceRange(range(2, 3), range(2, 3), node1, node2); + + withSuccessfulRepairHistory(range(1, 2), range1StartedAt, range1FinishedAt); + + assertNewStateSameForVnodeAndSubrange(newState(range(1, 2), range1StartedAt, range1FinishedAt), + newUnrepairedState(range(2, 3))); + } + + @Test + public void testWithHistoryAndPreviousAfterScaleOut() throws UnknownHostException + { + DriverNode node1 = withNode("127.0.0.1"); + DriverNode node2 = withNode("127.0.0.2"); + DriverNode node3 = withNode("127.0.0.3"); + + withRange(range(1, 4), node1, node2); + withRange(range(5, 0), 
node1, node2); + + replaceRange(range(1, 4), range(1, 2), node1, node3); + + RepairStateSnapshot previousSnapshot = snapshot(1234L, + newState(range(1, 4), 1234L, 1235L), + newState(range(5, 0), 1236L, 1237L)); + + assertNewStateSameForVnodeAndSubrange(previousSnapshot, + newState(range(1, 2), 1234L, VnodeRepairState.UNREPAIRED), + newState(range(5, 0), 1236L, 1237L)); + } + + @Test + public void testWithHistoryAndPreviousOnlyIteratesOverDiff() throws UnknownHostException + { + DriverNode node1 = withNode("127.0.0.1"); + DriverNode node2 = withNode("127.0.0.2"); + + LongTokenRange longTokenRange1 = range(1, 2); + LongTokenRange longTokenRange2 = range(2, 3); + withRange(longTokenRange1, node1, node2); + withRange(longTokenRange2, node1, node2); + ImmutableSet replicas = ImmutableSet.of(node1, node2); + + Map> tokenToHostMap = new HashMap<>(); + tokenToHostMap.put(longTokenRange1, replicas); + tokenToHostMap.put(longTokenRange2, replicas); + + long range1RepairedAt = 1; + long range2RepairedAt = 2; + RepairEntry repairEntry1 = new RepairEntry(longTokenRange1, range1RepairedAt, range1RepairedAt, replicas, "SUCCESS"); + RepairEntry repairEntry2 = new RepairEntry(longTokenRange2, range2RepairedAt, range2RepairedAt, replicas, "SUCCESS"); + List firstIterateRepairEntries = new ArrayList<>(); + firstIterateRepairEntries.add(repairEntry1); + firstIterateRepairEntries.add(repairEntry2); + + when(mockReplicationState.getTokenRangeToReplicas(eq(TABLE_REFERENCE), eq(myMockNode))).thenReturn(tokenToHostMap); + repairHistoryProvider = mock(RepairHistoryProvider.class); + when(repairHistoryProvider.iterate(eq(myMockNode), eq(TABLE_REFERENCE), any(long.class), any(Predicate.class))).thenReturn( + firstIterateRepairEntries.iterator()); + + assertNewVnodeStates(newState(range(1, 2), range1RepairedAt, range1RepairedAt), + newState(range(2, 3), range2RepairedAt, range2RepairedAt)); + + // Check that vnodes keep their states from old snapshot even if iterator is empty + long 
firstSnapshotCreatedAt = 3; + RepairStateSnapshot firstRepairStateSnapshot = snapshot(range1RepairedAt, firstSnapshotCreatedAt, + newState(range(1, 2), range1RepairedAt, range1RepairedAt), + newState(range(2, 3), range2RepairedAt, range2RepairedAt)); + List secondIterateRepairEntries = new ArrayList<>(); + + when(repairHistoryProvider.iterate(eq(myMockNode), eq(TABLE_REFERENCE), any(long.class), eq(firstSnapshotCreatedAt), + any(Predicate.class))).thenReturn(secondIterateRepairEntries.iterator()); + + assertNewVnodeStates(firstRepairStateSnapshot, newState(range(1, 2), range1RepairedAt, range1RepairedAt), + newState(range(2, 3), range2RepairedAt, range2RepairedAt)); + + // Check that vnodes get updated for the new repair entries and old are kept from old snapshot + long secondSnapshotCreatedAt = 5; + RepairStateSnapshot secondRepairStateSnapshot = snapshot(range1RepairedAt, secondSnapshotCreatedAt, + newState(range(1, 2), range1RepairedAt, range1RepairedAt), + newState(range(2, 3), range2RepairedAt, range2RepairedAt)); + long updateRange1RepairedAt = 4; + RepairEntry repairEntry3 = new RepairEntry(longTokenRange1, updateRange1RepairedAt, updateRange1RepairedAt, replicas, "SUCCESS"); + List thirdIterateRepairEntries = new ArrayList<>(); + thirdIterateRepairEntries.add(repairEntry3); + + when(repairHistoryProvider.iterate(eq(myMockNode), eq(TABLE_REFERENCE), any(long.class), eq(secondSnapshotCreatedAt), + any(Predicate.class))).thenReturn(thirdIterateRepairEntries.iterator()); + assertNewVnodeStates(secondRepairStateSnapshot, newState(range(1, 2), updateRange1RepairedAt, updateRange1RepairedAt), + newState(range(2, 3), range2RepairedAt, range2RepairedAt)); + } + + private RepairStateSnapshot snapshot(long repairedAt, VnodeRepairState... states) + { + return snapshot(repairedAt, repairedAt, states); + } + + private RepairStateSnapshot snapshot(long repairedAt, long createdAt, VnodeRepairState... 
states) + { + return RepairStateSnapshot.newBuilder() + .withLastCompletedAt(repairedAt) + .withReplicaRepairGroups(Collections.emptyList()) + .withVnodeRepairStates(vnodeRepairStates(states)) + .withCreatedAt(createdAt) + .build(); + } + + private VnodeRepairStates vnodeRepairStates(VnodeRepairState... states) + { + return VnodeRepairStatesImpl.newBuilder(Arrays.asList(states)).build(); + } + + private void withSubRangeSuccessfulRepairHistory(LongTokenRange range, long startedAt, long finishedAt) + { + ImmutableSet replicas = getKnownReplicasForSubRange(range); + withRepairHistory(range, startedAt, finishedAt, replicas, "SUCCESS"); + } + + private void withSuccessfulRepairHistory(LongTokenRange range, long startedAt, long finishedAt) + { + ImmutableSet replicas = getKnownReplicas(range); + withRepairHistory(range, startedAt, finishedAt, replicas, "SUCCESS"); + } + + private void withFailedRepairHistory(LongTokenRange range, long startedAt) + { + ImmutableSet replicas = getKnownReplicas(range); + withRepairHistory(range, startedAt, VnodeRepairState.UNREPAIRED, replicas, "FAILED"); + } + + private void withRepairHistory(LongTokenRange range, long startedAt, long finishedAt, ImmutableSet replicas, String status) + { + RepairEntry repairEntry = new RepairEntry(range, startedAt, finishedAt, replicas, status); + repairHistory.add(repairEntry); + } + + private VnodeRepairState newUnrepairedState(LongTokenRange range) + { + return newState(range, VnodeRepairState.UNREPAIRED, VnodeRepairState.UNREPAIRED); + } + + private VnodeRepairState newState(LongTokenRange range, long startedAt, long finishedAt) + { + return new VnodeRepairState(range, getKnownReplicas(range), startedAt, finishedAt); + } + + private VnodeRepairState newState(LongTokenRange range, long startedAt, long finishedAt, long repairTime) + { + return new VnodeRepairState(range, getKnownReplicas(range), startedAt, finishedAt, repairTime); + } + + private VnodeRepairState 
newSubRangeUnrepairedState(LongTokenRange range) + { + return newSubRangeState(range, VnodeRepairState.UNREPAIRED, VnodeRepairState.UNREPAIRED); + } + + private VnodeRepairState newSubRangeState(LongTokenRange range, long startedAt, long finishedAt) + { + return new VnodeRepairState(range, getKnownReplicasForSubRange(range), startedAt, finishedAt); + } + + private ImmutableSet getKnownReplicasForSubRange(LongTokenRange range) + { + ImmutableSet replicas = tokenToNodeMap.get(range); + if (replicas == null) + { + for (LongTokenRange vnode : tokenToNodeMap.keySet()) + { + if (vnode.isCovering(range)) + { + replicas = tokenToNodeMap.get(vnode); + break; + } + } + + assertThat(replicas).isNotNull(); + } + + return replicas; + } + + private ImmutableSet getKnownReplicas(LongTokenRange range) + { + ImmutableSet replicas = tokenToNodeMap.get(range); + assertThat(replicas).isNotNull(); + return replicas; + } + + private LongTokenRange range(long start, long end) + { + return new LongTokenRange(start, end); + } + + private void withRange(LongTokenRange range, DriverNode... replicas) + { + tokenToNodeMap.put(range, ImmutableSet.copyOf(replicas)); + } + + private void replaceRange(LongTokenRange previousRange, LongTokenRange newRange, DriverNode... newReplicas) + { + tokenToNodeMap.remove(previousRange); + withRange(newRange, newReplicas); + } + + private DriverNode withNode(String inetAddress) throws UnknownHostException + { + DriverNode node = mock(DriverNode.class); + InetAddress nodeAddress = InetAddress.getByName(inetAddress); + when(node.getPublicAddress()).thenReturn(nodeAddress); + return node; + } + + private void assertNewVnodeStates(VnodeRepairState... states) + { + assertNewVnodeStates(null, states); + } + + private void assertNewVnodeStates(RepairStateSnapshot previous, VnodeRepairState... 
states) + { + VnodeRepairStateFactory vnodeRepairStateFactory = new VnodeRepairStateFactoryImpl(mockReplicationState, repairHistoryProvider, false); + assertNewState(vnodeRepairStateFactory, previous, VnodeRepairStatesImpl.class, states); + } + + private void assertNewSubRangeStates(VnodeRepairState... states) + { + assertNewSubRangeStates(null, states); + } + + private void assertNewSubRangeStates(RepairStateSnapshot previous, VnodeRepairState... states) + { + VnodeRepairStateFactory subRangeRepairStateFactory = new VnodeRepairStateFactoryImpl(mockReplicationState, repairHistoryProvider, true); + assertNewState(subRangeRepairStateFactory, previous, SubRangeRepairStates.class, states); + } + + private void assertNewStateSameForVnodeAndSubrange(VnodeRepairState... states) + { + assertNewStateSameForVnodeAndSubrange(null, states); + } + + private void assertNewStateSameForVnodeAndSubrange(RepairStateSnapshot previous, VnodeRepairState... states) + { + assertNewVnodeStates(previous, states); + assertNewSubRangeStates(previous, states); + } + + private void assertNewState(VnodeRepairStateFactory factory, RepairStateSnapshot previous, Class expectedClass, VnodeRepairState... expectedStates) + { + assertNewState(factory, previous, expectedClass, Arrays.asList(expectedStates)); + } + + private void assertNewState(VnodeRepairStateFactory factory, RepairStateSnapshot previous, Class expectedClass, Collection expectedStates) + { + VnodeRepairStates newStates = factory.calculateNewState(myMockNode, TABLE_REFERENCE, previous, System.currentTimeMillis()); + assertThat(newStates).isInstanceOf(expectedClass); + + Collection vnodeRepairStates = newStates.getVnodeRepairStates(); + assertThat(vnodeRepairStates).containsOnlyElementsOf(expectedStates); + } + + private void assertClusterWideStateSameForVnodeAndSubrange(long to, long from, VnodeRepairState... 
states) + { + assertClusterWideVnodeStates(to, from, states); + assertClusterWideSubRangeStates(to, from, states); + } + + private void assertClusterWideVnodeStates(long to, long from, VnodeRepairState... states) + { + VnodeRepairStateFactory vnodeRepairStateFactory = new VnodeRepairStateFactoryImpl(mockReplicationState, repairHistoryProvider, false); + assertClusterWideState(vnodeRepairStateFactory, to, from, VnodeRepairStatesImpl.class, states); + } + + private void assertClusterWideSubRangeStates(long to, long from, VnodeRepairState... states) + { + VnodeRepairStateFactory subRangeRepairStateFactory = new VnodeRepairStateFactoryImpl(mockReplicationState, repairHistoryProvider, true); + assertClusterWideState(subRangeRepairStateFactory, to, from, SubRangeRepairStates.class, states); + } + + private void assertClusterWideState(VnodeRepairStateFactory factory, long to, long from, Class expectedClass, VnodeRepairState... expectedStates) + { + assertClusterWideState(factory, to, from, expectedClass, Arrays.asList(expectedStates)); + } + + private void assertClusterWideState(VnodeRepairStateFactory factory, long to, long from, Class expectedClass, Collection expectedStates) + { + VnodeRepairStates states = factory.calculateClusterWideState(myMockNode, TABLE_REFERENCE, to, from); + assertThat(states).isInstanceOf(expectedClass); + + Collection vnodeRepairStates = states.getVnodeRepairStates(); + assertThat(vnodeRepairStates).containsOnlyElementsOf(expectedStates); + } + + private void assertStateSameForVnodeAndSubrange(long to, long from, VnodeRepairState... states) + { + assertVnodeStates(to, from, states); + assertSubRangeStates(to, from, states); + } + + private void assertVnodeStates(long to, long from, VnodeRepairState... 
states) + { + VnodeRepairStateFactory vnodeRepairStateFactory = new VnodeRepairStateFactoryImpl(mockReplicationState, repairHistoryProvider, false); + assertState(vnodeRepairStateFactory, to, from, VnodeRepairStatesImpl.class, states); + } + + private void assertSubRangeStates(long to, long from, VnodeRepairState... states) + { + VnodeRepairStateFactory subRangeRepairStateFactory = new VnodeRepairStateFactoryImpl(mockReplicationState, repairHistoryProvider, true); + assertState(subRangeRepairStateFactory, to, from, SubRangeRepairStates.class, states); + } + + private void assertState(VnodeRepairStateFactory factory, long to, long from, Class expectedClass, VnodeRepairState... expectedStates) + { + assertState(factory, to, from, expectedClass, Arrays.asList(expectedStates)); + } + + private void assertState(VnodeRepairStateFactory factory, long to, long from, Class expectedClass, Collection expectedStates) + { + VnodeRepairStates newStates = factory.calculateClusterWideState(myMockNode, TABLE_REFERENCE, to, from); + assertThat(newStates).isInstanceOf(expectedClass); + + Collection vnodeRepairStates = newStates.getVnodeRepairStates(); + assertThat(vnodeRepairStates).containsOnlyElementsOf(expectedStates); + } + + private class MockedRepairHistoryProvider implements RepairHistoryProvider + { + private final TableReference myTableReference; + private final Node myNode; + + public MockedRepairHistoryProvider(Node node, TableReference tableReference) + { + myNode = node; + myTableReference = tableReference; + } + + @Override + public Iterator iterate(Node node, TableReference tableReference, long to, Predicate predicate) + { + assertThat(tableReference).isEqualTo(myTableReference); + + if (node != null) + { + assertThat(node).isEqualTo(myNode); + } + + return new MockedRepairEntryIterator(repairHistory.iterator(), predicate, to, -1L); + } + + @Override + public Iterator iterate(Node node, TableReference tableReference, long to, long from, Predicate predicate) + { + 
assertThat(tableReference).isEqualTo(myTableReference); + + if (node != null) + { + assertThat(node).isEqualTo(myNode); + } + + return new MockedRepairEntryIterator(repairHistory.iterator(), predicate, to, from); + } + } + + private static class MockedRepairEntryIterator extends AbstractIterator + { + private final Iterator myBaseIterator; + private final Predicate myPredicate; + private final long myTo; + private final long myFrom; + + MockedRepairEntryIterator(Iterator baseIterator, Predicate predicate, long to, long from) + { + myBaseIterator = baseIterator; + myPredicate = predicate; + myTo = to; + myFrom = from; + } + + @Override + protected RepairEntry computeNext() + { + while(myBaseIterator.hasNext()) + { + RepairEntry next = myBaseIterator.next(); + if (next.getFinishedAt() <= myTo) + { + if (myFrom != -1L) + { + if (next.getStartedAt() >= myFrom && myPredicate.apply(next)) + { + return next; + } + } + else + { + if (myPredicate.apply(next)) + { + return next; + } + } + } + } + + return endOfData(); + } + } +} diff --git a/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/vnode/TestVnodeRepairStateSummarizer.java b/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/vnode/TestVnodeRepairStateSummarizer.java new file mode 100644 index 00000000..b05292ae --- /dev/null +++ b/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/vnode/TestVnodeRepairStateSummarizer.java @@ -0,0 +1,388 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.ericsson.bss.cassandra.ecchronos.core.impl.repair.vnode; + +import com.ericsson.bss.cassandra.ecchronos.core.metadata.DriverNode; +import com.ericsson.bss.cassandra.ecchronos.core.state.LongTokenRange; +import com.ericsson.bss.cassandra.ecchronos.core.state.VnodeRepairState; +import com.google.common.collect.ImmutableSet; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.Mock; +import org.mockito.junit.MockitoJUnitRunner; + +import java.time.LocalDateTime; +import java.time.ZoneOffset; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +import static org.assertj.core.api.Assertions.assertThat; + +@RunWith(MockitoJUnitRunner.class) +public class TestVnodeRepairStateSummarizer +{ + @Mock + DriverNode mockNode; + + @Test + public void summarizeSingleTokenNoPartial() + { + VnodeRepairState baseVnode = withVnode(500, 3000, dateToTimestamp("2020-03-12T16:00:00")); + List baseVnodes = Collections.singletonList(baseVnode); + + List actualVnodeRepairStates = summarize(baseVnode); + + assertThat(actualVnodeRepairStates).isEqualTo(baseVnodes); + } + + @Test + public void summarizeSingleTokenFullPartial() + { + VnodeRepairState baseVnode = withVnode(500, 3000, dateToTimestamp("2020-03-12T16:00:00")); + VnodeRepairState partialVnode = withVnode(500, 3000, dateToTimestamp("2020-03-13T16:00:00")); + + List actualVnodeRepairStates = summarize(baseVnode, partialVnode); + + assertThat(actualVnodeRepairStates).containsExactly(partialVnode); + } + + @Test + public void 
summarizeSingleTokenMultiplePartialSequential() + { + VnodeRepairState baseVnode = withVnode(500, 3000, dateToTimestamp("2020-03-12T16:00:00")); + + VnodeRepairState partialVnode1 = withVnode(500, 1800, dateToTimestamp("2020-03-13T15:30:00")); + VnodeRepairState partialVnode2 = withVnode(1800, 2500, dateToTimestamp("2020-03-13T16:15:00")); + VnodeRepairState partialVnode3 = withVnode(2500, 3000, dateToTimestamp("2020-03-13T16:29:59")); + + List actualVnodeRepairStates = summarize(baseVnode, partialVnode2, partialVnode1, partialVnode3); + + assertThat(actualVnodeRepairStates).containsExactly(withVnode(500, 3000, dateToTimestamp("2020-03-13T15:30:00"))); + } + + @Test + public void summarizeMultipleAdjacentTokenMultiplePartialSequential() + { + VnodeRepairState baseVnode1 = withVnode(500, 3000, dateToTimestamp("2020-03-12T16:00:00")); + VnodeRepairState baseVnode2 = withVnode(3000, 5500, dateToTimestamp("2020-03-12T16:05:00")); + + // Base1 + VnodeRepairState partialVnode1 = withVnode(500, 1800, dateToTimestamp("2020-03-13T15:30:00")); + VnodeRepairState partialVnode2 = withVnode(1800, 2500, dateToTimestamp("2020-03-13T16:15:00")); + VnodeRepairState partialVnode3 = withVnode(2500, 3000, dateToTimestamp("2020-03-13T16:29:59")); + + // Base2 + VnodeRepairState partialVnode4 = withVnode(3000, 3800, dateToTimestamp("2020-03-13T15:45:00")); + VnodeRepairState partialVnode5 = withVnode(3800, 4500, dateToTimestamp("2020-03-13T16:15:00")); + VnodeRepairState partialVnode6 = withVnode(4500, 5500, dateToTimestamp("2020-03-13T16:29:59")); + + List actualVnodeRepairStates = summarize(Arrays.asList(baseVnode1, baseVnode2), + partialVnode2, partialVnode1, partialVnode3, partialVnode4, partialVnode5, partialVnode6); + + assertThat(actualVnodeRepairStates).containsExactlyInAnyOrder( + withVnode(500, 3000, dateToTimestamp("2020-03-13T15:30:00")) + ,withVnode(3000, 5500, dateToTimestamp("2020-03-13T15:45:00")) + ); + } + + @Test + public void 
summarizeSingleTokenMultiplePartialSequentialMoreThanOneHour() + { + VnodeRepairState baseVnode = withVnode(500, 3000, dateToTimestamp("2020-03-12T16:00:00")); + + VnodeRepairState partialVnode1 = withVnode(500, 1800, dateToTimestamp("2020-03-13T15:30:00")); + VnodeRepairState partialVnode2 = withVnode(1800, 2500, dateToTimestamp("2020-03-13T16:15:00")); + VnodeRepairState partialVnode3 = withVnode(2500, 2800, dateToTimestamp("2020-03-13T16:30:01")); + VnodeRepairState partialVnode4 = withVnode(2800, 3000, dateToTimestamp("2020-03-13T16:31:00")); + + List actualVnodeRepairStates = summarize(baseVnode, partialVnode2, partialVnode1, partialVnode3, partialVnode4); + + assertThat(actualVnodeRepairStates).containsExactly( + withVnode(500, 2500, dateToTimestamp("2020-03-13T15:30:00")), + withVnode(2500, 3000, dateToTimestamp("2020-03-13T16:30:01")) + ); + } + + @Test + public void summarizeSingleTokenMultiplePartialNonSequential() + { + VnodeRepairState baseVnode = withVnode(500, 3000, dateToTimestamp("2020-03-12T16:00:00")); + + VnodeRepairState partialVnode1 = withVnode(600, 1800, dateToTimestamp("2020-03-13T16:00:00")); + VnodeRepairState partialVnode2 = withVnode(2000, 2800, dateToTimestamp("2020-03-13T16:05:00")); + + List actualVnodeRepairStates = summarize(baseVnode, partialVnode2, partialVnode1); + + assertThat(actualVnodeRepairStates).containsExactly( + withVnode(500, 600, dateToTimestamp("2020-03-12T16:00:00")), + withVnode(600, 1800, dateToTimestamp("2020-03-13T16:00:00")), + withVnode(1800, 2000, dateToTimestamp("2020-03-12T16:00:00")), + withVnode(2000, 2800, dateToTimestamp("2020-03-13T16:05:00")), + withVnode(2800, 3000, dateToTimestamp("2020-03-12T16:00:00")) + ); + } + + @Test + public void summarizeSingleTokenMultiplePartialOverlapping() + { + VnodeRepairState baseVnode = withVnode(500, 3000, dateToTimestamp("2020-03-12T16:00:00")); + + VnodeRepairState partialVnode1 = withVnode(600, 1800, dateToTimestamp("2020-03-13T16:00:00")); + VnodeRepairState 
partialVnode2 = withVnode(1600, 2800, dateToTimestamp("2020-03-13T16:05:00")); + + List actualVnodeRepairStates = summarize(baseVnode, partialVnode2, partialVnode1); + + assertThat(actualVnodeRepairStates).containsExactly( + withVnode(500, 600, dateToTimestamp("2020-03-12T16:00:00")), + withVnode(600, 2800, dateToTimestamp("2020-03-13T16:00:00")), + withVnode(2800, 3000, dateToTimestamp("2020-03-12T16:00:00")) + ); + } + + @Test + public void summarizeSingleTokenMultiplePartialOverlappingMoreThanOneHour() + { + VnodeRepairState baseVnode = withVnode(500, 3000, dateToTimestamp("2020-03-12T16:00:00")); + + VnodeRepairState partialVnode1 = withVnode(600, 1800, dateToTimestamp("2020-03-13T16:00:00")); + VnodeRepairState partialVnode2 = withVnode(1600, 2800, dateToTimestamp("2020-03-13T17:00:01")); + + List actualVnodeRepairStates = summarize(baseVnode, partialVnode2, partialVnode1); + + assertThat(actualVnodeRepairStates).containsExactly( + withVnode(500, 600, dateToTimestamp("2020-03-12T16:00:00")), + withVnode(600, 1600, dateToTimestamp("2020-03-13T16:00:00")), + withVnode(1600, 2800, dateToTimestamp("2020-03-13T17:00:01")), + withVnode(2800, 3000, dateToTimestamp("2020-03-12T16:00:00")) + ); + } + + @Test + public void summarizeSingleTokenMultiplePartialOverlappingMoreThanOneHourBefore() + { + VnodeRepairState baseVnode = withVnode(500, 3000, dateToTimestamp("2020-03-12T16:00:00")); + + VnodeRepairState partialVnode1 = withVnode(600, 1800, dateToTimestamp("2020-03-13T17:00:01")); + VnodeRepairState partialVnode2 = withVnode(1600, 2800, dateToTimestamp("2020-03-13T16:00:00")); + + List actualVnodeRepairStates = summarize(baseVnode, partialVnode2, partialVnode1); + + assertThat(actualVnodeRepairStates).containsExactly( + withVnode(500, 600, dateToTimestamp("2020-03-12T16:00:00")), + withVnode(600, 1800, dateToTimestamp("2020-03-13T17:00:01")), + withVnode(1800, 2800, dateToTimestamp("2020-03-13T16:00:00")), + withVnode(2800, 3000, 
dateToTimestamp("2020-03-12T16:00:00")) + ); + } + + @Test + public void summarizeSingleTokenMultiplePartialWrapAroundSequential() + { + VnodeRepairState baseVnode = withVnode(30000, 15000, dateToTimestamp("2020-03-12T16:00:00")); + + VnodeRepairState partialVnode1 = withVnode(40000, 50000, dateToTimestamp("2020-03-13T16:00:00")); + VnodeRepairState partialVnode2 = withVnode(-5000, 10000, dateToTimestamp("2020-03-13T16:05:00")); + + List actualVnodeRepairStates = summarize(baseVnode, partialVnode2, partialVnode1); + + assertThat(actualVnodeRepairStates).containsExactly( + withVnode(30000, 40000, dateToTimestamp("2020-03-12T16:00:00")), + withVnode(40000, 50000, dateToTimestamp("2020-03-13T16:00:00")), + withVnode(50000, -5000, dateToTimestamp("2020-03-12T16:00:00")), + withVnode(-5000, 10000, dateToTimestamp("2020-03-13T16:05:00")), + withVnode(10000, 15000, dateToTimestamp("2020-03-12T16:00:00")) + ); + } + + @Test + public void summarizeSingleTokenMultiplePartialOverlappingWrapAround() + { + VnodeRepairState baseVnode = withVnode(5000, 3000, dateToTimestamp("2020-03-12T16:00:00")); + + VnodeRepairState partialVnode1 = withVnode(600, 1800, dateToTimestamp("2020-03-13T16:00:00")); + VnodeRepairState partialVnode2 = withVnode(1600, 2800, dateToTimestamp("2020-03-13T16:05:00")); + + List actualVnodeRepairStates = summarize(baseVnode, partialVnode2, partialVnode1); + + assertThat(actualVnodeRepairStates).containsExactly( + withVnode(5000, 600, dateToTimestamp("2020-03-12T16:00:00")), + withVnode(600, 2800, dateToTimestamp("2020-03-13T16:00:00")), + withVnode(2800, 3000, dateToTimestamp("2020-03-12T16:00:00")) + ); + } + + @Test + public void summarizeMultipleCoveringAndIntersectingRanges() + { + VnodeRepairState baseVnode = withVnode(100, 3000, dateToTimestamp("2020-03-12T16:00:00")); + + VnodeRepairState partialVnode1 = withVnode(100, 500, dateToTimestamp("2020-03-13T16:00:00")); // 100 -> 250 + VnodeRepairState partialVnode2 = withVnode(200, 300, 
dateToTimestamp("2020-03-12T16:05:00")); + VnodeRepairState partialVnode3 = withVnode(250, 450, dateToTimestamp("2020-03-12T16:05:00")); + VnodeRepairState partialVnode4 = withVnode(250, 600, dateToTimestamp("2020-03-13T17:05:00")); // 250 -> 600 + VnodeRepairState partialVnode5 = withVnode(400, 500, dateToTimestamp("2020-03-12T16:05:00")); + VnodeRepairState partialVnode6 = withVnode(400, 3000, dateToTimestamp("2020-03-12T16:05:00")); // 600 -> 3000 + + List actualVnodeRepairStates = summarize(baseVnode, partialVnode2, partialVnode1, partialVnode3, partialVnode4, partialVnode5, partialVnode6); + + assertThat(actualVnodeRepairStates).containsExactly( + withVnode(100, 250, dateToTimestamp("2020-03-13T16:00:00")), + withVnode(250, 600, dateToTimestamp("2020-03-13T17:05:00")), + withVnode(600, 3000, dateToTimestamp("2020-03-12T16:05:00")) + ); + } + + @Test + public void summarizeMultipleIntersectingRanges() + { + VnodeRepairState baseVnode = withVnode(100, 3000, dateToTimestamp("2020-03-12T16:00:00")); + + VnodeRepairState partialVnode1 = withVnode(200, 300, dateToTimestamp("2020-03-12T16:00:00")); + VnodeRepairState partialVnode2 = withVnode(200, 400, dateToTimestamp("2020-03-12T16:05:00")); + VnodeRepairState partialVnode3 = withVnode(200, 500, dateToTimestamp("2020-03-13T16:00:00")); // 100 -> 500 + + List actualVnodeRepairStates = summarize(baseVnode, partialVnode2, partialVnode1, partialVnode3); + + assertThat(actualVnodeRepairStates).containsExactly( + withVnode(100, 200, dateToTimestamp("2020-03-12T16:00:00")), + withVnode(200, 500, dateToTimestamp("2020-03-13T16:00:00")), + withVnode(500, 3000, dateToTimestamp("2020-03-12T16:00:00")) + ); + } + + @Test + public void summarizeMultipleCoveringAndIntersectingRangesWithLaterRepairedAt() + { + VnodeRepairState baseVnode = withVnode(100, 3000, dateToTimestamp("2020-03-12T16:00:00")); + + VnodeRepairState partialVnode1 = withVnode(100, 500, dateToTimestamp("2020-03-13T16:00:00")); // 100 -> 200 + VnodeRepairState 
partialVnode2 = withVnode(200, 300, dateToTimestamp("2020-03-14T16:05:00")); // 200 -> 500 (1) + VnodeRepairState partialVnode3 = withVnode(250, 450, dateToTimestamp("2020-03-14T16:05:00")); + VnodeRepairState partialVnode4 = withVnode(250, 600, dateToTimestamp("2020-03-13T17:05:00")); // 500 -> 600 + VnodeRepairState partialVnode5 = withVnode(400, 500, dateToTimestamp("2020-03-14T16:05:00")); // 200 -> 500 (2) + VnodeRepairState partialVnode6 = withVnode(400, 3000, dateToTimestamp("2020-03-13T16:05:00")); // 600 -> 3000 + + List actualVnodeRepairStates = summarize(baseVnode, partialVnode2, partialVnode1, partialVnode3, partialVnode4, partialVnode5, partialVnode6); + + assertThat(actualVnodeRepairStates).containsExactly( + withVnode(100, 200, dateToTimestamp("2020-03-13T16:00:00")), + withVnode(200, 500, dateToTimestamp("2020-03-14T16:05:00")), + withVnode(500, 600, dateToTimestamp("2020-03-13T17:05:00")), + withVnode(600, 3000, dateToTimestamp("2020-03-13T16:05:00")) + ); + } + + @Test + public void summarizeMultipleCoveringAndIntersectingRangesWithEarlierRepairedAt() + { + VnodeRepairState baseVnode = withVnode(100, 3000, dateToTimestamp("2020-03-15T16:00:00")); + + VnodeRepairState partialVnode1 = withVnode(100, 500, dateToTimestamp("2020-03-13T16:00:00")); + VnodeRepairState partialVnode2 = withVnode(200, 300, dateToTimestamp("2020-03-14T16:05:00")); + VnodeRepairState partialVnode3 = withVnode(250, 450, dateToTimestamp("2020-03-14T16:05:00")); + VnodeRepairState partialVnode4 = withVnode(250, 600, dateToTimestamp("2020-03-13T17:05:00")); + VnodeRepairState partialVnode5 = withVnode(400, 500, dateToTimestamp("2020-03-14T16:05:00")); + VnodeRepairState partialVnode6 = withVnode(400, 3000, dateToTimestamp("2020-03-13T16:05:00")); + + List actualVnodeRepairStates = summarize(baseVnode, partialVnode2, partialVnode1, partialVnode3, partialVnode4, partialVnode5, partialVnode6); + + assertThat(actualVnodeRepairStates).containsExactly( + withVnode(100, 3000, 
dateToTimestamp("2020-03-15T16:00:00")) + ); + } + + @Test + public void summarizeWraparoundMultipleCoveringAndIntersectingRanges() + { + VnodeRepairState baseVnode = withVnode(5000, 3000, dateToTimestamp("2020-03-12T16:00:00")); + + VnodeRepairState partialVnode1 = withVnode(5000, -12000, dateToTimestamp("2020-03-13T16:00:00")); // 5000 -> 7000 + VnodeRepairState partialVnode2 = withVnode(6000, -14000, dateToTimestamp("2020-03-12T16:05:00")); + VnodeRepairState partialVnode3 = withVnode(7000, -13000, dateToTimestamp("2020-03-12T16:05:00")); + VnodeRepairState partialVnode4 = withVnode(7000, -10000, dateToTimestamp("2020-03-13T17:05:00")); // 7000 -> -10000 + VnodeRepairState partialVnode5 = withVnode(-15000, -12000, dateToTimestamp("2020-03-12T16:05:00")); + VnodeRepairState partialVnode6 = withVnode(-15000, 3000, dateToTimestamp("2020-03-12T16:05:00")); // -10000 -> 3000 + + List actualVnodeRepairStates = summarize(baseVnode, partialVnode2, partialVnode1, partialVnode3, partialVnode4, partialVnode5, partialVnode6); + + assertThat(actualVnodeRepairStates).containsExactly( + withVnode(5000, 7000, dateToTimestamp("2020-03-13T16:00:00")), + withVnode(7000, -10000, dateToTimestamp("2020-03-13T17:05:00")), + withVnode(-10000, 3000, dateToTimestamp("2020-03-12T16:05:00")) + ); + } + + @Test + public void summarizeWraparoundMultipleCoveringAndIntersectingRangesWithLaterRepairedAt() + { + VnodeRepairState baseVnode = withVnode(5000, 3000, dateToTimestamp("2020-03-12T16:00:00")); + + VnodeRepairState partialVnode1 = withVnode(5000, -12000, dateToTimestamp("2020-03-13T16:00:00")); // 5000 -> 6000 + VnodeRepairState partialVnode2 = withVnode(6000, -14000, dateToTimestamp("2020-03-14T16:05:00")); // 6000 -> -12000 + VnodeRepairState partialVnode3 = withVnode(7000, -13000, dateToTimestamp("2020-03-14T16:05:00")); + VnodeRepairState partialVnode4 = withVnode(7000, -10000, dateToTimestamp("2020-03-13T17:05:00")); // -12000 -> -10000 + VnodeRepairState partialVnode5 = 
withVnode(-15000, -12000, dateToTimestamp("2020-03-14T16:05:00")); + VnodeRepairState partialVnode6 = withVnode(-15000, 3000, dateToTimestamp("2020-03-13T16:05:00")); // -10000 -> 3000 + + List actualVnodeRepairStates = summarize(baseVnode, partialVnode2, partialVnode1, partialVnode3, partialVnode4, partialVnode5, partialVnode6); + + assertThat(actualVnodeRepairStates).containsExactly( + withVnode(5000, 6000, dateToTimestamp("2020-03-13T16:00:00")), + withVnode(6000, -12000, dateToTimestamp("2020-03-14T16:05:00")), + withVnode(-12000, -10000, dateToTimestamp("2020-03-13T17:05:00")), + withVnode(-10000, 3000, dateToTimestamp("2020-03-13T16:05:00")) + ); + } + + @Test + public void summarizeWraparoundMultipleCoveringAndIntersectingRangesWithEarlierRepairedAt() + { + VnodeRepairState baseVnode = withVnode(5000, 3000, dateToTimestamp("2020-03-15T16:00:00")); + + VnodeRepairState partialVnode1 = withVnode(5000, -12000, dateToTimestamp("2020-03-13T16:00:00")); // 5000 -> 6000 + VnodeRepairState partialVnode2 = withVnode(6000, -14000, dateToTimestamp("2020-03-14T16:05:00")); // 6000 -> -12000 + VnodeRepairState partialVnode3 = withVnode(7000, -13000, dateToTimestamp("2020-03-14T16:05:00")); + VnodeRepairState partialVnode4 = withVnode(7000, -10000, dateToTimestamp("2020-03-13T17:05:00")); // -12000 -> -10000 + VnodeRepairState partialVnode5 = withVnode(-15000, -12000, dateToTimestamp("2020-03-14T16:05:00")); + VnodeRepairState partialVnode6 = withVnode(-15000, 3000, dateToTimestamp("2020-03-13T16:05:00")); // -10000 -> 3000 + + List actualVnodeRepairStates = summarize(baseVnode, partialVnode2, partialVnode1, partialVnode3, partialVnode4, partialVnode5, partialVnode6); + + assertThat(actualVnodeRepairStates).containsExactly( + withVnode(5000, 3000, dateToTimestamp("2020-03-15T16:00:00")) + ); + } + + private List summarize(VnodeRepairState baseVnode, VnodeRepairState... 
partialVnodes) + { + return summarize(Collections.singletonList(baseVnode), partialVnodes); + } + + private List summarize(List baseVnodes, VnodeRepairState... partialVnodes) + { + return VnodeRepairStateSummarizer.summarizePartialVnodes(baseVnodes, Arrays.asList(partialVnodes)); + } + + private VnodeRepairState withVnode(long start, long end, long lastRepairedAt) + { + return new VnodeRepairState(new LongTokenRange(start, end), ImmutableSet.of(mockNode), lastRepairedAt); + } + + private long dateToTimestamp(String date) + { + return LocalDateTime.parse(date).toEpochSecond(ZoneOffset.UTC) * 1000; + } +} diff --git a/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/state/TestRepairedAt.java b/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/state/TestRepairedAt.java new file mode 100644 index 00000000..ab027085 --- /dev/null +++ b/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/state/TestRepairedAt.java @@ -0,0 +1,95 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package com.ericsson.bss.cassandra.ecchronos.core.impl.state;
+
+import com.ericsson.bss.cassandra.ecchronos.core.impl.repair.vnode.VnodeRepairStatesImpl;
+import com.ericsson.bss.cassandra.ecchronos.core.metadata.DriverNode;
+import com.ericsson.bss.cassandra.ecchronos.core.state.LongTokenRange;
+import com.ericsson.bss.cassandra.ecchronos.core.state.RepairedAt;
+import com.ericsson.bss.cassandra.ecchronos.core.state.VnodeRepairState;
+import com.ericsson.bss.cassandra.ecchronos.core.state.VnodeRepairStates;
+import com.google.common.collect.ImmutableSet;
+import org.junit.Test;
+
+import java.util.Arrays;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.mockito.Mockito.mock;
+
+public class TestRepairedAt
+{
+    @Test
+    public void testRepaired()
+    {
+        LongTokenRange range = new LongTokenRange(1, 2);
+        LongTokenRange range2 = new LongTokenRange(2, 3);
+        DriverNode node1 = mock(DriverNode.class);
+
+        VnodeRepairState vnodeRepairState = new VnodeRepairState(range, ImmutableSet.of(node1), 1234L);
+        VnodeRepairState vnodeRepairState2 = new VnodeRepairState(range2, ImmutableSet.of(node1), 1235L);
+
+        VnodeRepairStates vnodeRepairStates = VnodeRepairStatesImpl.newBuilder(Arrays.asList(vnodeRepairState, vnodeRepairState2))
+                .build();
+
+        RepairedAt repairedAt = RepairedAt.generate(vnodeRepairStates);
+
+        assertThat(repairedAt.isRepaired()).isTrue(); // the minimum (1234) is a real timestamp
+        assertThat(repairedAt.isPartiallyRepaired()).isFalse(); // the minimum is not UNREPAIRED
+        assertThat(repairedAt.getMaxRepairedAt()).isEqualTo(1235L);
+        assertThat(repairedAt.getMinRepairedAt()).isEqualTo(1234L);
+    }
+
+    @Test
+    public void testPartiallyRepaired()
+    {
+        LongTokenRange range = new LongTokenRange(1, 2);
+        LongTokenRange range2 = new LongTokenRange(2, 3);
+        DriverNode node1 = mock(DriverNode.class);
+
+        VnodeRepairState vnodeRepairState = new VnodeRepairState(range, ImmutableSet.of(node1), 1234L);
+        VnodeRepairState vnodeRepairState2 = new VnodeRepairState(range2, ImmutableSet.of(node1), VnodeRepairState.UNREPAIRED);
+
+        VnodeRepairStates vnodeRepairStates = VnodeRepairStatesImpl.newBuilder(Arrays.asList(vnodeRepairState, vnodeRepairState2))
+                .build();
+
+        RepairedAt repairedAt = RepairedAt.generate(vnodeRepairStates);
+
+        assertThat(repairedAt.isRepaired()).isFalse(); // the minimum is UNREPAIRED
+        assertThat(repairedAt.isPartiallyRepaired()).isTrue(); // the minimum is UNREPAIRED but the maximum differs
+        assertThat(repairedAt.getMaxRepairedAt()).isEqualTo(1234L);
+        assertThat(repairedAt.getMinRepairedAt()).isEqualTo(VnodeRepairState.UNREPAIRED);
+    }
+
+    @Test
+    public void testNotRepaired()
+    {
+        LongTokenRange range = new LongTokenRange(1, 2);
+        LongTokenRange range2 = new LongTokenRange(2, 3);
+        DriverNode node1 = mock(DriverNode.class);
+
+        VnodeRepairState vnodeRepairState = new VnodeRepairState(range, ImmutableSet.of(node1), VnodeRepairState.UNREPAIRED);
+        VnodeRepairState vnodeRepairState2 = new VnodeRepairState(range2, ImmutableSet.of(node1), VnodeRepairState.UNREPAIRED);
+
+        VnodeRepairStates vnodeRepairStates = VnodeRepairStatesImpl.newBuilder(Arrays.asList(vnodeRepairState, vnodeRepairState2))
+                .build();
+
+        RepairedAt repairedAt = RepairedAt.generate(vnodeRepairStates);
+
+        assertThat(repairedAt.isRepaired()).isFalse(); // the minimum is UNREPAIRED
+        assertThat(repairedAt.isPartiallyRepaired()).isFalse(); // minimum and maximum are both UNREPAIRED
+        assertThat(repairedAt.getMaxRepairedAt()).isEqualTo(VnodeRepairState.UNREPAIRED);
+        assertThat(repairedAt.getMinRepairedAt()).isEqualTo(VnodeRepairState.UNREPAIRED);
+    }
+}
diff --git a/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/metadata/DriverNode.java b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/metadata/DriverNode.java
index 6399e782..f7a3ee56 100644
--- a/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/metadata/DriverNode.java
+++ b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/metadata/DriverNode.java
@@ -64,6 +64,15 @@ public String getDatacenter()
         return node.getDatacenter();
     }
 
+    /**
+     * Get the wrapped driver {@link Node} object.
+     *
+     * @return The wrapped node.
+     */
+    public Node getNode()
+    {
+        return node;
+    }
     /**
      * Check for equality.
      */
diff --git a/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/state/HostStates.java b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/state/HostStates.java
new file mode 100644
index 00000000..5ca373e6
--- /dev/null
+++ b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/state/HostStates.java
@@ -0,0 +1,41 @@
+/*
+ * Copyright 2024 Telefonaktiebolaget LM Ericsson
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.ericsson.bss.cassandra.ecchronos.core.state;
+
+import com.datastax.oss.driver.api.core.metadata.Node;
+import com.ericsson.bss.cassandra.ecchronos.core.metadata.DriverNode;
+
+/**
+ * Interface used to determine node statuses.
+ */
+public interface HostStates
+{
+    /**
+     * Check if a host, given as a driver {@link Node}, is up.
+     *
+     * @param node The driver node to check.
+     * @return True if the host is up. False will be returned if the state is unknown or if the host is down.
+     */
+    boolean isUp(Node node);
+
+    /**
+     * Check if a node, given as a {@link DriverNode}, is up.
+     *
+     * @param node The {@link DriverNode} to check.
+     * @return True if the node is up. False will be returned if the state is unknown or if the node is down.
+     */
+    boolean isUp(DriverNode node);
+}
+
diff --git a/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/state/PostUpdateHook.java b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/state/PostUpdateHook.java
new file mode 100644
index 00000000..d58cb150
--- /dev/null
+++ b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/state/PostUpdateHook.java
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2024 Telefonaktiebolaget LM Ericsson
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.ericsson.bss.cassandra.ecchronos.core.state;
+
+/**
+ * Interface for injecting code to be executed after {@link RepairState#update()}.
+ */
+@FunctionalInterface
+public interface PostUpdateHook
+{
+    /**
+     * Runs each time the {@link RepairState} is updated.
+     *
+     * @param repairStateSnapshot The current repair state snapshot.
+     */
+    void postUpdate(RepairStateSnapshot repairStateSnapshot);
+}
diff --git a/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/state/RepairEntry.java b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/state/RepairEntry.java
new file mode 100644
index 00000000..79f6b47c
--- /dev/null
+++ b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/state/RepairEntry.java
@@ -0,0 +1,141 @@
+/*
+ * Copyright 2024 Telefonaktiebolaget LM Ericsson
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.ericsson.bss.cassandra.ecchronos.core.state;
+
+import com.ericsson.bss.cassandra.ecchronos.core.metadata.DriverNode;
+import com.ericsson.bss.cassandra.ecchronos.utils.enums.repair.RepairStatus;
+import java.util.Collections;
+import java.util.Objects;
+import java.util.Set;
+
+/**
+ * An immutable entry from the repair history.
+ */
+public final class RepairEntry
+{
+    private final LongTokenRange myRange;
+    private final long myStartedAt;
+    private final Set<DriverNode> myParticipants;
+    private final RepairStatus myStatus;
+    private final long myFinishedAt;
+
+    /**
+     * Constructor.
+     *
+     * @param range Token range.
+     * @param startedAt Start timestamp.
+     * @param finishedAt End timestamp.
+     * @param participants Node participants; stored as an unmodifiable view of the given set (not a copy).
+     * @param status The status string, converted with {@code RepairStatus.getFromStatus(status)}.
+     */
+    public RepairEntry(final LongTokenRange range,
+                       final long startedAt,
+                       final long finishedAt,
+                       final Set<DriverNode> participants,
+                       final String status)
+    {
+        myRange = range;
+        myStartedAt = startedAt;
+        myFinishedAt = finishedAt;
+        myParticipants = Collections.unmodifiableSet(participants);
+        myStatus = RepairStatus.getFromStatus(status);
+    }
+
+    /**
+     * Get the token range of this entry.
+     *
+     * @return The token range.
+     */
+    public LongTokenRange getRange()
+    {
+        return myRange;
+    }
+
+    /**
+     * Get the start timestamp of this entry.
+     *
+     * @return The start timestamp.
+     */
+    public long getStartedAt()
+    {
+        return myStartedAt;
+    }
+
+    /**
+     * Get the end timestamp of this entry.
+     *
+     * @return The end timestamp.
+     */
+    public long getFinishedAt()
+    {
+        return myFinishedAt;
+    }
+
+    /**
+     * Get the participating nodes as an unmodifiable set.
+     *
+     * @return The participants
+     */
+    public Set<DriverNode> getParticipants()
+    {
+        return myParticipants;
+    }
+
+    /**
+     * Get the status of this entry.
+     *
+     * @return The repair status.
+     */
+    public RepairStatus getStatus()
+    {
+        return myStatus;
+    }
+
+    /**
+     * Equality based on range, timestamps, participants and status.
+     *
+     * @param o The object to test equality with.
+     * @return True if {@code o} is a {@code RepairEntry} with equal fields.
+     */
+    @Override
+    public boolean equals(final Object o)
+    {
+        if (this == o)
+        {
+            return true;
+        }
+        if (o == null || getClass() != o.getClass())
+        {
+            return false;
+        }
+        RepairEntry that = (RepairEntry) o;
+        return myStartedAt == that.myStartedAt
+                && myFinishedAt == that.myFinishedAt
+                && Objects.equals(myRange, that.myRange)
+                && Objects.equals(myParticipants, that.myParticipants)
+                && myStatus == that.myStatus;
+    }
+
+    /**
+     * Hash over the same fields that {@link #equals(Object)} compares.
+     *
+     * @return The hash code.
+     */
+    @Override
+    public int hashCode()
+    {
+        return Objects.hash(myRange, myStartedAt, myFinishedAt, myParticipants, myStatus);
+    }
+}
diff --git a/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/state/RepairHistory.java b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/state/RepairHistory.java
new file mode 100644
index 00000000..8e90ac07
--- /dev/null
+++ b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/state/RepairHistory.java
@@ -0,0 +1,81 @@
+/*
+ * Copyright 2024 Telefonaktiebolaget LM Ericsson
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.ericsson.bss.cassandra.ecchronos.core.state;
+
+import com.datastax.oss.driver.api.core.metadata.Node;
+import com.ericsson.bss.cassandra.ecchronos.core.metadata.DriverNode;
+import com.ericsson.bss.cassandra.ecchronos.core.table.TableReference;
+import com.ericsson.bss.cassandra.ecchronos.utils.enums.repair.RepairStatus;
+import java.util.Set;
+import java.util.UUID;
+
+public interface RepairHistory // creates RepairSession objects; NO_OP is an implementation that does nothing
+{
+    RepairHistory NO_OP = new NoOpRepairHistory();
+
+    RepairSession newSession(
+            Node node,
+            TableReference tableReference,
+            UUID jobId,
+            LongTokenRange range,
+            Set<DriverNode> participants);
+
+    interface RepairSession
+    {
+        void start();
+
+        void finish(RepairStatus repairStatus);
+    }
+
+    class NoOpRepairHistory implements RepairHistory
+    {
+        private static final RepairSession NO_OP = new NoOpRepairSession();
+
+        /**
+         * Return the shared no-op session; all arguments are ignored.
+         */
+        @Override
+        public RepairSession newSession(
+                final Node node,
+                final TableReference tableReference,
+                final UUID jobId,
+                final LongTokenRange range,
+                final Set<DriverNode> participants)
+        {
+            return NO_OP;
+        }
+    }
+
+    class NoOpRepairSession implements RepairSession
+    {
+        /**
+         * Start the session; this implementation does nothing.
+         */
+        @Override
+        public void start()
+        {
+            // Do nothing
+        }
+
+        /**
+         * Finish the session; this implementation ignores the status and does nothing.
+         */
+        @Override
+        public void finish(final RepairStatus repairStatus)
+        {
+            // Do nothing
+        }
+    }
+}
diff --git a/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/state/RepairHistoryProvider.java b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/state/RepairHistoryProvider.java
new file mode 100644
index 00000000..a55c18c6
--- /dev/null
+++ b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/state/RepairHistoryProvider.java
@@ -0,0 +1,50 @@
+/*
+ * Copyright 2024 Telefonaktiebolaget LM Ericsson
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.ericsson.bss.cassandra.ecchronos.core.state;
+
+import com.datastax.oss.driver.api.core.metadata.Node;
+import java.util.Iterator;
+import com.google.common.base.Predicate;
+import com.ericsson.bss.cassandra.ecchronos.core.table.TableReference;
+
+/**
+ * Interface used to retrieve repair history.
+ */
+public interface RepairHistoryProvider
+{
+    /**
+     * Iterate the repair history for the provided table, going backwards until {@code to} (this overload has no
+     * {@code from}). The predicate is used to decide which repair entries should be filtered out of the result.
+     *
+     * @param tableReference The table for which the history should be iterated.
+     * @param to The latest point in time to iterate to.
+     * @param predicate The predicate used to filter out entries in the iterator results.
+     * @return A filtered iterator for the repair history of the table.
+     */
+    Iterator<RepairEntry> iterate(Node node, TableReference tableReference, long to, Predicate<RepairEntry> predicate);
+
+    /**
+     * Iterate the repair history for the provided table starting from the {@code from} and going backwards until
+     * {@code to}. The predicate is used to decide which repair entries should be filtered out of the result.
+     *
+     * @param tableReference The table for which the history should be iterated.
+     * @param to The last point in time to iterate to.
+     * @param from The point in time to start iterating from.
+     * @param predicate The predicate used to filter out entries in the iterator results.
+     * @return A filtered iterator for the repair history of the table.
+     */
+    Iterator<RepairEntry> iterate(Node node, TableReference tableReference, long to, long from, Predicate<RepairEntry> predicate);
+}
+
diff --git a/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/state/RepairState.java b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/state/RepairState.java
new file mode 100644
index 00000000..2c410237
--- /dev/null
+++ b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/state/RepairState.java
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2024 Telefonaktiebolaget LM Ericsson
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.ericsson.bss.cassandra.ecchronos.core.state;
+
+/**
+ * Interface used by TableRepairJob to update and get a snapshot of the current repair state of a table.
+ *
+ * @see RepairStateSnapshot
+ */
+public interface RepairState
+{
+    /**
+     * Update the repair state for the table in the specified node.
+     */
+    void update();
+
+    /**
+     * Get an immutable copy of the current repair state.
+     *
+     * @return The immutable copy.
+     */
+    RepairStateSnapshot getSnapshot();
+}
diff --git a/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/state/RepairStateFactory.java b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/state/RepairStateFactory.java
new file mode 100644
index 00000000..312f305a
--- /dev/null
+++ b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/state/RepairStateFactory.java
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2024 Telefonaktiebolaget LM Ericsson
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.ericsson.bss.cassandra.ecchronos.core.state;
+
+import com.datastax.oss.driver.api.core.metadata.Node;
+import com.ericsson.bss.cassandra.ecchronos.core.repair.config.RepairConfiguration;
+import com.ericsson.bss.cassandra.ecchronos.core.table.TableReference;
+
+public interface RepairStateFactory // creates a RepairState from a node, table, repair configuration and hook
+{
+    RepairState create(
+            Node node,
+            TableReference tableReference,
+            RepairConfiguration repairConfiguration,
+            PostUpdateHook postUpdateHook);
+}
diff --git a/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/state/RepairedAt.java b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/state/RepairedAt.java
new file mode 100644
index 00000000..8af2c2ff
--- /dev/null
+++ b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/state/RepairedAt.java
@@ -0,0 +1,117 @@
+/*
+ * Copyright 2024 Telefonaktiebolaget LM Ericsson
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.ericsson.bss.cassandra.ecchronos.core.state;
+
+import java.util.Collection;
+
+/**
+ * Utility class holding the lowest and highest repaired at values of a set of {@link VnodeRepairStates}.
+ *
+ * A minimum of {@link Long#MAX_VALUE} indicates that no vnode states were provided, so nothing is known.
+ * A minimum of {@link VnodeRepairState#UNREPAIRED} indicates that not all vnodes have repaired at information.
+ */ +public final class RepairedAt +{ + private final long myMinRepairedAt; + private final long myMaxRepairedAt; + + private RepairedAt(final long minRepairedAt, final long maxRepairedAt) + { + myMinRepairedAt = minRepairedAt; + myMaxRepairedAt = maxRepairedAt; + } + + /** + * Check if all vnodes have repaired at information. + * + * @return True if all vnodes have repaired at information. + */ + public boolean isRepaired() + { + return myMinRepairedAt != Long.MAX_VALUE && myMinRepairedAt != VnodeRepairState.UNREPAIRED; + } + + /** + * Check if only some vnodes have repaired at information. + * + * @return True if some vnodes have been repaired but not all. + */ + public boolean isPartiallyRepaired() + { + return myMinRepairedAt == VnodeRepairState.UNREPAIRED && myMaxRepairedAt != myMinRepairedAt; + } + + /** + * Get the highest repaired at for the vnodes. + * + * @return The highest repaired at. + */ + public long getMaxRepairedAt() + { + return myMaxRepairedAt; + } + + /** + * Get the lowest repaired at for the vnodes. + * + * @return The lowest repaired at. + */ + public long getMinRepairedAt() + { + return myMinRepairedAt; + } + + @Override + public String toString() + { + return String.format("(min=%d,max=%d,isRepaired=%b,isPartiallyRepaired=%b)", + getMinRepairedAt(), getMaxRepairedAt(), isRepaired(), isPartiallyRepaired()); + } + + /** + * Generate a repaired at. + * + * @param vnodeRepairStates Vnode repair states. 
+ * @return RepairedAt + */ + public static RepairedAt generate(final VnodeRepairStates vnodeRepairStates) + { + return RepairedAt.generate(vnodeRepairStates.getVnodeRepairStates()); + } + + public static RepairedAt generate(final Collection<VnodeRepairState> vnodeRepairStates) + { + long minRepairedAt = Long.MAX_VALUE; + long maxRepairedAt = Long.MIN_VALUE; + + for (VnodeRepairState vnodeRepairState : vnodeRepairStates) + { + long repairedAt = vnodeRepairState.lastRepairedAt(); + + if (repairedAt > maxRepairedAt) + { + maxRepairedAt = repairedAt; + } + + if (repairedAt < minRepairedAt) + { + minRepairedAt = repairedAt; + } + } + + return new RepairedAt(minRepairedAt, maxRepairedAt); + } +} + diff --git a/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/state/ReplicaRepairGroupFactory.java b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/state/ReplicaRepairGroupFactory.java new file mode 100644 index 00000000..4612eff0 --- /dev/null +++ b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/state/ReplicaRepairGroupFactory.java @@ -0,0 +1,38 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.ericsson.bss.cassandra.ecchronos.core.state; + +import java.util.List; + +/** + * A factory interface that creates {@link ReplicaRepairGroup ReplicaRepairGroups} based on the currently repairable + * vnodes.
+ */ +public interface ReplicaRepairGroupFactory +{ + /** + * Generate a sorted list of {@link ReplicaRepairGroup} based on the provided {@link VnodeRepairState}. + * + * It is assumed that all vnodes passed to this method should be repaired (now). + * + * Which vnodes/replicas are included in the {@link ReplicaRepairGroup} is up to the specific implementation but + * the list should be sorted with the most urgent {@link ReplicaRepairGroup} first. + * + * @param availableVnodeRepairStates The currently repairable vnodes. + * @return The repair groups based on the provided vnodes. + */ + List<ReplicaRepairGroup> generateReplicaRepairGroups(List<VnodeRepairState> availableVnodeRepairStates); +} + diff --git a/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/state/TokenSubRangeUtil.java b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/state/TokenSubRangeUtil.java new file mode 100644 index 00000000..526be86f --- /dev/null +++ b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/state/TokenSubRangeUtil.java @@ -0,0 +1,125 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +package com.ericsson.bss.cassandra.ecchronos.core.state; + +import com.ericsson.bss.cassandra.ecchronos.utils.exceptions.InternalException; +import com.google.common.collect.Lists; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.math.BigInteger; +import java.util.ArrayList; +import java.util.List; + +/** + * Utility class to split a token range into smaller sub-ranges. + */ +public class TokenSubRangeUtil +{ + private static final Logger LOG = LoggerFactory.getLogger(TokenSubRangeUtil.class); + + private final LongTokenRange tokenRange; + private final BigInteger tokenStart; + private final BigInteger totalRangeSize; + + public TokenSubRangeUtil(final LongTokenRange aTokenRange) + { + this.tokenRange = aTokenRange; + this.tokenStart = BigInteger.valueOf(aTokenRange.start); + this.totalRangeSize = this.tokenRange.rangeSize(); + } + + /** + * Generates a number of sub ranges of mostly equal size. + * The last sub range can be slightly smaller than the others + * due to rounding. + * + * @param tokenPerSubRange The number of wanted tokens per subrange + * @return The sub ranges containing the full range. 
+ */ + public List<LongTokenRange> generateSubRanges(final BigInteger tokenPerSubRange) + { + if (totalRangeSize.compareTo(tokenPerSubRange) <= 0) + { + return Lists.newArrayList(tokenRange); // Full range is smaller than wanted tokens + } + + long actualSubRangeCount = totalRangeSize.divide(tokenPerSubRange).longValueExact(); + if (totalRangeSize.remainder(tokenPerSubRange).compareTo(BigInteger.ZERO) > 0) + { + actualSubRangeCount++; + } + + List<LongTokenRange> subRanges = new ArrayList<>(); + for (long l = 0; l < actualSubRangeCount - 1; l++) + { + subRanges.add(newSubRange(tokenPerSubRange, l)); + } + + LongTokenRange lastRange = subRanges.get(subRanges.size() - 1); + subRanges.add(new LongTokenRange(lastRange.end, tokenRange.end)); + + // Verify sub range size match full range size + validateSubRangeSize(subRanges); + + return subRanges; + } + + private void validateSubRangeSize(final List<LongTokenRange> subRanges) + { + BigInteger subRangeSize = BigInteger.ZERO; + + for (LongTokenRange range : subRanges) + { + subRangeSize = subRangeSize.add(range.rangeSize()); + } + + if (subRangeSize.compareTo(totalRangeSize) != 0) + { + BigInteger difference = totalRangeSize.subtract(subRangeSize).abs(); + String msg = String.format( + "Unexpected sub-range generation for %s. Difference: %s. 
Sub-ranges generated: %s", + tokenRange, + difference, + subRanges); + + LOG.error(msg); + throw new InternalException(msg); + } + } + + private LongTokenRange newSubRange(final BigInteger rangeSize, final long rangeId) + { + BigInteger rangeOffset = rangeSize.multiply(BigInteger.valueOf(rangeId)); + BigInteger rangeStartTmp = tokenStart.add(rangeOffset); + BigInteger rangeEndTmp = rangeStartTmp.add(rangeSize); + + long rangeStart = enforceValidBounds(rangeStartTmp); + long rangeEnd = enforceValidBounds(rangeEndTmp); + + return new LongTokenRange(rangeStart, rangeEnd); + } + + private long enforceValidBounds(final BigInteger tokenValue) + { + if (tokenValue.compareTo(LongTokenRange.RANGE_END) > 0) + { + return tokenValue.subtract(LongTokenRange.FULL_RANGE).longValueExact(); + } + + return tokenValue.longValueExact(); + } +} + diff --git a/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/state/VnodeRepairStateFactory.java b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/state/VnodeRepairStateFactory.java new file mode 100644 index 00000000..b52f072c --- /dev/null +++ b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/state/VnodeRepairStateFactory.java @@ -0,0 +1,50 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +package com.ericsson.bss.cassandra.ecchronos.core.state; + +import com.datastax.oss.driver.api.core.metadata.Node; +import com.ericsson.bss.cassandra.ecchronos.core.table.TableReference; + +/** + * A factory to create {@link VnodeRepairStates} for a specific table. + */ +public interface VnodeRepairStateFactory +{ + /** + * Calculate the current repair state based on the previous. + * + * If the previous repair state is unknown it should be calculated from start. + * + * @param tableReference The table to calculate the new repair state for vnodes. + * @param previous The previous repair state or null if none exists. + * @param iterateToTime The time to iterate repair entries to. + * @return The calculated repair state. + */ + VnodeRepairStates calculateNewState( + Node node, + TableReference tableReference, RepairStateSnapshot previous, + long iterateToTime); + + /** + * Calculate the repair state for a time window. + * + * @param tableReference The table to calculate the repair state for vnodes. + * @param to Timestamp from when the repair state should start + * @param from Timestamp to when the repair state should stop + * @return The repair state during the specified time window. + */ + VnodeRepairStates calculateClusterWideState(Node node, TableReference tableReference, long to, long from); +} + diff --git a/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/table/TableStorageStates.java b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/table/TableStorageStates.java new file mode 100644 index 00000000..ed8d2bea --- /dev/null +++ b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/table/TableStorageStates.java @@ -0,0 +1,38 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.ericsson.bss.cassandra.ecchronos.core.table; + +import java.util.UUID; + +/** + * Interface for retrieving storage usage for all tables this node should repair. + */ +public interface TableStorageStates +{ + /** + * Get the data size of the provided table on the local node. + * + * @param tableReference The table to get the data size of. + * @return The data size of the provided table on this node. + */ + long getDataSize(UUID nodeID, TableReference tableReference); + + /** + * Get the total data size of all tables on the local node. + * + * @return The data size of all tables on this node. + */ + long getDataSize(UUID nodeID); +} diff --git a/core/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/state/TestRepairEntry.java b/core/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/state/TestRepairEntry.java new file mode 100644 index 00000000..763bf1bf --- /dev/null +++ b/core/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/state/TestRepairEntry.java @@ -0,0 +1,114 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.ericsson.bss.cassandra.ecchronos.core.state; + +import com.ericsson.bss.cassandra.ecchronos.core.metadata.DriverNode; +import com.ericsson.bss.cassandra.ecchronos.utils.enums.repair.RepairStatus; +import com.google.common.collect.Sets; +import nl.jqno.equalsverifier.EqualsVerifier; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.Mock; +import org.mockito.junit.MockitoJUnitRunner; + +import java.util.Set; + +import static org.assertj.core.api.Assertions.assertThat; + +@RunWith(MockitoJUnitRunner.class) +public class TestRepairEntry +{ + @Mock + private DriverNode mockNode; + + @Test + public void testGetters() + { + LongTokenRange expectedLongTokenRange = new LongTokenRange(0, 1); + long expectedStartedAt = 5; + long expectedFinishedAt = expectedStartedAt + 5; + Set expectedParticipants = Sets.newHashSet(mockNode); + RepairStatus expectedStatus = RepairStatus.SUCCESS; + + RepairEntry repairEntry = new RepairEntry(expectedLongTokenRange, expectedStartedAt, expectedFinishedAt, expectedParticipants, expectedStatus.toString()); + + assertThat(repairEntry.getRange()).isEqualTo(expectedLongTokenRange); + assertThat(repairEntry.getStartedAt()).isEqualTo(expectedStartedAt); + assertThat(repairEntry.getFinishedAt()).isEqualTo(expectedFinishedAt); + assertThat(repairEntry.getParticipants()).isEqualTo(expectedParticipants); + assertThat(repairEntry.getStatus()).isEqualTo(expectedStatus); + } + + @Test + public void testRepairEntriesAreEqual() + { + RepairEntry repairEntry = new RepairEntry(new LongTokenRange(0, 1), 5, 5, Sets.newHashSet(mockNode), "SUCCESS"); + RepairEntry repairEntry2 = new RepairEntry(new LongTokenRange(0, 1), 5, 5, Sets.newHashSet(mockNode), "SUCCESS"); + + assertThat(repairEntry).isEqualTo(repairEntry2); + assertThat(repairEntry.hashCode()).isEqualTo(repairEntry2.hashCode()); + } + + @Test + public void 
testRepairEntriesWithDifferentRangeAreNotEqual() + { + RepairEntry repairEntry = new RepairEntry(new LongTokenRange(0, 1), 5, 5, Sets.newHashSet(mockNode), "SUCCESS"); + RepairEntry repairEntry2 = new RepairEntry(new LongTokenRange(1, 2), 5, 5, Sets.newHashSet(mockNode), "SUCCESS"); + + assertThat(repairEntry).isNotEqualTo(repairEntry2); + } + + @Test + public void testRepairEntriesWithDifferentStartedAtAreNotEqual() + { + RepairEntry repairEntry = new RepairEntry(new LongTokenRange(0, 1), 5, 7, Sets.newHashSet(mockNode), "SUCCESS"); + RepairEntry repairEntry2 = new RepairEntry(new LongTokenRange(0, 1), 6, 7, Sets.newHashSet(mockNode), "SUCCESS"); + + assertThat(repairEntry).isNotEqualTo(repairEntry2); + } + + @Test + public void testRepairEntriesWithDifferentFinishedAtAreNotEqual() + { + RepairEntry repairEntry = new RepairEntry(new LongTokenRange(0, 1), 5, 6, Sets.newHashSet(mockNode), "SUCCESS"); + RepairEntry repairEntry2 = new RepairEntry(new LongTokenRange(0, 1), 5, 7, Sets.newHashSet(mockNode), "SUCCESS"); + + assertThat(repairEntry).isNotEqualTo(repairEntry2); + } + + @Test + public void testRepairEntriesWithDifferentParticipantsAreNotEqual() + { + RepairEntry repairEntry = new RepairEntry(new LongTokenRange(0, 1), 5, 5, Sets.newHashSet(mockNode), "SUCCESS"); + RepairEntry repairEntry2 = new RepairEntry(new LongTokenRange(0, 1), 5, 5, Sets.newHashSet(), "SUCCESS"); + + assertThat(repairEntry).isNotEqualTo(repairEntry2); + } + + @Test + public void testRepairEntriesWithDifferentStatusAreNotEqual() + { + RepairEntry repairEntry = new RepairEntry(new LongTokenRange(0, 1), 5, 5, Sets.newHashSet(mockNode), "SUCCESS"); + RepairEntry repairEntry2 = new RepairEntry(new LongTokenRange(0, 1), 5, 5, Sets.newHashSet(mockNode), "FAILED"); + + assertThat(repairEntry).isNotEqualTo(repairEntry2); + } + + @Test + public void testEqualsContract() + { + EqualsVerifier.forClass(RepairEntry.class).usingGetClass().verify(); + } +} diff --git a/data/pom.xml b/data/pom.xml index 
135d7d9b..015d6dbd 100644 --- a/data/pom.xml +++ b/data/pom.xml @@ -35,6 +35,12 @@ ${project.version} + + com.ericsson.bss.cassandra.ecchronos + core + ${project.version} + + com.ericsson.bss.cassandra.ecchronos utils diff --git a/data/src/main/java/com/ericsson/bss/cassandra/ecchronos/data/repairhistory/RepairHistoryService.java b/data/src/main/java/com/ericsson/bss/cassandra/ecchronos/data/repairhistory/RepairHistoryService.java index f91f33a3..55ed7494 100644 --- a/data/src/main/java/com/ericsson/bss/cassandra/ecchronos/data/repairhistory/RepairHistoryService.java +++ b/data/src/main/java/com/ericsson/bss/cassandra/ecchronos/data/repairhistory/RepairHistoryService.java @@ -16,19 +16,46 @@ import com.datastax.oss.driver.api.core.ConsistencyLevel; import com.datastax.oss.driver.api.core.CqlSession; +import com.datastax.oss.driver.api.core.cql.AsyncResultSet; import com.datastax.oss.driver.api.core.cql.BoundStatement; import com.datastax.oss.driver.api.core.cql.PreparedStatement; import com.datastax.oss.driver.api.core.cql.ResultSet; import com.datastax.oss.driver.api.core.cql.Row; +import com.datastax.oss.driver.api.core.cql.Statement; +import com.datastax.oss.driver.api.core.metadata.Node; +import com.datastax.oss.driver.api.core.uuid.Uuids; import com.datastax.oss.driver.api.querybuilder.QueryBuilder; + +import com.ericsson.bss.cassandra.ecchronos.core.metadata.DriverNode; +import com.ericsson.bss.cassandra.ecchronos.core.metadata.NodeResolver; +import com.ericsson.bss.cassandra.ecchronos.core.state.LongTokenRange; +import com.ericsson.bss.cassandra.ecchronos.core.state.RepairEntry; +import com.ericsson.bss.cassandra.ecchronos.core.state.RepairHistory; +import com.ericsson.bss.cassandra.ecchronos.core.state.RepairHistoryProvider; +import com.ericsson.bss.cassandra.ecchronos.core.state.ReplicationState; +import com.ericsson.bss.cassandra.ecchronos.core.table.TableReference; +import com.ericsson.bss.cassandra.ecchronos.utils.enums.history.SessionState; import 
com.ericsson.bss.cassandra.ecchronos.utils.enums.repair.RepairStatus; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Function; import com.google.common.base.Preconditions; +import com.google.common.base.Predicate; +import com.google.common.collect.AbstractIterator; import java.time.Instant; import java.time.LocalDate; import java.time.ZoneId; +import java.util.HashMap; +import java.util.Iterator; import java.util.List; +import java.util.Map; import java.util.Set; import java.util.UUID; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CompletionStage; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicReference; import java.util.stream.Collectors; import java.util.stream.StreamSupport; import org.slf4j.Logger; @@ -37,7 +64,7 @@ import static com.datastax.oss.driver.api.querybuilder.QueryBuilder.bindMarker; import static com.datastax.oss.driver.api.querybuilder.QueryBuilder.selectFrom; -public final class RepairHistoryService +public final class RepairHistoryService implements RepairHistory, RepairHistoryProvider { private static final Logger LOG = LoggerFactory.getLogger(RepairHistoryService.class); @@ -60,10 +87,22 @@ public final class RepairHistoryService private final PreparedStatement myCreateStatement; private final PreparedStatement myUpdateStatement; private final PreparedStatement mySelectStatement; - private final CqlSession myCqlSession; + private final PreparedStatement myIterateStatement; - public RepairHistoryService(final CqlSession cqlSession) + private final CqlSession myCqlSession; + private final ReplicationState myReplicationState; + private final NodeResolver myNodeResolver; + private final long myLookbackTimeInMs; + + public RepairHistoryService( + final CqlSession cqlSession, + final ReplicationState replicationState, + final NodeResolver nodeResolver, 
+ final long lookbackTimeInMs) { + myLookbackTimeInMs = lookbackTimeInMs; + myReplicationState = replicationState; + myNodeResolver = nodeResolver; myCqlSession = Preconditions.checkNotNull(cqlSession, "CqlSession cannot be null"); myCreateStatement = myCqlSession .prepare(QueryBuilder.insertInto(KEYSPACE_NAME, TABLE_NAME) @@ -80,6 +119,7 @@ public RepairHistoryService(final CqlSession cqlSession) .value(COLUMN_FINISHED_AT, bindMarker()) .build() .setConsistencyLevel(ConsistencyLevel.LOCAL_QUORUM)); + myUpdateStatement = myCqlSession .prepare(QueryBuilder.update(KEYSPACE_NAME, TABLE_NAME) .setColumn(COLUMN_JOB_ID, bindMarker()) @@ -109,6 +149,13 @@ public RepairHistoryService(final CqlSession cqlSession) .isEqualTo(bindMarker()) .build() .setConsistencyLevel(ConsistencyLevel.LOCAL_QUORUM)); + myIterateStatement = myCqlSession.prepare(QueryBuilder.selectFrom(KEYSPACE_NAME, TABLE_NAME) + .columns(COLUMN_STARTED_AT, COLUMN_FINISHED_AT, COLUMN_STATUS, COLUMN_RANGE_BEGIN, COLUMN_RANGE_END) + .whereColumn(COLUMN_TABLE_ID).isEqualTo(bindMarker()) + .whereColumn(COLUMN_NODE_ID).isEqualTo(bindMarker()) + .whereColumn(COLUMN_REPAIR_ID).isGreaterThanOrEqualTo(bindMarker()) + .whereColumn(COLUMN_REPAIR_ID).isLessThanOrEqualTo(bindMarker()).build() + .setConsistencyLevel(ConsistencyLevel.LOCAL_ONE)); } /** @@ -267,6 +314,49 @@ public ResultSet insertRepairHistoryInfo(final RepairHistoryData repairHistoryDa return tmpResultSet; } + @Override + public Iterator iterate(final Node node, + final TableReference tableReference, + final long to, + final Predicate predicate) + { + long from = System.currentTimeMillis() - myLookbackTimeInMs; + return iterate(node, tableReference, to, from, predicate); + } + + @Override + public Iterator iterate( + final Node node, + final TableReference tableReference, + final long to, + final long from, + final Predicate predicate + ) + { + UUID start = Uuids.startOf(from); + UUID finish = Uuids.endOf(to); + + Statement statement = 
myIterateStatement.bind(tableReference.getId(), node.getHostId(), start, finish); + ResultSet resultSet = myCqlSession.execute(statement); + + return new RepairEntryIterator(node, tableReference, resultSet, predicate); + } + + @Override + public RepairSession newSession( + final Node node, + final TableReference tableReference, + final UUID jobId, + final LongTokenRange range, + final Set participants) + { + DriverNode driverNode = myNodeResolver.fromUUID(node.getHostId()).orElseThrow(IllegalStateException::new); + Preconditions.checkArgument(participants.contains(driverNode), + "Current node must be part of repair"); + + return new RepairSessionImpl(tableReference.getId(), driverNode.getId(), jobId, range, participants); + } + public ResultSet updateRepairHistoryInfo(final RepairHistoryData repairHistoryData) { BoundStatement updateRepairHistoryInfo = myUpdateStatement.bind(repairHistoryData.getJobId(), @@ -324,4 +414,220 @@ private RepairHistoryData convertRowToRepairHistoryData(final Row row, final lon .withLookBackTimeInMilliseconds(lookBackTimeInMs) .build(); } + + public final class RepairEntryIterator extends AbstractIterator + { + private final TableReference tableReference; + private final Iterator rowIterator; + private final Predicate predicate; + private final Node myNode; + + RepairEntryIterator( + final Node node, + final TableReference aTableReference, + final ResultSet aResultSet, + final Predicate aPredicate) + { + myNode = node; + this.tableReference = aTableReference; + this.rowIterator = aResultSet.iterator(); + this.predicate = aPredicate; + } + + @Override + protected RepairEntry computeNext() + { + while (rowIterator.hasNext()) + { + Row row = rowIterator.next(); + + if (validateFields(row)) + { + RepairEntry repairEntry = buildFrom(row); + if (repairEntry != null && predicate.apply(repairEntry)) + { + return repairEntry; + } + } + } + + return endOfData(); + } + + private RepairEntry buildFrom(final Row row) + { + long rangeBegin = 
Long.parseLong(row.getString(COLUMN_RANGE_BEGIN)); + long rangeEnd = Long.parseLong(row.getString(COLUMN_RANGE_END)); + + LongTokenRange tokenRange = new LongTokenRange(rangeBegin, rangeEnd); + long startedAt = row.getInstant(COLUMN_STARTED_AT).toEpochMilli(); + Instant finished = row.getInstant(COLUMN_FINISHED_AT); + long finishedAt = -1L; + if (finished != null) + { + finishedAt = finished.toEpochMilli(); + } + Set nodes = myReplicationState.getNodesClusterWide(tableReference, tokenRange, myNode); + + if (nodes == null) + { + LOG.debug("Token range {} was not found in metadata", tokenRange); + return null; + } + String status = row.getString(COLUMN_STATUS); + + return new RepairEntry(tokenRange, startedAt, finishedAt, nodes, status); + } + + private boolean validateFields(final Row row) + { + return !row.isNull(COLUMN_RANGE_BEGIN) + && !row.isNull(COLUMN_RANGE_END) + && !row.isNull(COLUMN_STARTED_AT) + && !row.isNull(COLUMN_STATUS); + } + } + + class RepairSessionImpl implements RepairSession + { + private final UUID myTableID; + private final UUID myNodeID; + private final UUID myJobID; + private final LongTokenRange myRange; + private final Set myParticipants; + private final AtomicReference mySessionState = new AtomicReference<>(SessionState.NO_STATE); + private final AtomicReference myRepairID = new AtomicReference<>(null); + private final AtomicReference myStartedAt = new AtomicReference<>(null); + + RepairSessionImpl(final UUID tableID, + final UUID nodeID, + final UUID jobID, + final LongTokenRange range, + final Set participants) + { + myTableID = tableID; + myNodeID = nodeID; + myJobID = jobID; + myRange = range; + myParticipants = participants.stream() + .map(DriverNode::getId) + .collect(Collectors.toSet()); + } + + @VisibleForTesting + UUID getId() + { + return myRepairID.get(); + } + + @Override + public void start() + { + transitionTo(SessionState.STARTED); + myStartedAt.compareAndSet(null, Instant.now()); + } + + /** + * Transition to state DONE, 
as long as the previous status was STARTED. Set finished at to current timestamp. + * + * @param repairStatus The repair status + */ + @Override + public void finish(final RepairStatus repairStatus) + { + Preconditions.checkArgument(!RepairStatus.STARTED.equals(repairStatus), + "Repair status must change from started"); + transitionTo(SessionState.DONE); + String rangeBegin = Long.toString(myRange.start); + String rangeEnd = Long.toString(myRange.end); + Instant finishedAt = Instant.now(); + myRepairID.compareAndSet(null, Uuids.timeBased()); + insertWithRetry(participant -> insertFinish(rangeBegin, rangeEnd, repairStatus, finishedAt, participant)); + } + + private void insertWithRetry(final Function> insertFunction) + { + Map futures = new HashMap<>(); + + for (UUID participant : myParticipants) + { + CompletableFuture future = insertFunction.apply(participant).toCompletableFuture(); + futures.put(participant, future); + } + + + boolean loggedException = false; + + for (Map.Entry entry : futures.entrySet()) + { + CompletableFuture future = entry.getValue(); + + try + { + future.get(2, TimeUnit.SECONDS); + } + catch (InterruptedException e) + { + Thread.currentThread().interrupt(); + } + catch (ExecutionException | TimeoutException e) + { + UUID participant = entry.getKey(); + LOG.warn("Unable to update repair history for {} - {}, retrying", participant, this); + if (!loggedException) + { + LOG.warn("", e); + loggedException = true; + } + insertFunction.apply(participant); + } + } + } + + private CompletionStage insertFinish(final String rangeBegin, + final String rangeEnd, + final RepairStatus repairStatus, + final Instant finishedAt, + final UUID participant) + { + BoundStatement statement = myCreateStatement.bind( + myTableID, + myNodeID, + myRepairID.get(), + myJobID, + participant, + rangeBegin, + rangeEnd, + null, + repairStatus.toString(), + myStartedAt.get(), + finishedAt); + + return myCqlSession.executeAsync(statement); + } + + /** + * Return a string 
representation. + * + * @return String + */ + @Override + public String toString() + { + return String.format("table_id=%s,repair_id=%s,job_id=%s,range=%s,participants=%s", myTableID, myRepairID.get(), + myJobID, myRange, myParticipants); + } + + private void transitionTo(final SessionState newState) + { + SessionState currentState = mySessionState.get(); + Preconditions.checkState(currentState.canTransition(newState), + "Cannot transition from " + currentState + " to " + newState); + + if (!mySessionState.compareAndSet(currentState, newState)) + { + throw new IllegalStateException("Cannot transition from " + mySessionState.get() + " to " + newState); + } + } + } } diff --git a/data/src/test/java/com/ericsson/bss/cassandra/ecchronos/data/repairhistory/TestRepairHistoryService.java b/data/src/test/java/com/ericsson/bss/cassandra/ecchronos/data/repairhistory/TestRepairHistoryService.java index 7edfce6c..c416777b 100644 --- a/data/src/test/java/com/ericsson/bss/cassandra/ecchronos/data/repairhistory/TestRepairHistoryService.java +++ b/data/src/test/java/com/ericsson/bss/cassandra/ecchronos/data/repairhistory/TestRepairHistoryService.java @@ -18,6 +18,8 @@ import com.datastax.oss.driver.api.core.cql.Row; import com.datastax.oss.driver.api.core.cql.SimpleStatement; import com.datastax.oss.driver.api.core.uuid.Uuids; +import com.ericsson.bss.cassandra.ecchronos.core.metadata.NodeResolver; +import com.ericsson.bss.cassandra.ecchronos.core.state.ReplicationState; import com.ericsson.bss.cassandra.ecchronos.data.utils.AbstractCassandraTest; import com.ericsson.bss.cassandra.ecchronos.utils.enums.repair.RepairStatus; import java.io.IOException; @@ -28,6 +30,7 @@ import org.junit.After; import org.junit.Before; import org.junit.Test; +import org.mockito.Mock; import static com.ericsson.bss.cassandra.ecchronos.data.repairhistory.RepairHistoryData.Builder; import static com.ericsson.bss.cassandra.ecchronos.data.repairhistory.RepairHistoryData.copyOf; @@ -37,7 +40,6 @@ public 
class TestRepairHistoryService extends AbstractCassandraTest { - private static final String ECCHRONOS_KEYSPACE = "ecchronos"; private static final String COLUMN_NODE_ID = "node_id"; private static final String COLUMN_TABLE_ID = "table_id"; @@ -47,6 +49,12 @@ public class TestRepairHistoryService extends AbstractCassandraTest private RepairHistoryService myRepairHistoryService; + @Mock + NodeResolver mockNodeResolver; + + @Mock + ReplicationState mockReplicationState; + @Before public void setup() throws IOException { @@ -70,7 +78,11 @@ public void setup() throws IOException "WITH CLUSTERING ORDER BY (repair_id DESC);", ECCHRONOS_KEYSPACE); AbstractCassandraTest.mySession.execute(query); - myRepairHistoryService = new RepairHistoryService(AbstractCassandraTest.mySession); + myRepairHistoryService = new RepairHistoryService( + AbstractCassandraTest.mySession, + mockReplicationState, + mockNodeResolver, + 1L); } @After diff --git a/fault.manager.impl/pom.xml b/fault.manager.impl/pom.xml new file mode 100644 index 00000000..a9548eef --- /dev/null +++ b/fault.manager.impl/pom.xml @@ -0,0 +1,73 @@ + + + + 4.0.0 + + com.ericsson.bss.cassandra.ecchronos + agent + 1.0.0-SNAPSHOT + + + fault.manager.impl + + + + + com.ericsson.bss.cassandra.ecchronos + fault.manager + ${project.version} + + + + + org.slf4j + slf4j-api + + + + junit + junit + test + + + org.assertj + assertj-core + test + + + + + + + org.apache.felix + maven-bundle-plugin + + true + META-INF + + * + com.ericsson.bss.cassandra.ecchronos.fm.impl.* + + + + + + + \ No newline at end of file diff --git a/fault.manager.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/fm/impl/LoggingFaultReporter.java b/fault.manager.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/fm/impl/LoggingFaultReporter.java new file mode 100644 index 00000000..25e4d36c --- /dev/null +++ b/fault.manager.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/fm/impl/LoggingFaultReporter.java @@ -0,0 +1,55 @@ +/* + * Copyright 
2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.ericsson.bss.cassandra.ecchronos.fm.impl; + +import com.ericsson.bss.cassandra.ecchronos.fm.RepairFaultReporter; +import java.util.HashMap; +import java.util.Map; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class LoggingFaultReporter implements RepairFaultReporter +{ + private static final Logger LOG = LoggerFactory.getLogger(LoggingFaultReporter.class); + private final Map alarms = new HashMap<>(); + + public final Map getAlarms() + { + return alarms; + } + + @Override + public final void raise(final FaultCode faultCode, final Map data) + { + FaultCode oldCode = alarms.put(data.hashCode(), faultCode); + if (oldCode == null || (oldCode == FaultCode.REPAIR_WARNING && faultCode == FaultCode.REPAIR_ERROR)) + { + LOG.error("Raising alarm: {} - {}", faultCode, data); + } + } + + @Override + public final void cease(final FaultCode faultCode, final Map data) + { + FaultCode code = alarms.get(data.hashCode()); + if (code != null) + { + LOG.info("Ceasing alarm: {} - {}", code, data); + alarms.remove(data.hashCode(), code); + } + } +} + diff --git a/fault.manager.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/fm/impl/package-info.java b/fault.manager.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/fm/impl/package-info.java new file mode 100644 index 00000000..4914c053 --- /dev/null +++ 
b/fault.manager.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/fm/impl/package-info.java @@ -0,0 +1,18 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * Contains the default implementation of the API for fault management in ecChronos. + */ +package com.ericsson.bss.cassandra.ecchronos.fm.impl; diff --git a/fault.manager.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/fm/impl/TestLoggingFaultReporter.java b/fault.manager.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/fm/impl/TestLoggingFaultReporter.java new file mode 100644 index 00000000..e1b367cb --- /dev/null +++ b/fault.manager.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/fm/impl/TestLoggingFaultReporter.java @@ -0,0 +1,93 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.ericsson.bss.cassandra.ecchronos.fm.impl; + +import static org.assertj.core.api.Assertions.assertThat; + +import com.ericsson.bss.cassandra.ecchronos.fm.RepairFaultReporter; +import org.junit.Test; +import java.util.HashMap; +import java.util.Map; + +public class TestLoggingFaultReporter +{ + LoggingFaultReporter loggingFaultReporter = new LoggingFaultReporter(); + + @Test + public void testAlarmIncreasingSeverity() + { + Map data = new HashMap<>(); + data.put(RepairFaultReporter.FAULT_KEYSPACE, "keyspace"); + data.put(RepairFaultReporter.FAULT_TABLE, "table"); + + loggingFaultReporter.raise(RepairFaultReporter.FaultCode.REPAIR_WARNING, data); + assertThat(loggingFaultReporter.getAlarms().size()).isEqualTo(1); + assertThat(loggingFaultReporter.getAlarms()).containsValue(RepairFaultReporter.FaultCode.REPAIR_WARNING); + + loggingFaultReporter.raise(RepairFaultReporter.FaultCode.REPAIR_ERROR, data); + assertThat(loggingFaultReporter.getAlarms().size()).isEqualTo(1); + assertThat(loggingFaultReporter.getAlarms()).containsValue(RepairFaultReporter.FaultCode.REPAIR_ERROR); + } + + @Test + public void testRaiseMultipleTimes() + { + Map data = new HashMap<>(); + data.put(RepairFaultReporter.FAULT_KEYSPACE, "keyspace"); + data.put(RepairFaultReporter.FAULT_TABLE, "table"); + + loggingFaultReporter.raise(RepairFaultReporter.FaultCode.REPAIR_WARNING, data); + loggingFaultReporter.raise(RepairFaultReporter.FaultCode.REPAIR_WARNING, data); + loggingFaultReporter.raise(RepairFaultReporter.FaultCode.REPAIR_WARNING, data); + loggingFaultReporter.raise(RepairFaultReporter.FaultCode.REPAIR_WARNING, data); + loggingFaultReporter.raise(RepairFaultReporter.FaultCode.REPAIR_WARNING, data); + assertThat(loggingFaultReporter.getAlarms().size()).isEqualTo(1); + assertThat(loggingFaultReporter.getAlarms()).containsValue(RepairFaultReporter.FaultCode.REPAIR_WARNING); + } + + @Test + public void testCeaseAlarms() + { + Map data = new HashMap<>(); + 
data.put(RepairFaultReporter.FAULT_KEYSPACE, "keyspace"); + data.put(RepairFaultReporter.FAULT_TABLE, "table"); + + loggingFaultReporter.raise(RepairFaultReporter.FaultCode.REPAIR_ERROR, data); + assertThat(loggingFaultReporter.getAlarms().size()).isEqualTo(1); + + loggingFaultReporter.cease(RepairFaultReporter.FaultCode.REPAIR_ERROR, data); + assertThat(loggingFaultReporter.getAlarms().size()).isEqualTo(0); + } + + @Test + public void testCeaseMultipleAlarms() + { + Map data = new HashMap<>(); + data.put(RepairFaultReporter.FAULT_KEYSPACE, "keyspace"); + data.put(RepairFaultReporter.FAULT_TABLE, "table"); + Map anotherData = new HashMap<>(); + anotherData.put(RepairFaultReporter.FAULT_KEYSPACE, "keyspace2"); + anotherData.put(RepairFaultReporter.FAULT_TABLE, "table2"); + + loggingFaultReporter.raise(RepairFaultReporter.FaultCode.REPAIR_WARNING, data); + assertThat(loggingFaultReporter.getAlarms().size()).isEqualTo(1); + + loggingFaultReporter.cease(RepairFaultReporter.FaultCode.REPAIR_WARNING, anotherData); + assertThat(loggingFaultReporter.getAlarms().size()).isEqualTo(1); + + loggingFaultReporter.cease(RepairFaultReporter.FaultCode.REPAIR_ERROR, data); + assertThat(loggingFaultReporter.getAlarms().size()).isEqualTo(0); + } +} diff --git a/fault.manager/pom.xml b/fault.manager/pom.xml new file mode 100644 index 00000000..31a34404 --- /dev/null +++ b/fault.manager/pom.xml @@ -0,0 +1,62 @@ + + + + 4.0.0 + + com.ericsson.bss.cassandra.ecchronos + agent + 1.0.0-SNAPSHOT + + + fault.manager + + + + org.apache.maven.plugins + maven-dependency-plugin + + + dependencies + generate-sources + + tree + + + compile + target/dependency-tree.txt + + + + + + + org.apache.felix + maven-bundle-plugin + + true + META-INF + + com.ericsson.bss.cassandra.ecchronos.fm.* + + + + + + \ No newline at end of file diff --git a/fault.manager/src/main/java/com/ericsson/bss/cassandra/ecchronos/fm/RepairFaultReporter.java 
b/fault.manager/src/main/java/com/ericsson/bss/cassandra/ecchronos/fm/RepairFaultReporter.java new file mode 100644 index 00000000..93093fb7 --- /dev/null +++ b/fault.manager/src/main/java/com/ericsson/bss/cassandra/ecchronos/fm/RepairFaultReporter.java @@ -0,0 +1,45 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.ericsson.bss.cassandra.ecchronos.fm; + +import java.util.Map; + +public interface RepairFaultReporter +{ + String FAULT_KEYSPACE = "KEYSPACE"; + String FAULT_TABLE = "TABLE"; + + enum FaultCode + { + REPAIR_WARNING, + REPAIR_ERROR + } + + /** + * This method might be called multiple times with the same parameters, + * implementations of this method should control whether the alarm should be raised. + * @param faultCode The fault code + * @param data The data containing keyspace and table + */ + void raise(FaultCode faultCode, Map data); + + /** + * This method might be called multiple times with the same parameters, + * implementations of this method should control whether the alarm should be cleared. 
+ * @param faultCode The fault code + * @param data The data containing keyspace and table + */ + void cease(FaultCode faultCode, Map data); +} diff --git a/fault.manager/src/main/java/com/ericsson/bss/cassandra/ecchronos/fm/package-info.java b/fault.manager/src/main/java/com/ericsson/bss/cassandra/ecchronos/fm/package-info.java new file mode 100644 index 00000000..14f44f7d --- /dev/null +++ b/fault.manager/src/main/java/com/ericsson/bss/cassandra/ecchronos/fm/package-info.java @@ -0,0 +1,18 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * Contains the API for fault management in ecChronos. + */ +package com.ericsson.bss.cassandra.ecchronos.fm; diff --git a/pom.xml b/pom.xml index f7070566..4158a229 100644 --- a/pom.xml +++ b/pom.xml @@ -77,6 +77,8 @@ core core.impl utils + fault.manager + fault.manager.impl diff --git a/utils/src/main/java/com/ericsson/bss/cassandra/ecchronos/utils/enums/history/SessionState.java b/utils/src/main/java/com/ericsson/bss/cassandra/ecchronos/utils/enums/history/SessionState.java new file mode 100644 index 00000000..b60a401c --- /dev/null +++ b/utils/src/main/java/com/ericsson/bss/cassandra/ecchronos/utils/enums/history/SessionState.java @@ -0,0 +1,32 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.ericsson.bss.cassandra.ecchronos.utils.enums.history; + +public enum SessionState +{ + DONE(null), STARTED(DONE), NO_STATE(STARTED); + + private final SessionState nextValid; + + SessionState(final SessionState theNextValid) + { + this.nextValid = theNextValid; + } + + public boolean canTransition(final SessionState nextState) + { + return nextState.equals(nextValid); + } +} diff --git a/utils/src/main/java/com/ericsson/bss/cassandra/ecchronos/utils/enums/history/package-info.java b/utils/src/main/java/com/ericsson/bss/cassandra/ecchronos/utils/enums/history/package-info.java new file mode 100644 index 00000000..20e230b7 --- /dev/null +++ b/utils/src/main/java/com/ericsson/bss/cassandra/ecchronos/utils/enums/history/package-info.java @@ -0,0 +1,18 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * Contains the enums related to ecChronos repair history.
+ */ +package com.ericsson.bss.cassandra.ecchronos.utils.enums.history; diff --git a/utils/src/main/java/com/ericsson/bss/cassandra/ecchronos/utils/exceptions/InternalException.java b/utils/src/main/java/com/ericsson/bss/cassandra/ecchronos/utils/exceptions/InternalException.java new file mode 100644 index 00000000..05838a18 --- /dev/null +++ b/utils/src/main/java/com/ericsson/bss/cassandra/ecchronos/utils/exceptions/InternalException.java @@ -0,0 +1,29 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.ericsson.bss.cassandra.ecchronos.utils.exceptions; + +/** + * An exception indicating an internal issue has occurred. + * This typically indicates an unexpected bug in the software. + */ +public class InternalException extends RuntimeException +{ + private static final long serialVersionUID = 8519513326549621415L; + + public InternalException(final String message) + { + super(message); + } +} From 865e1f64adfb2e8ba15e2b267b26e5d6043ea955 Mon Sep 17 00:00:00 2001 From: sajid riaz Date: Mon, 4 Nov 2024 15:31:41 +0100 Subject: [PATCH 2/8] Cassandra based distributed locking mechanism # 741 - Cassandra tables called lock and lock_priority, to manage task execution and synchronization across multiple nodes.
--- .../connection/AgentConnectionConfig.java | 12 + .../config/connection/package-info.java | 1 + .../lockfactory/CasLockFactoryConfig.java | 62 ++++ .../config/lockfactory/LockFactoryConfig.java | 34 ++ .../config/lockfactory/package-info.java | 19 + .../AgentNativeConnectionProvider.java | 73 ++-- application/src/main/resources/ecc.yml | 41 +++ .../ecchronos/core/impl/locks/CASLock.java | 217 +++++++++++ .../core/impl/locks/CASLockFactory.java | 345 ++++++++++++++++++ .../impl/locks/CASLockFactoryBuilder.java | 137 +++++++ .../locks/CASLockFactoryCacheContext.java | 83 +++++ .../core/impl/locks/CASLockProperties.java | 83 +++++ .../core/impl/locks/CASLockStatement.java | 225 ++++++++++++ .../core/impl/locks/HostStatesImpl.java | 216 +++++++++++ .../ecchronos/core/impl/locks/LockCache.java | 136 +++++++ .../core/impl/locks/LockCollection.java | 58 +++ .../core/impl/locks/NodePriority.java | 43 +++ .../core/impl/locks/package-info.java | 18 + .../core/impl/utils/ConsistencyType.java | 22 ++ .../core/impl/utils/package-info.java | 18 + .../core/exceptions/LockException.java | 38 ++ .../core/exceptions/package-info.java | 19 + .../ecchronos/core/locks/DriverNode.java | 96 +++++ .../ecchronos/core/locks/HostStates.java | 50 +++ .../ecchronos/core/locks/LockFactory.java | 95 +++++ .../ecchronos/core/locks/package-info.java | 18 + 26 files changed, 2116 insertions(+), 43 deletions(-) create mode 100644 application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/config/lockfactory/CasLockFactoryConfig.java create mode 100644 application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/config/lockfactory/LockFactoryConfig.java create mode 100644 application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/config/lockfactory/package-info.java create mode 100644 core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/CASLock.java create mode 100644 
core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/CASLockFactory.java create mode 100644 core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/CASLockFactoryBuilder.java create mode 100644 core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/CASLockFactoryCacheContext.java create mode 100644 core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/CASLockProperties.java create mode 100644 core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/CASLockStatement.java create mode 100644 core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/HostStatesImpl.java create mode 100644 core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/LockCache.java create mode 100644 core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/LockCollection.java create mode 100644 core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/NodePriority.java create mode 100644 core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/package-info.java create mode 100644 core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/utils/ConsistencyType.java create mode 100644 core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/utils/package-info.java create mode 100644 core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/exceptions/LockException.java create mode 100644 core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/exceptions/package-info.java create mode 100644 core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/locks/DriverNode.java create mode 100644 core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/locks/HostStates.java create mode 100644 core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/locks/LockFactory.java create mode 100644 
core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/locks/package-info.java diff --git a/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/config/connection/AgentConnectionConfig.java b/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/config/connection/AgentConnectionConfig.java index b6e9f797..2234f289 100644 --- a/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/config/connection/AgentConnectionConfig.java +++ b/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/config/connection/AgentConnectionConfig.java @@ -38,6 +38,7 @@ public final class AgentConnectionConfig private HostAware myHostAware = new HostAware(); private Class myDatacenterAwarePolicy = DataCenterAwarePolicy.class; private String myInstanceName; + private boolean myRemoteRouting = true; /** * Default constructor for AgentConnectionConfig. @@ -47,6 +48,17 @@ public AgentConnectionConfig() } + @JsonProperty("remoteRouting") + public boolean getRemoteRouting() + { + return myRemoteRouting; + } + + @JsonProperty("remoteRouting") + public void setRemoteRouting(final boolean remoteRouting) + { + myRemoteRouting = remoteRouting; + } /** * Gets unique ecchronos instance name. * diff --git a/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/config/connection/package-info.java b/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/config/connection/package-info.java index 6cc872e1..9a12b172 100644 --- a/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/config/connection/package-info.java +++ b/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/config/connection/package-info.java @@ -16,3 +16,4 @@ * Contains configurations related to outbound connections (CQL and JMX). 
*/ package com.ericsson.bss.cassandra.ecchronos.application.config.connection; + diff --git a/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/config/lockfactory/CasLockFactoryConfig.java b/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/config/lockfactory/CasLockFactoryConfig.java new file mode 100644 index 00000000..59ccc3fa --- /dev/null +++ b/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/config/lockfactory/CasLockFactoryConfig.java @@ -0,0 +1,62 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.ericsson.bss.cassandra.ecchronos.application.config.lockfactory; + +import com.ericsson.bss.cassandra.ecchronos.core.impl.utils.ConsistencyType; +import com.fasterxml.jackson.annotation.JsonProperty; +import java.util.Locale; + +public class CasLockFactoryConfig +{ + private static final long DEFAULT_EXPIRY_TIME_IN_SECONDS = 30L; + private static final String DEFAULT_KEYSPACE_NAME = "ecchronos"; + private String myKeyspaceName = DEFAULT_KEYSPACE_NAME; + private long myExpiryTimeInSeconds = DEFAULT_EXPIRY_TIME_IN_SECONDS; + private ConsistencyType myConsistencySerial = ConsistencyType.DEFAULT; + + public final long getFailureCacheExpiryTimeInSeconds() + { + return myExpiryTimeInSeconds; + } + + @JsonProperty ("cache_expiry_time_in_seconds") + public final void setFailureCacheExpiryTimeInSeconds(final long expiryTimeInSeconds) + { + myExpiryTimeInSeconds = expiryTimeInSeconds; + } + + public final String getKeyspaceName() + { + return myKeyspaceName; + } + + @JsonProperty ("keyspace") + public final void setKeyspaceName(final String keyspaceName) + { + myKeyspaceName = keyspaceName; + } + + @JsonProperty ("consistencySerial") + public final ConsistencyType getConsistencySerial() + { + return myConsistencySerial; + } + + @JsonProperty ("consistencySerial") + public final void setConsistencySerial(final String consistencySerial) + { + myConsistencySerial = ConsistencyType.valueOf(consistencySerial.toUpperCase(Locale.US)); + } +} diff --git a/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/config/lockfactory/LockFactoryConfig.java b/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/config/lockfactory/LockFactoryConfig.java new file mode 100644 index 00000000..d0a431cd --- /dev/null +++ b/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/config/lockfactory/LockFactoryConfig.java @@ -0,0 +1,34 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the 
Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.ericsson.bss.cassandra.ecchronos.application.config.lockfactory; + +import com.fasterxml.jackson.annotation.JsonProperty; + +public class LockFactoryConfig +{ + private CasLockFactoryConfig myCasLockFactoryConfig = new CasLockFactoryConfig(); + + @JsonProperty("cas") + public final CasLockFactoryConfig getCasLockFactoryConfig() + { + return myCasLockFactoryConfig; + } + + @JsonProperty("cas") + public final void setCasLockFactoryConfig(final CasLockFactoryConfig casLockFactoryConfig) + { + myCasLockFactoryConfig = casLockFactoryConfig; + } +} diff --git a/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/config/lockfactory/package-info.java b/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/config/lockfactory/package-info.java new file mode 100644 index 00000000..762efdad --- /dev/null +++ b/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/config/lockfactory/package-info.java @@ -0,0 +1,19 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Contains configurations related to lock factory. + */ +package com.ericsson.bss.cassandra.ecchronos.application.config.lockfactory; diff --git a/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/providers/AgentNativeConnectionProvider.java b/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/providers/AgentNativeConnectionProvider.java index f53fe1c8..af68f05b 100644 --- a/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/providers/AgentNativeConnectionProvider.java +++ b/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/providers/AgentNativeConnectionProvider.java @@ -49,6 +49,7 @@ public class AgentNativeConnectionProvider implements DistributedNativeConnectio private static final Logger LOG = LoggerFactory.getLogger(AgentNativeConnectionProvider.class); private final DistributedNativeConnectionProviderImpl myDistributedNativeConnectionProviderImpl; + private final boolean myRemoteRouting; /** * Constructs an {@code AgentNativeConnectionProvider} with the specified configuration, security supplier, and @@ -62,14 +63,15 @@ public class AgentNativeConnectionProvider implements DistributedNativeConnectio * the handler for managing SSL/TLS certificates. 
*/ public AgentNativeConnectionProvider( - final Config config, - final Supplier cqlSecuritySupplier, - final CertificateHandler certificateHandler, - final DefaultRepairConfigurationProvider defaultRepairConfigurationProvider - ) + final Config config, + final Supplier cqlSecuritySupplier, + final CertificateHandler certificateHandler, + final DefaultRepairConfigurationProvider defaultRepairConfigurationProvider) { - AgentConnectionConfig agentConnectionConfig = config.getConnectionConfig().getCqlConnection() + AgentConnectionConfig agentConnectionConfig = config.getConnectionConfig() + .getCqlConnection() .getAgentConnectionConfig(); + myRemoteRouting = agentConnectionConfig.getRemoteRouting(); Security.CqlSecurity cqlSecurity = cqlSecuritySupplier.get(); boolean authEnabled = cqlSecurity.getCqlCredentials().isEnabled(); boolean tlsEnabled = cqlSecurity.getCqlTlsConfig().isEnabled(); @@ -112,25 +114,24 @@ public AgentNativeConnectionProvider( * @return the configured {@link DistributedNativeBuilder}. 
*/ public final DistributedNativeBuilder resolveAgentProviderBuilder( - final DistributedNativeBuilder builder, - final AgentConnectionConfig agentConnectionConfig - ) + final DistributedNativeBuilder builder, + final AgentConnectionConfig agentConnectionConfig) { switch (agentConnectionConfig.getType()) { - case datacenterAware: - LOG.info("Using DatacenterAware as Agent Config"); - return builder.withDatacenterAware(resolveDatacenterAware( - agentConnectionConfig.getDatacenterAware())); - case rackAware: - LOG.info("Using RackAware as Agent Config"); - return builder.withRackAware(resolveRackAware( - agentConnectionConfig.getRackAware())); - case hostAware: - LOG.info("Using HostAware as Agent Config"); - return builder.withHostAware(resolveHostAware( - agentConnectionConfig.getHostAware())); - default: + case datacenterAware: + LOG.info("Using DatacenterAware as Agent Config"); + return builder.withDatacenterAware(resolveDatacenterAware( + agentConnectionConfig.getDatacenterAware())); + case rackAware: + LOG.info("Using RackAware as Agent Config"); + return builder.withRackAware(resolveRackAware( + agentConnectionConfig.getRackAware())); + case hostAware: + LOG.info("Using HostAware as Agent Config"); + return builder.withHostAware(resolveHostAware( + agentConnectionConfig.getHostAware())); + default: } return builder; } @@ -143,8 +144,7 @@ public final DistributedNativeBuilder resolveAgentProviderBuilder( * @return a list of {@link InetSocketAddress} representing the resolved contact points. 
*/ public final List resolveInitialContactPoints( - final Map contactPoints - ) + final Map contactPoints) { List resolvedContactPoints = new ArrayList<>(); for (AgentConnectionConfig.Host host : contactPoints.values()) @@ -166,11 +166,7 @@ public final List resolveInitialContactPoints( public final List resolveDatacenterAware(final AgentConnectionConfig.DatacenterAware datacenterAware) { List datacenterNames = new ArrayList<>(); - for - ( - AgentConnectionConfig.DatacenterAware.Datacenter datacenter - : - datacenterAware.getDatacenters().values()) + for (AgentConnectionConfig.DatacenterAware.Datacenter datacenter : datacenterAware.getDatacenters().values()) { datacenterNames.add(datacenter.getName()); } @@ -187,12 +183,7 @@ public final List resolveDatacenterAware(final AgentConnectionConfig.Dat public final List> resolveRackAware(final AgentConnectionConfig.RackAware rackAware) { List> rackList = new ArrayList<>(); - for - ( - AgentConnectionConfig.RackAware.Rack rack - : - rackAware.getRacks().values() - ) + for (AgentConnectionConfig.RackAware.Rack rack : rackAware.getRacks().values()) { Map rackInfo = new HashMap<>(); rackInfo.put("datacenterName", rack.getDatacenterName()); @@ -212,12 +203,7 @@ public final List> resolveRackAware(final AgentConnectionCon public final List resolveHostAware(final AgentConnectionConfig.HostAware hostAware) { List resolvedHosts = new ArrayList<>(); - for - ( - AgentConnectionConfig.Host host - : - hostAware.getHosts().values() - ) + for (AgentConnectionConfig.Host host : hostAware.getHosts().values()) { InetSocketAddress tmpAddress = new InetSocketAddress(host.getHost(), host.getPort()); resolvedHosts.add(tmpAddress); @@ -238,8 +224,8 @@ public final List resolveHostAware(final AgentConnectionConfi * if the connection is in an illegal state. 
*/ public final DistributedNativeConnectionProviderImpl tryEstablishConnection( - final DistributedNativeBuilder builder - ) throws AllNodesFailedException, IllegalStateException + final DistributedNativeBuilder builder) throws AllNodesFailedException, + IllegalStateException { try { @@ -285,6 +271,7 @@ public void close() throws IOException { myDistributedNativeConnectionProviderImpl.close(); } + /** * Add a nw node to the list of nodes. * @param myNode diff --git a/application/src/main/resources/ecc.yml b/application/src/main/resources/ecc.yml index 253f496a..ed88f3cb 100644 --- a/application/src/main/resources/ecc.yml +++ b/application/src/main/resources/ecc.yml @@ -98,6 +98,13 @@ connection: connectionDelay: time: 45 unit: MINUTES + ## + ## Allow routing requests directly to a remote datacenter. + ## This allows locks for other datacenters to be taken in that datacenter instead of via the local datacenter. + ## If clients are prevented from connecting directly to Cassandra nodes in other sites this is not possible. + ## If remote routing is disabled, SERIAL consistency will be used for those requests instead. + ## + remoteRouting: true jmx: ## ## The class used to provide JMX connections to Apache Cassandra. @@ -165,6 +172,15 @@ repair: priority: granularity_unit: HOURS ## + ## Specifies the type of lock to use for repairs. + ## "vnode" will lock each node involved in a repair individually and increase the number of + ## parallel repairs that can run in a single data center. + ## "datacenter" will lock each data center involved in a repair and only allow a single repair per data center. + ## "datacenter_and_vnode" will combine both options and allow a smooth transition between them without allowing + ## multiple repairs to run concurrently on a single node. + ## + lock_type: vnode + ## ## Specifies the unwind ratio to smooth out the load that repairs generate. ## This value is a ratio between 0 -> 100% of the execution time of a repair session.
## @@ -261,3 +277,28 @@ rest_server: ## host: localhost port: 8080 + + lock_factory: + cas: + ## + ## The keyspace used for the CAS lock factory tables. + ## + keyspace: ecchronos + ## + ## The number of seconds until the lock failure cache expires. + ## If an attempt to secure a lock is unsuccessful, + ## all subsequent attempts will be failed until + ## the cache expiration time is reached. + ## + cache_expiry_time_in_seconds: 30 + ## + ## Allow to override consistency level for LWT (lightweight transactions). Possible values are: + ## "DEFAULT" - Use consistency level based on remoteRouting. + ## "SERIAL" - Use SERIAL consistency for LWT regardless of remoteRouting. + ## "LOCAL" - Use LOCAL_SERIAL consistency for LWT regardless of remoteRouting. + ## + ## If you use remoteRouting: false and LOCAL then all locks will be taken locally + ## in the DC, i.e. there is a risk that multiple nodes in different datacenters will be able to lock the + ## same nodes causing multiple repairs on the same range/node at the same time. + ## + consistencySerial: "DEFAULT" diff --git a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/CASLock.java b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/CASLock.java new file mode 100644 index 00000000..b76880ec --- /dev/null +++ b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/CASLock.java @@ -0,0 +1,217 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.ericsson.bss.cassandra.ecchronos.core.impl.locks; + +import static com.ericsson.bss.cassandra.ecchronos.core.locks.LockFactory.DistributedLock; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.UUID; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.ScheduledFuture; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicReference; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.datastax.oss.driver.api.core.cql.ResultSet; +import com.datastax.oss.driver.api.core.cql.Row; +import com.ericsson.bss.cassandra.ecchronos.core.exceptions.LockException; + +/** + * A distributed lock for a single resource, acquired and released through Cassandra CAS statements. + * Once acquired, the lock is refreshed periodically by a scheduled task until it is closed.
+ */ +class CASLock implements DistributedLock, Runnable +{ + private static final Logger LOG = LoggerFactory.getLogger(CASLock.class); + + private final String myDataCenter; + private final String myResource; + private final int myPriority; + private final Map myMetadata; + + private final AtomicReference> myUpdateFuture = new AtomicReference<>(); + + private final AtomicInteger myFailedUpdateAttempts = new AtomicInteger(); + + private final int myLocallyHighestPriority; + private final int globalHighPriority; + + private final UUID myUuid; + + private final CASLockStatement myCasLockStatement; + + CASLock(final String dataCenter, + final String resource, + final int priority, + final Map metadata, + final UUID uuid, + final CASLockStatement casLockStatement) + { + myDataCenter = dataCenter; + myResource = resource; + myPriority = priority; + myMetadata = metadata; + myUuid = uuid; + myCasLockStatement = casLockStatement; + + List nodePriorities = computePriorities(); + + myLocallyHighestPriority = nodePriorities.stream() + .filter(n -> n.getUuid().equals(myUuid)) + .map(NodePriority::getPriority) + .findFirst() + .orElse(myPriority); + globalHighPriority = nodePriorities.stream() + .filter(n -> !n.getUuid().equals(myUuid)) + .map(NodePriority::getPriority) + .max(Integer::compare) + .orElse(myPriority); + } + + public boolean lock() + { + if (compete()) + { + LOG.trace("Trying to acquire lock for resource {}", myResource); + if (tryLock()) + { + ScheduledExecutorService executor = myCasLockStatement.getCasLockProperties().getExecutor(); + LOG.trace("Lock for resource {} acquired", myResource); + ScheduledFuture future = executor.scheduleAtFixedRate(this, + myCasLockStatement.getCasLockFactoryCacheContext().getLockUpdateTimeInSeconds(), + myCasLockStatement.getCasLockFactoryCacheContext().getLockUpdateTimeInSeconds(), TimeUnit.SECONDS); + myUpdateFuture.set(future); + + return true; + } + } + + return false; + } + + @Override + public void run() + { + try + { + 
updateLock(); + myFailedUpdateAttempts.set(0); + } + catch (LockException e) + { + int failedAttempts = myFailedUpdateAttempts.incrementAndGet(); + + if (failedAttempts >= myCasLockStatement.getCasLockFactoryCacheContext().getFailedLockRetryAttempts()) + { + LOG.error("Unable to re-lock resource '{}' after {} failed attempts", myResource, failedAttempts); + } + else + { + LOG.warn("Unable to re-lock resource '{}', {} failed attempts", myResource, failedAttempts, e); + } + } + } + + @Override + public void close() + { + ScheduledFuture future = myUpdateFuture.get(); + if (future != null) + { + future.cancel(true); + myCasLockStatement.execute( + myDataCenter, + myCasLockStatement.getRemoveLockStatement().bind(myResource, myUuid)); + + if (myLocallyHighestPriority <= myPriority) + { + myCasLockStatement.execute( + myDataCenter, + myCasLockStatement.getRemoveLockPriorityStatement().bind(myResource, myUuid)); + } + else + { + LOG.debug("Locally highest priority ({}) is higher than current ({}), will not remove", + myLocallyHighestPriority, + myPriority); + } + } + } + + private void updateLock() throws LockException + { + ResultSet resultSet = myCasLockStatement.execute(myDataCenter, + myCasLockStatement.getUpdateLockStatement().bind(myUuid, myMetadata, myResource, myUuid)); + + if (!resultSet.wasApplied()) + { + throw new LockException("CAS query failed"); + } + } + + private boolean compete() + { + if (myLocallyHighestPriority <= myPriority) + { + insertPriority(); + } + + LOG.trace("Highest priority for resource {}: {}", myResource, globalHighPriority); + return myPriority >= globalHighPriority; + } + + private void insertPriority() + { + myCasLockStatement.execute( + myDataCenter, + myCasLockStatement.getCompeteStatement().bind(myResource, myUuid, myPriority)); + } + + private boolean tryLock() + { + return myCasLockStatement.execute( + myDataCenter, + myCasLockStatement.getLockStatement().bind(myResource, myUuid, myMetadata)).wasApplied(); + } + + private List 
computePriorities() + { + List nodePriorities = new ArrayList<>(); + + ResultSet resultSet = myCasLockStatement.execute( + myDataCenter, + myCasLockStatement.getGetPriorityStatement().bind(myResource)); + + for (Row row : resultSet) + { + int priority = row.getInt(CASLockStatement.COLUMN_PRIORITY); + UUID hostId = row.getUuid(CASLockStatement.COLUMN_NODE); + + nodePriorities.add(new NodePriority(hostId, priority)); + } + + return nodePriorities; + } + + int getFailedAttempts() + { + return myFailedUpdateAttempts.get(); + } + +} diff --git a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/CASLockFactory.java b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/CASLockFactory.java new file mode 100644 index 00000000..1540fac5 --- /dev/null +++ b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/CASLockFactory.java @@ -0,0 +1,345 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.ericsson.bss.cassandra.ecchronos.core.impl.locks; + +import com.datastax.oss.driver.api.core.ConsistencyLevel; +import com.datastax.oss.driver.api.core.CqlIdentifier; +import com.datastax.oss.driver.api.core.cql.ResultSet; +import com.datastax.oss.driver.api.core.cql.Row; +import com.datastax.oss.driver.api.core.metadata.Metadata; +import com.datastax.oss.driver.api.core.metadata.Node; +import com.datastax.oss.driver.api.core.metadata.TokenMap; +import com.datastax.oss.driver.api.core.metadata.schema.KeyspaceMetadata; +import com.datastax.oss.driver.api.core.metadata.schema.TableMetadata; +import com.ericsson.bss.cassandra.ecchronos.core.exceptions.LockException; +import com.ericsson.bss.cassandra.ecchronos.core.locks.HostStates; +import com.ericsson.bss.cassandra.ecchronos.core.locks.LockFactory; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.util.concurrent.ThreadFactoryBuilder; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.Closeable; +import java.io.UnsupportedEncodingException; +import java.nio.ByteBuffer; +import java.util.Collection; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.UUID; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; + +/** + * Lock factory using Cassandra's LWT (Compare-And-Set operations) to create and maintain locks. + * + * Expected keyspace/tables: + * <pre>
+ * CREATE KEYSPACE IF NOT EXISTS ecchronos WITH replication = {'class': 'NetworkTopologyStrategy', 'datacenter1': 1};
+ *
+ * CREATE TABLE IF NOT EXISTS ecchronos.lock (
+ * resource text,
+ * node uuid,
+ * metadata map&lt;text,text&gt;,
+ * PRIMARY KEY(resource))
+ * WITH default_time_to_live = 600 AND gc_grace_seconds = 0;
+ *
+ * CREATE TABLE IF NOT EXISTS ecchronos.lock_priority(
+ * resource text,
+ * node uuid,
+ * priority int,
+ * PRIMARY KEY(resource, node))
+ * WITH default_time_to_live = 600 AND gc_grace_seconds = 0;
+ * </pre>
+ */ +public final class CASLockFactory implements LockFactory, Closeable +{ + private static final Logger LOG = LoggerFactory.getLogger(CASLockFactory.class); + + private static final String TABLE_LOCK = "lock"; + private static final String TABLE_LOCK_PRIORITY = "lock_priority"; + private static final int REFRESH_INTERVAL_RATIO = 10; + private static final int DEFAULT_LOCK_TIME_IN_SECONDS = 600; + + private final UUID myUuid; + private final HostStates myHostStates; + private final CASLockFactoryCacheContext myCasLockFactoryCacheContext; + + private final CASLockProperties myCasLockProperties; + private final CASLockStatement myCasLockStatement; + + CASLockFactory(final CASLockFactoryBuilder builder) + { + myCasLockProperties = new CASLockProperties(builder.getKeyspaceName(), + Executors.newSingleThreadScheduledExecutor( + new ThreadFactoryBuilder().setNameFormat("LockRefresher-%d").build()), + builder.getConsistencyType(), + builder.getNativeConnectionProvider().getCqlSession(), + builder.getStatementDecorator()); + + myHostStates = builder.getHostStates(); + + verifySchemasExists(); + + UUID hostId = builder.getNode().getHostId(); + + if (hostId == null) + { + hostId = UUID.randomUUID(); + LOG.warn("Unable to determine local nodes host id, using {} instead", hostId); + } + + myUuid = hostId; + myCasLockFactoryCacheContext = buildCasLockFactoryCacheContext(builder.getCacheExpiryTimeInSecond()); + + myCasLockStatement = new CASLockStatement(myCasLockProperties, myCasLockFactoryCacheContext); + } + + private CASLockFactoryCacheContext buildCasLockFactoryCacheContext(final long cacheExpiryTimeInSeconds) + { + int lockTimeInSeconds = getDefaultTimeToLiveFromLockTable(); + int lockUpdateTimeInSeconds = lockTimeInSeconds / REFRESH_INTERVAL_RATIO; + int myFailedLockRetryAttempts = (lockTimeInSeconds / lockUpdateTimeInSeconds) - 1; + + return CASLockFactoryCacheContext.newBuilder() + .withLockUpdateTimeInSeconds(lockUpdateTimeInSeconds) + 
.withFailedLockRetryAttempts(myFailedLockRetryAttempts) + .withLockCache(new LockCache(this::doTryLock, cacheExpiryTimeInSeconds)) + .build(); + } + + private int getDefaultTimeToLiveFromLockTable() + { + TableMetadata tableMetadata = myCasLockProperties.getSession() + .getMetadata() + .getKeyspace(myCasLockProperties.getKeyspaceName()) + .flatMap(ks -> ks.getTable(TABLE_LOCK)) + .orElse(null); + if (tableMetadata == null || tableMetadata.getOptions() == null) + { + LOG.warn("Could not parse default ttl of {}.{}", myCasLockProperties.getKeyspaceName(), TABLE_LOCK); + return DEFAULT_LOCK_TIME_IN_SECONDS; + } + Map tableOptions = tableMetadata.getOptions(); + return (Integer) tableOptions.get(CqlIdentifier.fromInternal("default_time_to_live")); + } + + @Override + public DistributedLock tryLock(final String dataCenter, + final String resource, + final int priority, + final Map metadata) + throws LockException + { + return myCasLockFactoryCacheContext.getLockCache() + .getLock(dataCenter, resource, priority, metadata); + } + + @Override + public Map getLockMetadata(final String dataCenter, final String resource) throws LockException + { + ResultSet resultSet = myCasLockStatement.execute( + dataCenter, myCasLockStatement.getLockMetadataStatement().bind(resource)); + + Row row = resultSet.one(); + + if (row != null) + { + return row.getMap("metadata", String.class, String.class); + } + else + { + throw new LockException("Unable to retrieve metadata for resource " + resource); + } + } + + @Override + public boolean sufficientNodesForLocking(final String dataCenter, final String resource) + { + try + { + Set nodes = getNodesForResource(dataCenter, resource); + + int quorum = nodes.size() / 2 + 1; + int liveNodes = liveNodes(nodes); + + LOG.trace("Live nodes {}, quorum: {}", liveNodes, quorum); + + return liveNodes >= quorum; + } + catch (UnsupportedEncodingException e) + { + LOG.warn("Unable to encode resource bytes", e); + } + + return false; + } + + @Override + public 
Optional getCachedFailure(final String dataCenter, final String resource) + { + return myCasLockFactoryCacheContext.getLockCache().getCachedFailure(dataCenter, resource); + } + + @Override + public void close() + { + myCasLockProperties.getExecutor().shutdown(); + try + { + if (!myCasLockProperties.getExecutor().awaitTermination(1, TimeUnit.SECONDS)) + { + LOG.warn("Executing tasks did not finish within one second"); + } + } + catch (InterruptedException e) + { + LOG.warn("Interrupted while waiting for executor to shut down", e); + } + } + + @VisibleForTesting + UUID getHostId() + { + return myUuid; + } + + @VisibleForTesting + CASLockFactoryCacheContext getCasLockFactoryCacheContext() + { + return myCasLockFactoryCacheContext; + } + + @VisibleForTesting + CASLockStatement getCasLockStatement() + { + return myCasLockStatement; + } + + @VisibleForTesting + ConsistencyLevel getSerialConsistencyLevel() + { + return myCasLockProperties.getSerialConsistencyLevel(); + } + + public static CASLockFactoryBuilder builder() + { + return new CASLockFactoryBuilder(); + } + + private DistributedLock doTryLock(final String dataCenter, + final String resource, + final int priority, + final Map metadata) throws LockException + { + LOG.trace("Trying lock for {} - {}", dataCenter, resource); + + if (!sufficientNodesForLocking(dataCenter, resource)) + { + LOG.warn("Not sufficient nodes to lock resource {} in datacenter {}", resource, dataCenter); + throw new LockException("Not sufficient nodes to lock"); + } + CASLock casLock = new CASLock(dataCenter, resource, priority, metadata, myUuid, myCasLockStatement); // NOSONAR + if (casLock.lock()) + { + return casLock; + } + else + { + throw new LockException(String.format("Unable to lock resource %s in datacenter %s", resource, dataCenter)); + } + } + + private Set getNodesForResource(final String dataCenter, + final String resource) throws UnsupportedEncodingException + { + Set dataCenterNodes = new HashSet<>(); + + Metadata metadata = 
myCasLockProperties.getSession().getMetadata(); + TokenMap tokenMap = metadata.getTokenMap() + .orElseThrow(() -> new IllegalStateException("Couldn't get token map, is it disabled?")); + Set nodes = tokenMap.getReplicas( + myCasLockProperties.getKeyspaceName(), ByteBuffer.wrap(resource.getBytes("UTF-8"))); + + if (dataCenter != null) + { + Iterator iterator = nodes.iterator(); + + while (iterator.hasNext()) + { + Node node = iterator.next(); + + if (dataCenter.equals(node.getDatacenter())) + { + dataCenterNodes.add(node); + } + } + + return dataCenterNodes; + } + + return nodes; + } + + private int liveNodes(final Collection nodes) + { + int live = 0; + for (Node node : nodes) + { + if (myHostStates.isUp(node)) + { + live++; + } + } + return live; + } + + private void verifySchemasExists() + { + Optional keyspaceMetadata = myCasLockProperties + .getSession() + .getMetadata() + .getKeyspace(myCasLockProperties.getKeyspaceName()); + + if (!keyspaceMetadata.isPresent()) + { + String msg = String.format("Keyspace %s does not exist, it needs to be created", + myCasLockProperties.getKeyspaceName()); + LOG.error(msg); + throw new IllegalStateException(msg); + } + + if (!keyspaceMetadata.get().getTable(TABLE_LOCK).isPresent()) + { + String msg = String.format("Table %s.%s does not exist, it needs to be created", + myCasLockProperties.getKeyspaceName(), + TABLE_LOCK); + LOG.error(msg); + throw new IllegalStateException(msg); + } + + if (!keyspaceMetadata.get().getTable(TABLE_LOCK_PRIORITY).isPresent()) + { + String msg = String.format("Table %s.%s does not exist, it needs to be created", + myCasLockProperties.getKeyspaceName(), + TABLE_LOCK_PRIORITY); + LOG.error(msg); + throw new IllegalStateException(msg); + } + } + +} diff --git a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/CASLockFactoryBuilder.java b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/CASLockFactoryBuilder.java new file mode 100644 index 
00000000..bd3ad7e4 --- /dev/null +++ b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/CASLockFactoryBuilder.java @@ -0,0 +1,137 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.ericsson.bss.cassandra.ecchronos.core.impl.locks; + +import com.datastax.oss.driver.api.core.metadata.Node; +import com.ericsson.bss.cassandra.ecchronos.connection.DistributedNativeConnectionProvider; +import com.ericsson.bss.cassandra.ecchronos.connection.StatementDecorator; +import com.ericsson.bss.cassandra.ecchronos.core.impl.utils.ConsistencyType; +import com.ericsson.bss.cassandra.ecchronos.core.locks.HostStates; + +/** + * Represents a container for builder configurations and state for the CASLockFactory. + * This class is used to decouple builder fields from CASLockFactory to avoid excessive field count. 
+ */ +public class CASLockFactoryBuilder +{ + private static final String DEFAULT_KEYSPACE_NAME = "ecchronos"; + private static final long DEFAULT_EXPIRY_TIME_IN_SECONDS = 30L; + private static final ConsistencyType DEFAULT_CONSISTENCY_SERIAL = ConsistencyType.DEFAULT; + + private DistributedNativeConnectionProvider myNativeConnectionProvider; + private HostStates myHostStates; + private StatementDecorator myStatementDecorator; + private String myKeyspaceName = DEFAULT_KEYSPACE_NAME; + private long myCacheExpiryTimeInSeconds = DEFAULT_EXPIRY_TIME_IN_SECONDS; + private ConsistencyType myConsistencyType = DEFAULT_CONSISTENCY_SERIAL; + private Node myNode; + + public final CASLockFactoryBuilder withNativeConnectionProvider(final DistributedNativeConnectionProvider nativeConnectionProvider) + { + myNativeConnectionProvider = nativeConnectionProvider; + return this; + } + + public final CASLockFactoryBuilder withHostStates(final HostStates hostStates) + { + myHostStates = hostStates; + return this; + } + + public final CASLockFactoryBuilder withStatementDecorator(final StatementDecorator statementDecorator) + { + myStatementDecorator = statementDecorator; + return this; + } + + public final CASLockFactoryBuilder withKeyspaceName(final String keyspaceName) + { + myKeyspaceName = keyspaceName; + return this; + } + + public final CASLockFactoryBuilder withCacheExpiryInSeconds(final long cacheExpiryInSeconds) + { + myCacheExpiryTimeInSeconds = cacheExpiryInSeconds; + return this; + } + + public final CASLockFactoryBuilder withConsistencySerial(final ConsistencyType consistencyType) + { + myConsistencyType = consistencyType; + return this; + } + + public final CASLockFactoryBuilder withNode(final Node node) + { + myNode = node; + return this; + } + + public final CASLockFactory build() + { + if (myNativeConnectionProvider == null) + { + throw new IllegalArgumentException("Native connection provider cannot be null"); + } + + if (myHostStates == null) + { + throw new 
IllegalArgumentException("Host states cannot be null"); + } + + if (myStatementDecorator == null) + { + throw new IllegalArgumentException("Statement decorator cannot be null"); + } + + return new CASLockFactory(this); + } + + public final DistributedNativeConnectionProvider getNativeConnectionProvider() + { + return myNativeConnectionProvider; + } + + public final HostStates getHostStates() + { + return myHostStates; + } + + public final StatementDecorator getStatementDecorator() + { + return myStatementDecorator; + } + + public final String getKeyspaceName() + { + return myKeyspaceName; + } + + public final long getCacheExpiryTimeInSecond() + { + return myCacheExpiryTimeInSeconds; + } + + public final ConsistencyType getConsistencyType() + { + return myConsistencyType; + } + + public final Node getNode() + { + return myNode; + } +} diff --git a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/CASLockFactoryCacheContext.java b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/CASLockFactoryCacheContext.java new file mode 100644 index 00000000..43269b3e --- /dev/null +++ b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/CASLockFactoryCacheContext.java @@ -0,0 +1,83 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.ericsson.bss.cassandra.ecchronos.core.impl.locks; + +/** + * Represents a container for cache-related configurations and state for the CASLockFactory. + * This class is used to decouple cache-related fields from CASLockFactory to avoid excessive field count. + */ +public final class CASLockFactoryCacheContext +{ + private final LockCache myLockCache; + private final long myLockUpdateTimeInSeconds; + private final int myFailedLockRetryAttempts; + + public CASLockFactoryCacheContext(final Builder builder) + { + myLockCache = builder.myLockCache; + myLockUpdateTimeInSeconds = builder.myLockUpdateTimeInSeconds; + myFailedLockRetryAttempts = builder.myFailedLockRetryAttempts; + } + + public LockCache getLockCache() + { + return myLockCache; + } + + public long getLockUpdateTimeInSeconds() + { + return myLockUpdateTimeInSeconds; + } + + public int getFailedLockRetryAttempts() + { + return myFailedLockRetryAttempts; + } + + public static Builder newBuilder() + { + return new Builder(); + } + + public static class Builder + { + private LockCache myLockCache; + private int myLockUpdateTimeInSeconds; + private int myFailedLockRetryAttempts; + + public final Builder withLockUpdateTimeInSeconds(final int lockTimeInSeconds) + { + myLockUpdateTimeInSeconds = lockTimeInSeconds; + return this; + } + + public final Builder withFailedLockRetryAttempts(final int failedLockRetryAttempts) + { + myFailedLockRetryAttempts = failedLockRetryAttempts; + return this; + } + + public final Builder withLockCache(final LockCache lockCache) + { + myLockCache = lockCache; + return this; + } + + public final CASLockFactoryCacheContext build() + { + return new CASLockFactoryCacheContext(this); + } + } +} diff --git a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/CASLockProperties.java b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/CASLockProperties.java new file mode 100644 index 00000000..5adebf2f --- /dev/null +++ 
b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/CASLockProperties.java @@ -0,0 +1,83 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.ericsson.bss.cassandra.ecchronos.core.impl.locks; + +import com.ericsson.bss.cassandra.ecchronos.core.impl.utils.ConsistencyType; +import java.util.concurrent.ScheduledExecutorService; + +import com.datastax.oss.driver.api.core.ConsistencyLevel; +import com.datastax.oss.driver.api.core.CqlSession; +import com.ericsson.bss.cassandra.ecchronos.connection.StatementDecorator; + +/** + * Represents a container for builder configurations and state for the CASLockStatement. + * This class is used to decouple builder fields from CASLock to avoid excessive field count. 
+ */ +public class CASLockProperties +{ + private final String myKeyspaceName; + private final ScheduledExecutorService myExecutor; + private final ConsistencyLevel mySerialConsistencyLevel; + private final CqlSession mySession; + private final StatementDecorator myStatementDecorator; + + CASLockProperties(final String keyspaceName, + final ScheduledExecutorService executor, + final ConsistencyType consistencyType, + final CqlSession session, + final StatementDecorator statementDecorator) + { + myKeyspaceName = keyspaceName; + myExecutor = executor; + mySerialConsistencyLevel = defineSerialConsistencyLevel(consistencyType); + mySession = session; + myStatementDecorator = statementDecorator; + } + + public final ConsistencyLevel defineSerialConsistencyLevel(final ConsistencyType consistencyType) + { + ConsistencyLevel serialConsistencyLevel; + + serialConsistencyLevel = ConsistencyType.LOCAL.equals(consistencyType) + ? ConsistencyLevel.LOCAL_SERIAL + : ConsistencyLevel.SERIAL; + return serialConsistencyLevel; + } + + public final String getKeyspaceName() + { + return myKeyspaceName; + } + + public final ScheduledExecutorService getExecutor() + { + return myExecutor; + } + + public final ConsistencyLevel getSerialConsistencyLevel() + { + return mySerialConsistencyLevel; + } + + public final CqlSession getSession() + { + return mySession; + } + + public final StatementDecorator getStatementDecorator() + { + return myStatementDecorator; + } +} diff --git a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/CASLockStatement.java b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/CASLockStatement.java new file mode 100644 index 00000000..e9b2d18a --- /dev/null +++ b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/CASLockStatement.java @@ -0,0 +1,225 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may 
not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.ericsson.bss.cassandra.ecchronos.core.impl.locks; + +import static com.datastax.oss.driver.api.querybuilder.QueryBuilder.bindMarker; + +import com.datastax.oss.driver.api.core.ConsistencyLevel; +import com.datastax.oss.driver.api.core.cql.BoundStatement; +import com.datastax.oss.driver.api.core.cql.PreparedStatement; +import com.datastax.oss.driver.api.core.cql.ResultSet; +import com.datastax.oss.driver.api.core.cql.SimpleStatement; +import com.datastax.oss.driver.api.core.cql.Statement; +import com.datastax.oss.driver.api.querybuilder.QueryBuilder; +import com.ericsson.bss.cassandra.ecchronos.connection.DataCenterAwareStatement; + +/** + * Prepares the CQL statements used by the CAS locks and executes them on the configured session. + * This class is used to decouple statement handling from CASLock to avoid excessive field count.
+ */ +public class CASLockStatement +{ + static final String COLUMN_RESOURCE = "resource"; + static final String COLUMN_NODE = "node"; + static final String COLUMN_METADATA = "metadata"; + static final String COLUMN_PRIORITY = "priority"; + + private static final String TABLE_LOCK = "lock"; + private static final String TABLE_LOCK_PRIORITY = "lock_priority"; + + private final PreparedStatement myCompeteStatement; + private final PreparedStatement myLockStatement; + private final PreparedStatement myRemoveLockStatement; + private final PreparedStatement myUpdateLockStatement; + private final PreparedStatement myRemoveLockPriorityStatement; + private final PreparedStatement myGetPriorityStatement; + private final PreparedStatement myGetLockMetadataStatement; + + private final CASLockProperties myCasLockProperties; + private final CASLockFactoryCacheContext myCasLockFactoryCacheContext; + + public CASLockStatement( + final CASLockProperties casLockProperties, + final CASLockFactoryCacheContext casLockFactoryCacheContext) + { + myCasLockProperties = casLockProperties; + myCasLockFactoryCacheContext = casLockFactoryCacheContext; + myCompeteStatement = myCasLockProperties.getSession().prepare(competeStatement()); + myLockStatement = myCasLockProperties.getSession().prepare((insertLockStatement())); + myRemoveLockStatement = myCasLockProperties.getSession().prepare(removeLockStatement()); + myUpdateLockStatement = myCasLockProperties.getSession().prepare((updateLockStatement())); + myRemoveLockPriorityStatement = myCasLockProperties.getSession().prepare(removeLockPriorityStatement()); + myGetPriorityStatement = myCasLockProperties.getSession().prepare(getPriorityStatement()); + myGetLockMetadataStatement = myCasLockProperties.getSession().prepare(lockMetadataStatement()); + } + + public final ResultSet execute(final String dataCenter, final BoundStatement statement) + { + Statement executeStatement; + + if (dataCenter != null) + { + executeStatement = new 
DataCenterAwareStatement(statement, dataCenter); + } + else + { + executeStatement = statement; + } + + return myCasLockProperties.getSession() + .execute(myCasLockProperties + .getStatementDecorator() + .apply(executeStatement)); + } + + private SimpleStatement insertLockStatement() + { + SimpleStatement insertLockStatement = QueryBuilder + .insertInto(myCasLockProperties.getKeyspaceName(), TABLE_LOCK) + .value(COLUMN_RESOURCE, bindMarker()) + .value(COLUMN_NODE, bindMarker()) + .value(COLUMN_METADATA, bindMarker()) + .ifNotExists() + .build() + .setConsistencyLevel(ConsistencyLevel.LOCAL_QUORUM) + .setSerialConsistencyLevel(myCasLockProperties.getSerialConsistencyLevel()); + return insertLockStatement; + } + + private SimpleStatement removeLockStatement() + { + SimpleStatement removeLockStatement = QueryBuilder + .deleteFrom(myCasLockProperties.getKeyspaceName(), TABLE_LOCK) + .whereColumn(COLUMN_RESOURCE) + .isEqualTo(bindMarker()) + .ifColumn(COLUMN_NODE) + .isEqualTo(bindMarker()) + .build() + .setConsistencyLevel(ConsistencyLevel.LOCAL_QUORUM) + .setSerialConsistencyLevel(myCasLockProperties.getSerialConsistencyLevel()); + return removeLockStatement; + } + + private SimpleStatement updateLockStatement() + { + SimpleStatement updateLockStatement = QueryBuilder + .update(myCasLockProperties.getKeyspaceName(), TABLE_LOCK) + .setColumn(COLUMN_NODE, bindMarker()) + .setColumn(COLUMN_METADATA, bindMarker()) + .whereColumn(COLUMN_RESOURCE) + .isEqualTo(bindMarker()) + .ifColumn(COLUMN_NODE) + .isEqualTo(bindMarker()) + .build() + .setConsistencyLevel(ConsistencyLevel.LOCAL_QUORUM) + .setSerialConsistencyLevel(myCasLockProperties.getSerialConsistencyLevel()); + return updateLockStatement; + } + + private SimpleStatement competeStatement() + { + SimpleStatement competeStatement = QueryBuilder + .insertInto(myCasLockProperties.getKeyspaceName(), TABLE_LOCK_PRIORITY) + .value(COLUMN_RESOURCE, bindMarker()) + .value(COLUMN_NODE, bindMarker()) + .value(COLUMN_PRIORITY, 
bindMarker()) + .build() + .setConsistencyLevel(ConsistencyLevel.LOCAL_QUORUM); + return competeStatement; + } + + private SimpleStatement getPriorityStatement() + { + SimpleStatement priorityStatement = QueryBuilder + .selectFrom(myCasLockProperties.getKeyspaceName(), TABLE_LOCK_PRIORITY) + .columns(COLUMN_PRIORITY, COLUMN_NODE) + .whereColumn(COLUMN_RESOURCE) + .isEqualTo(bindMarker()) + .build() + .setConsistencyLevel(ConsistencyLevel.LOCAL_QUORUM); + return priorityStatement; + } + + private SimpleStatement removeLockPriorityStatement() + { + SimpleStatement removeLockPriorityStatement = QueryBuilder + .deleteFrom(myCasLockProperties.getKeyspaceName(), TABLE_LOCK_PRIORITY) + .whereColumn(COLUMN_RESOURCE) + .isEqualTo(bindMarker()) + .whereColumn(COLUMN_NODE) + .isEqualTo(bindMarker()) + .build() + .setConsistencyLevel(ConsistencyLevel.LOCAL_QUORUM); + return removeLockPriorityStatement; + } + + private SimpleStatement lockMetadataStatement() + { + SimpleStatement lockMetadataStatement = QueryBuilder + .selectFrom(myCasLockProperties.getKeyspaceName(), TABLE_LOCK) + .column(COLUMN_METADATA) + .whereColumn(COLUMN_RESOURCE) + .isEqualTo(bindMarker()) + .build() + .setSerialConsistencyLevel(myCasLockProperties.getSerialConsistencyLevel()); + return lockMetadataStatement; + } + + public final PreparedStatement getCompeteStatement() + { + return myCompeteStatement; + } + + public final PreparedStatement getLockStatement() + { + return myLockStatement; + } + + public final PreparedStatement getRemoveLockStatement() + { + return myRemoveLockStatement; + } + + public final PreparedStatement getUpdateLockStatement() + { + return myUpdateLockStatement; + } + + public final PreparedStatement getRemoveLockPriorityStatement() + { + return myRemoveLockPriorityStatement; + } + + public final PreparedStatement getGetPriorityStatement() + { + return myGetPriorityStatement; + } + + public final PreparedStatement getLockMetadataStatement() + { + return myGetLockMetadataStatement; 
+ } + + public final CASLockFactoryCacheContext getCasLockFactoryCacheContext() + { + return myCasLockFactoryCacheContext; + } + + public final CASLockProperties getCasLockProperties() + { + return myCasLockProperties; + } + +} diff --git a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/HostStatesImpl.java b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/HostStatesImpl.java new file mode 100644 index 00000000..90d652a3 --- /dev/null +++ b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/HostStatesImpl.java @@ -0,0 +1,216 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.ericsson.bss.cassandra.ecchronos.core.impl.locks; + +import com.datastax.oss.driver.api.core.CqlSession; +import com.datastax.oss.driver.api.core.metadata.Metadata; +import com.ericsson.bss.cassandra.ecchronos.core.impl.logging.ThrottlingLogger; +import com.ericsson.bss.cassandra.ecchronos.core.jmx.DistributedJmxProxy; +import com.ericsson.bss.cassandra.ecchronos.core.jmx.DistributedJmxProxyFactory; +import com.ericsson.bss.cassandra.ecchronos.core.locks.DriverNode; +import com.ericsson.bss.cassandra.ecchronos.core.locks.HostStates; +import java.io.Closeable; +import java.io.IOException; +import java.net.InetAddress; +import java.util.Optional; +import java.util.UUID; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.TimeUnit; + +import com.datastax.oss.driver.api.core.metadata.Node; +import com.google.common.annotations.VisibleForTesting; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Implementation of the {@link HostStates} interface that uses JMX to retrieve node statuses and caches the retrieved + * statuses for some time.
+ */ +public final class HostStatesImpl implements HostStates, Closeable +{ + private static final Logger LOG = LoggerFactory.getLogger(HostStatesImpl.class); + private static final ThrottlingLogger THROTTLED_LOGGER = new ThrottlingLogger(LOG, 1, TimeUnit.MINUTES); + + private static final long DEFAULT_REFRESH_INTERVAL_IN_MS = TimeUnit.SECONDS.toMillis(10); + + private final ConcurrentHashMap myHostStates = new ConcurrentHashMap<>(); + private final Object myRefreshLock = new Object(); + private final long myRefreshIntervalInMs; + private final CqlSession myCqlSession; + + private volatile long myLastRefresh = -1; + + private final DistributedJmxProxyFactory myJmxProxyFactory; + + private HostStatesImpl(final Builder builder) + { + myRefreshIntervalInMs = builder.myRefreshIntervalInMs; + myJmxProxyFactory = builder.myJmxProxyFactory; + myCqlSession = builder.myCqlSession; + } + + @Override + public boolean isUp(final InetAddress address) + { + refreshNodeStatus(address); + + Boolean status = myHostStates.get(address); + return status != null && status; + } + + @Override + public boolean isUp(final Node node) + { + return isUp(node.getBroadcastAddress().get().getAddress()); + } + + @Override + public boolean isUp(final DriverNode node) + { + return isUp(node.getPublicAddress()); + } + + @Override + public void close() + { + myHostStates.clear(); + } + + private void refreshNodeStatus(final InetAddress address) + { + if (shouldRefreshNodeStatus()) + { + synchronized (myRefreshLock) + { + if (shouldRefreshNodeStatus() && !tryRefreshHostStates(address)) + { + myHostStates.clear(); + } + } + } + } + + @VisibleForTesting + void resetLastRefresh() + { + myLastRefresh = -1; + } + + private boolean shouldRefreshNodeStatus() + { + return myLastRefresh == -1 || myLastRefresh < (System.currentTimeMillis() - myRefreshIntervalInMs); + } + + private synchronized boolean tryRefreshHostStates(final InetAddress address) + { + if (myJmxProxyFactory == null) + { + return false; + } + 
+ UUID hostId = getHostIdForAddress(address); + try (DistributedJmxProxy proxy = myJmxProxyFactory.connect()) + { + for (String liveHost : proxy.getLiveNodes(hostId)) + { + InetAddress host = InetAddress.getByName(liveHost); + + if (changeHostState(host, true)) + { + LOG.debug("Host {} marked as UP", host); + } + } + + for (String unreachableHost : proxy.getUnreachableNodes(hostId)) + { + InetAddress host = InetAddress.getByName(unreachableHost); + + if (changeHostState(host, false)) + { + LOG.debug("Host {} marked as DOWN", host); + } + } + + myLastRefresh = System.currentTimeMillis(); + return true; + } + catch (IOException e) + { + THROTTLED_LOGGER.warn("Unable to retrieve host states", e); + } + + return false; + } + + private boolean changeHostState(final InetAddress host, final boolean newValue) + { + Boolean oldValue = myHostStates.put(host, newValue); + + return oldValue == null || oldValue != newValue; + } + + private UUID getHostIdForAddress(final InetAddress address) + { + Metadata metadata = myCqlSession.getMetadata(); + Optional nodeOptional = metadata.getNodes() + .values() + .stream() + .filter(node -> node.getBroadcastAddress().isPresent() + && node.getBroadcastAddress().get().getAddress().equals(address)) + .findFirst(); + + return nodeOptional.map(Node::getHostId).orElse(null); + } + + public static Builder builder() + { + return new Builder(); + } + + public static class Builder + { + private DistributedJmxProxyFactory myJmxProxyFactory; + private long myRefreshIntervalInMs = DEFAULT_REFRESH_INTERVAL_IN_MS; + private CqlSession myCqlSession; + + public final Builder withJmxProxyFactory(final DistributedJmxProxyFactory jmxProxyFactory) + { + myJmxProxyFactory = jmxProxyFactory; + return this; + } + + public final Builder withRefreshIntervalInMs(final long refreshIntervalInMs) + { + myRefreshIntervalInMs = refreshIntervalInMs; + return this; + } + + public final Builder withCqlSession(final CqlSession session) + { + myCqlSession = session; + return 
this; + } + + public final HostStatesImpl build() + { + if (myJmxProxyFactory == null) + { + throw new IllegalArgumentException("JMX Proxy Factory must be set"); + } + + return new HostStatesImpl(this); + } + } +} diff --git a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/LockCache.java b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/LockCache.java new file mode 100644 index 00000000..73751788 --- /dev/null +++ b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/LockCache.java @@ -0,0 +1,136 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.ericsson.bss.cassandra.ecchronos.core.impl.locks; + +import static com.ericsson.bss.cassandra.ecchronos.core.locks.LockFactory.DistributedLock; + +import com.ericsson.bss.cassandra.ecchronos.core.exceptions.LockException; +import com.github.benmanes.caffeine.cache.Cache; +import com.github.benmanes.caffeine.cache.Caffeine; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.concurrent.TimeUnit; + +import static com.google.common.base.Preconditions.checkNotNull; + +public final class LockCache +{ + private static final Logger LOG = LoggerFactory.getLogger(LockCache.class); + + private final Cache myFailureCache; + private final LockSupplier myLockSupplier; + + public LockCache(final LockSupplier lockSupplier, final long expireTimeInSeconds) + { + this(lockSupplier, expireTimeInSeconds, TimeUnit.SECONDS); + } + + LockCache(final LockSupplier lockSupplier, final long expireTime, final TimeUnit expireTimeUnit) + { + myLockSupplier = lockSupplier; + + myFailureCache = Caffeine.newBuilder() + .expireAfterWrite(expireTime, expireTimeUnit) + .executor(Runnable::run) + .build(); + } + + public Optional getCachedFailure(final String dataCenter, final String resource) + { + return getCachedFailure(new LockKey(dataCenter, resource)); + } + + public DistributedLock getLock(final String dataCenter, + final String resource, + final int priority, + final Map metadata) + throws LockException + { + LockKey lockKey = new LockKey(dataCenter, resource); + + Optional cachedFailure = getCachedFailure(lockKey); + + if (cachedFailure.isPresent()) + { + throwCachedLockException(cachedFailure.get()); + } + + try + { + return myLockSupplier.getLock(dataCenter, resource, priority, metadata); + } + catch (LockException e) + { + myFailureCache.put(lockKey, e); + throw e; + } + } + + private void throwCachedLockException(final LockException e) throws LockException + { + 
LOG.debug("Encountered cached locking failure, throwing exception", e); + throw e; + } + + private Optional getCachedFailure(final LockKey lockKey) + { + return Optional.ofNullable(myFailureCache.getIfPresent(lockKey)); + } + + @FunctionalInterface + public interface LockSupplier + { + DistributedLock getLock(String dataCenter, String resource, int priority, Map metadata) + throws LockException; + } + + static final class LockKey + { + private final String myDataCenter; + private final String myResourceName; + + LockKey(final String dataCenter, final String resourceName) + { + myDataCenter = dataCenter; + myResourceName = checkNotNull(resourceName); + } + + @Override + public boolean equals(final Object o) + { + if (this == o) + { + return true; + } + if (o == null || getClass() != o.getClass()) + { + return false; + } + LockKey lockKey = (LockKey) o; + return Objects.equals(myDataCenter, lockKey.myDataCenter) + && Objects.equals(myResourceName, lockKey.myResourceName); + } + + @Override + public int hashCode() + { + return Objects.hash(myDataCenter, myResourceName); + } + } +} diff --git a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/LockCollection.java b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/LockCollection.java new file mode 100644 index 00000000..74d524c0 --- /dev/null +++ b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/LockCollection.java @@ -0,0 +1,58 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.ericsson.bss.cassandra.ecchronos.core.impl.locks; + +import com.ericsson.bss.cassandra.ecchronos.core.locks.LockFactory; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * A lock implementation covering multiple distributed locks. + *

+ * Closes all underlying locks when closed. + */ +public class LockCollection implements LockFactory.DistributedLock +{ + private static final Logger LOG = LoggerFactory.getLogger(LockCollection.class); + + private final List myLocks; + + public LockCollection(final Collection locks) + { + myLocks = new ArrayList<>(locks); + } + + /** + * Close. + */ + @Override + public void close() + { + for (LockFactory.DistributedLock lock : myLocks) + { + try + { + lock.close(); + } + catch (Exception e) + { + LOG.warn("Unable to release lock {}", lock, e); + } + } + } +} diff --git a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/NodePriority.java b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/NodePriority.java new file mode 100644 index 00000000..e40a3d5a --- /dev/null +++ b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/NodePriority.java @@ -0,0 +1,43 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.ericsson.bss.cassandra.ecchronos.core.impl.locks; + +import java.util.UUID; + +/** + * Represents a container for node priority configurations and state for the CASLockFactory. + * This class is used to decouple node priority fields from CASLockFactory to avoid excessive field count. 
+ */ +public final class NodePriority +{ + private final UUID myNode; + private final int myPriority; + + public NodePriority(final UUID node, final int priority) + { + myNode = node; + myPriority = priority; + } + + public UUID getUuid() + { + return myNode; + } + + public int getPriority() + { + return myPriority; + } +} diff --git a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/package-info.java b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/package-info.java new file mode 100644 index 00000000..d8650373 --- /dev/null +++ b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/package-info.java @@ -0,0 +1,18 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * Contains the implementations related to locks. 
+ */ +package com.ericsson.bss.cassandra.ecchronos.core.impl.locks; diff --git a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/utils/ConsistencyType.java b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/utils/ConsistencyType.java new file mode 100644 index 00000000..9865fb0d --- /dev/null +++ b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/utils/ConsistencyType.java @@ -0,0 +1,22 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.ericsson.bss.cassandra.ecchronos.core.impl.utils; + +public enum ConsistencyType +{ + DEFAULT, + LOCAL, + SERIAL +} diff --git a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/utils/package-info.java b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/utils/package-info.java new file mode 100644 index 00000000..796be8fd --- /dev/null +++ b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/utils/package-info.java @@ -0,0 +1,18 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * Contains utility classes. + */ +package com.ericsson.bss.cassandra.ecchronos.core.impl.utils; diff --git a/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/exceptions/LockException.java b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/exceptions/LockException.java new file mode 100644 index 00000000..3a7e4d77 --- /dev/null +++ b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/exceptions/LockException.java @@ -0,0 +1,38 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.ericsson.bss.cassandra.ecchronos.core.exceptions; + +/** + * Exception thrown when a lock factory is unable to get a lock.
+ */ +public class LockException extends Exception +{ + private static final long serialVersionUID = 1699712279389641954L; + + public LockException(final String message) + { + super(message); + } + + public LockException(final String message, final Throwable t) + { + super(message, t); + } + + public LockException(final Throwable t) + { + super(t); + } +} diff --git a/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/exceptions/package-info.java b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/exceptions/package-info.java new file mode 100644 index 00000000..b8ca0e69 --- /dev/null +++ b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/exceptions/package-info.java @@ -0,0 +1,19 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * Contains lock-related exceptions.
+ */ +package com.ericsson.bss.cassandra.ecchronos.core.exceptions; + diff --git a/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/locks/DriverNode.java b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/locks/DriverNode.java new file mode 100644 index 00000000..4cde5e3c --- /dev/null +++ b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/locks/DriverNode.java @@ -0,0 +1,96 @@ +/* + * Copyright 2020 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.ericsson.bss.cassandra.ecchronos.core.locks; + +import com.datastax.oss.driver.api.core.metadata.Node; + +import java.net.InetAddress; +import java.util.Objects; +import java.util.UUID; + +/** + * An internal representation of a node. + * This class together with {@link com.ericsson.bss.cassandra.ecchronos.core.utils.NodeResolver} makes it easier to + * translate node IP to host ID and the other way around. + */ +public class DriverNode +{ + private final Node node; + + public DriverNode(final Node aNode) + { + this.node = aNode; + } + + /** + * Get the host id of the node. + * + * @return The host id of the node. + */ + public UUID getId() + { + return node.getHostId(); + } + + /** + * Get the public ip address of the node. + * + * @return The public ip address of the node. + */ + public InetAddress getPublicAddress() + { + return node.getBroadcastAddress().get().getAddress(); + } + + /** + * Get the datacenter the node resides in.
+ * + * @return The datacenter of the node. + */ + public String getDatacenter() + { + return node.getDatacenter(); + } + + /** + * Check for equality. + */ + @Override + public boolean equals(final Object o) + { + if (this == o) + { + return true; + } + if (o == null || getClass() != o.getClass()) + { + return false; + } + DriverNode that = (DriverNode) o; + return node.equals(that.node); + } + + @Override + public final int hashCode() + { + return Objects.hash(node); + } + + @Override + public final String toString() + { + return String.format("Node(%s:%s:%s)", getId(), getDatacenter(), getPublicAddress()); + } +} diff --git a/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/locks/HostStates.java b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/locks/HostStates.java new file mode 100644 index 00000000..28756485 --- /dev/null +++ b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/locks/HostStates.java @@ -0,0 +1,50 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.ericsson.bss.cassandra.ecchronos.core.locks; + +import java.net.InetAddress; + +import com.datastax.oss.driver.api.core.metadata.Node; + +/** + * Interface used to determine node statuses. + */ +public interface HostStates +{ + /** + * Check if a host is up. + * + * @param address + * The broadcast address of the host. + * @return True if the node is up. 
False will be returned if the state is unknown or if the host is down. + */ + boolean isUp(InetAddress address); + + /** + * Check if a host is up. + * + * @param node The node. + * @return True if the host is up. False will be returned if the state is unknown or if the host is down. + */ + boolean isUp(Node node); + + /** + * Check if a node is up. + * + * @param node The node. + * @return True if the node is up. False will be returned if the state is unknown or if the node is down. + */ + boolean isUp(DriverNode node); +} diff --git a/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/locks/LockFactory.java b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/locks/LockFactory.java new file mode 100644 index 00000000..cfc7eaea --- /dev/null +++ b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/locks/LockFactory.java @@ -0,0 +1,95 @@ +/* + * Copyright 2018 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.ericsson.bss.cassandra.ecchronos.core.locks; + +import java.io.Closeable; +import java.util.Map; +import java.util.Optional; + +import com.ericsson.bss.cassandra.ecchronos.core.exceptions.LockException; + +/** + * Interface for distributed lock factories. + */ +public interface LockFactory +{ + + /** + * Try to lock a distributed resource using the provided priority. + * + * @param dataCenter + * The data center the lock belongs to or null if it's a global lock. 
+ * @param resource + * The resource to lock. + * @param priority + * The priority of the lock. + * @param metadata + * The metadata of the lock. + * @return The lock if able to lock the resource. + */ + DistributedLock tryLock(String dataCenter, String resource, int priority, Map metadata) + throws LockException; + + /** + * Get the metadata of a resource lock. + * + * @param dataCenter + * The data center the lock belongs to or null if it's a global lock. + * @param resource + * The data center resource: + * i.e "RepairResource-DC1-1". + * @return The metadata of the lock + * containing keyspace and table to repair. + * @throws LockException + */ + Map getLockMetadata(String dataCenter, String resource) throws LockException; + + /** + * Checks if local_quorum is met. + * + * @param dataCenter + * The data center the lock belongs to or null if it's a global lock. + * @param resource + * The data center resource. + * i.e "RepairResource-DC1-1". + * @return boolean + * Indicates if local_quorum is met. + */ + boolean sufficientNodesForLocking(String dataCenter, String resource); + + /** + * Utility method to return a cached lock exception if one is available. + * + * @param dataCenter The data center the lock is for or null if it's a global lock. + * @param resource The resource the lock is for. + * @return A cached exception if available. + */ + default Optional getCachedFailure(String dataCenter, String resource) + { + return Optional.empty(); + } + + /** + * A locked resource that gets released by the call of the {@link DistributedLock#close() close()} method. + */ + interface DistributedLock extends Closeable + { + /** + * Releases the locked resource. 
+ */ + @Override + void close(); + } +} diff --git a/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/locks/package-info.java b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/locks/package-info.java new file mode 100644 index 00000000..8a0252b9 --- /dev/null +++ b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/locks/package-info.java @@ -0,0 +1,18 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * Contains interfaces related to locks. + */ +package com.ericsson.bss.cassandra.ecchronos.core.locks; From 5b7b5cf064eaf632a378a72b70a58472b573b1af Mon Sep 17 00:00:00 2001 From: sajid riaz Date: Mon, 4 Nov 2024 15:31:41 +0100 Subject: [PATCH 3/8] Cassandra based distributed locking mechanism # 741 - Cassandra tables called lock and lock_priority, to manage task execution and synchronization across multiple nodes. 
--- .../ecchronos/application/config/Config.java | 17 + .../AgentNativeConnectionProvider.java | 3 - application/src/main/resources/ecc.yml | 7 + .../application/config/TestConfig.java | 6 + .../lockfactory/TestCasLockFactoryConfig.java | 53 ++ application/src/test/resources/all_set.yml | 9 +- .../builders/DistributedNativeBuilder.java | 1 + ...stributedNativeConnectionProviderImpl.java | 1 - core.impl/pom.xml | 22 + .../impl/AbstractCassandraContainerTest.java | 114 ++++ .../core/impl/locks/TestCASLockFactory.java | 616 ++++++++++++++++++ .../core/impl/locks/TestLockCache.java | 165 +++++ .../core/impl/locks/TestLockCollection.java | 91 +++ 13 files changed, 1100 insertions(+), 5 deletions(-) create mode 100644 application/src/test/java/com/ericsson/bss/cassandra/ecchronos/application/config/lockfactory/TestCasLockFactoryConfig.java create mode 100644 core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/AbstractCassandraContainerTest.java create mode 100644 core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/TestCASLockFactory.java create mode 100644 core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/TestLockCache.java create mode 100644 core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/TestLockCollection.java diff --git a/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/config/Config.java b/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/config/Config.java index 7cc16003..4c1969a6 100644 --- a/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/config/Config.java +++ b/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/config/Config.java @@ -15,6 +15,7 @@ package com.ericsson.bss.cassandra.ecchronos.application.config; import com.ericsson.bss.cassandra.ecchronos.application.config.connection.ConnectionConfig; +import 
com.ericsson.bss.cassandra.ecchronos.application.config.lockfactory.LockFactoryConfig; import com.ericsson.bss.cassandra.ecchronos.application.config.repair.GlobalRepairConfig; import com.ericsson.bss.cassandra.ecchronos.application.config.rest.RestServerConfig; import com.ericsson.bss.cassandra.ecchronos.application.config.runpolicy.RunPolicyConfig; @@ -28,6 +29,7 @@ public class Config private RunPolicyConfig myRunPolicyConfig = new RunPolicyConfig(); private SchedulerConfig mySchedulerConfig = new SchedulerConfig(); private RestServerConfig myRestServerConfig = new RestServerConfig(); + private LockFactoryConfig myLockFactoryConfig = new LockFactoryConfig(); @JsonProperty("connection") public final ConnectionConfig getConnectionConfig() @@ -119,4 +121,19 @@ public final void setRestServerConfig(final RestServerConfig restServerConfig) myRestServerConfig = restServerConfig; } } + + @JsonProperty("lock_factory") + public final LockFactoryConfig getLockFactory() + { + return myLockFactoryConfig; + } + + @JsonProperty("lock_factory") + public final void setLockFactoryConfig(final LockFactoryConfig lockFactoryConfig) + { + if (lockFactoryConfig != null) + { + myLockFactoryConfig = lockFactoryConfig; + } + } } diff --git a/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/providers/AgentNativeConnectionProvider.java b/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/providers/AgentNativeConnectionProvider.java index af68f05b..452a5661 100644 --- a/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/providers/AgentNativeConnectionProvider.java +++ b/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/providers/AgentNativeConnectionProvider.java @@ -49,7 +49,6 @@ public class AgentNativeConnectionProvider implements DistributedNativeConnectio private static final Logger LOG = LoggerFactory.getLogger(AgentNativeConnectionProvider.class); private final 
DistributedNativeConnectionProviderImpl myDistributedNativeConnectionProviderImpl; - private final boolean myRemoteRouting; /** * Constructs an {@code AgentNativeConnectionProvider} with the specified configuration, security supplier, and @@ -71,7 +70,6 @@ public AgentNativeConnectionProvider( AgentConnectionConfig agentConnectionConfig = config.getConnectionConfig() .getCqlConnection() .getAgentConnectionConfig(); - myRemoteRouting = agentConnectionConfig.getRemoteRouting(); Security.CqlSecurity cqlSecurity = cqlSecuritySupplier.get(); boolean authEnabled = cqlSecurity.getCqlCredentials().isEnabled(); boolean tlsEnabled = cqlSecurity.getCqlTlsConfig().isEnabled(); @@ -96,7 +94,6 @@ public AgentNativeConnectionProvider( .withSslEngineFactory(sslEngineFactory) .withSchemaChangeListener(defaultRepairConfigurationProvider) .withNodeStateListener(defaultRepairConfigurationProvider); - LOG.info("Preparing Agent Connection Config"); nativeConnectionBuilder = resolveAgentProviderBuilder(nativeConnectionBuilder, agentConnectionConfig); LOG.info("Establishing Connection With Nodes"); diff --git a/application/src/main/resources/ecc.yml b/application/src/main/resources/ecc.yml index ed88f3cb..33751116 100644 --- a/application/src/main/resources/ecc.yml +++ b/application/src/main/resources/ecc.yml @@ -28,6 +28,13 @@ connection: ## (instanceName: unique identifier), that will be used ## as ecchronos_id (partition key in nodes_sync table). instanceName: unique_identifier + ## + ## Allow routing requests directly to a remote datacenter. + ## This allows locks for other datacenters to be taken in that datacenter instead of via the local datacenter. + ## If clients are prevented from connecting directly to Cassandra nodes in other sites this is not possible. + ## If remote routing is disabled, instead SERIAL consistency will be used for those requests.
+ ## + remoteRouting: true ## Define the Agent strategy, it can be ## - datacenterAware; ## - rackAware; and diff --git a/application/src/test/java/com/ericsson/bss/cassandra/ecchronos/application/config/TestConfig.java b/application/src/test/java/com/ericsson/bss/cassandra/ecchronos/application/config/TestConfig.java index 8089448c..baf9e9ad 100644 --- a/application/src/test/java/com/ericsson/bss/cassandra/ecchronos/application/config/TestConfig.java +++ b/application/src/test/java/com/ericsson/bss/cassandra/ecchronos/application/config/TestConfig.java @@ -317,5 +317,11 @@ public void testInstanceName() { assertThat(nativeConnection.getAgentConnectionConfig().getInstanceName()).isEqualTo("unique_identifier"); } + + @Test + public void testRemoteRouting() + { + assertThat(nativeConnection.getAgentConnectionConfig().getRemoteRouting()).isEqualTo(false); + } } diff --git a/application/src/test/java/com/ericsson/bss/cassandra/ecchronos/application/config/lockfactory/TestCasLockFactoryConfig.java b/application/src/test/java/com/ericsson/bss/cassandra/ecchronos/application/config/lockfactory/TestCasLockFactoryConfig.java new file mode 100644 index 00000000..94e23156 --- /dev/null +++ b/application/src/test/java/com/ericsson/bss/cassandra/ecchronos/application/config/lockfactory/TestCasLockFactoryConfig.java @@ -0,0 +1,53 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.ericsson.bss.cassandra.ecchronos.application.config.lockfactory; + +import com.ericsson.bss.cassandra.ecchronos.application.config.Config; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; +import org.junit.Test; + +import java.io.File; +import java.io.IOException; + +import static org.assertj.core.api.Assertions.assertThat; + +public class TestCasLockFactoryConfig +{ + @Test + public void testCasLockFactoryConfigWithProvidedValue() throws IOException + { + CasLockFactoryConfig casLockFactoryConfig = getCasLockFactoryConfig("all_set.yml"); + assertThat(casLockFactoryConfig.getKeyspaceName()).isEqualTo("ecc"); + assertThat(casLockFactoryConfig.getFailureCacheExpiryTimeInSeconds()).isEqualTo(100L); + } + + @Test + public void testCasLockFactoryConfigDefaultValue() throws IOException + { + CasLockFactoryConfig casLockFactoryConfig = getCasLockFactoryConfig("nothing_set.yml"); + assertThat(casLockFactoryConfig.getKeyspaceName()).isEqualTo("ecchronos"); + assertThat(casLockFactoryConfig.getFailureCacheExpiryTimeInSeconds()).isEqualTo(30L); + } + + private CasLockFactoryConfig getCasLockFactoryConfig(final String fileName) throws IOException + { + ClassLoader classLoader = Thread.currentThread().getContextClassLoader(); + File file = new File(classLoader.getResource(fileName).getFile()); + ObjectMapper mapper = new ObjectMapper(new YAMLFactory()); + Config config = mapper.readValue(file, Config.class); + return config.getLockFactory().getCasLockFactoryConfig(); + } +} diff --git a/application/src/test/resources/all_set.yml b/application/src/test/resources/all_set.yml index 218c1972..b5a7977c 100644 --- a/application/src/test/resources/all_set.yml +++ b/application/src/test/resources/all_set.yml @@ -42,6 +42,7 @@ connection: port: 9042 - host: 127.0.0.4 port: 9042 + remoteRouting: false provider: com.ericsson.bss.cassandra.ecchronos.application.providers.AgentNativeConnectionProvider 
connectionDelay: time: 45 @@ -90,4 +91,10 @@ scheduler: rest_server: host: 127.0.0.2 - port: 8081 \ No newline at end of file + port: 8081 + +lock_factory: + cas: + keyspace: ecc + cache_expiry_time_in_seconds: 100 + consistencySerial: "LOCAL" \ No newline at end of file diff --git a/connection.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/connection/impl/builders/DistributedNativeBuilder.java b/connection.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/connection/impl/builders/DistributedNativeBuilder.java index dbf78ef5..b858addd 100644 --- a/connection.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/connection/impl/builders/DistributedNativeBuilder.java +++ b/connection.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/connection/impl/builders/DistributedNativeBuilder.java @@ -74,6 +74,7 @@ public class DistributedNativeBuilder private SslEngineFactory mySslEngineFactory = null; private SchemaChangeListener mySchemaChangeListener = null; private NodeStateListener myNodeStateListener = null; + private boolean myRemoteRouting; /** * Sets the initial contact points for the distributed native connection. 
diff --git a/connection.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/connection/impl/providers/DistributedNativeConnectionProviderImpl.java b/connection.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/connection/impl/providers/DistributedNativeConnectionProviderImpl.java index 7c48a049..9f32b138 100644 --- a/connection.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/connection/impl/providers/DistributedNativeConnectionProviderImpl.java +++ b/connection.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/connection/impl/providers/DistributedNativeConnectionProviderImpl.java @@ -125,5 +125,4 @@ public Boolean confirmNodeValid(final Node node) { return myDistributedNativeBuilder.confirmNodeValid(node); } - } diff --git a/core.impl/pom.xml b/core.impl/pom.xml index 9351dfeb..3004eecd 100644 --- a/core.impl/pom.xml +++ b/core.impl/pom.xml @@ -105,6 +105,28 @@ test + + io.micrometer + micrometer-core + + + + com.google.guava + guava + + + + net.jcip + jcip-annotations + test + + + + org.testcontainers + cassandra + test + + org.assertj assertj-core diff --git a/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/AbstractCassandraContainerTest.java b/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/AbstractCassandraContainerTest.java new file mode 100644 index 00000000..db7a495e --- /dev/null +++ b/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/AbstractCassandraContainerTest.java @@ -0,0 +1,114 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.ericsson.bss.cassandra.ecchronos.core.impl; + +import com.ericsson.bss.cassandra.ecchronos.connection.DistributedNativeConnectionProvider; +import java.net.InetSocketAddress; + +import java.util.List; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.testcontainers.containers.CassandraContainer; +import org.testcontainers.utility.DockerImageName; + +import com.datastax.oss.driver.api.core.CqlSession; +import com.datastax.oss.driver.api.core.metadata.Node; + +public class AbstractCassandraContainerTest +{ + protected static CqlSession mySession; + + private static DistributedNativeConnectionProvider myNativeConnectionProvider; + private static CassandraContainer node; + + @SuppressWarnings ("resource") + @BeforeClass + public static void setUpCluster() + { + // This is set as a JVM system property ('it.cassandra.version') in maven using the '-D' flag. + String cassandraVersion = System.getProperty("it.cassandra.version"); + if (cassandraVersion == null) + { + // No version property set, just use latest.
+ cassandraVersion = "latest"; + } + node = new CassandraContainer<>(DockerImageName.parse("cassandra:" + cassandraVersion)) + .withExposedPorts(9042, 7000, 7199) + .withEnv("CASSANDRA_DC", "DC1") + .withEnv("CASSANDRA_ENDPOINT_SNITCH", "GossipingPropertyFileSnitch") + .withEnv("CASSANDRA_CLUSTER_NAME", "TestCluster") + .withEnv("JMX_PORT", "7199"); + node.start(); + String containerIpAddress = node.getHost(); + Integer containerPort = node.getMappedPort(9042); + + mySession = CqlSession.builder() + .addContactPoint(new InetSocketAddress(containerIpAddress, containerPort)) + .withLocalDatacenter("DC1") + .build(); + + List nodesList = mySession.getMetadata().getNodes().values().stream().toList(); + myNativeConnectionProvider = new DistributedNativeConnectionProvider() + { + @Override + public CqlSession getCqlSession() + { + return mySession; + } + + @Override + public List getNodes() + { + return nodesList; + } + + @Override + public void addNode(Node myNode) + { + } + + @Override + public void removeNode(Node myNode) + { + } + + @Override + public Boolean confirmNodeValid(Node node) + { + return false; + } + }; + } + + @AfterClass + public static void tearDownCluster() + { + if (mySession != null) + { + mySession.close(); + } + node.stop(); + } + + public static DistributedNativeConnectionProvider getNativeConnectionProvider() + { + return myNativeConnectionProvider; + } + + public static CassandraContainer getContainerNode() + { + return node; + } +} diff --git a/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/TestCASLockFactory.java b/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/TestCASLockFactory.java new file mode 100644 index 00000000..60646cfb --- /dev/null +++ b/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/TestCASLockFactory.java @@ -0,0 +1,616 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the 
"License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.ericsson.bss.cassandra.ecchronos.core.impl.locks; + +import static com.datastax.oss.driver.api.querybuilder.QueryBuilder.bindMarker; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatExceptionOfType; +import static org.junit.Assert.assertEquals; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.doThrow; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import com.ericsson.bss.cassandra.ecchronos.connection.DistributedNativeConnectionProvider; +import com.ericsson.bss.cassandra.ecchronos.core.impl.AbstractCassandraContainerTest; +import com.ericsson.bss.cassandra.ecchronos.core.impl.utils.ConsistencyType; +import com.ericsson.bss.cassandra.ecchronos.core.locks.HostStates; +import com.ericsson.bss.cassandra.ecchronos.core.locks.LockFactory; +import java.io.IOException; +import java.util.Arrays; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.UUID; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.stream.Collectors; + +import javax.management.AttributeNotFoundException; +import 
javax.management.InstanceNotFoundException; +import javax.management.MBeanException; +import javax.management.MalformedObjectNameException; +import javax.management.ReflectionException; +import com.codahale.metrics.Gauge; +import com.codahale.metrics.Metric; +import com.datastax.oss.driver.api.core.AllNodesFailedException; +import com.datastax.oss.driver.api.core.ConsistencyLevel; +import com.datastax.oss.driver.api.core.CqlSession; +import com.datastax.oss.driver.api.core.cql.PreparedStatement; +import com.datastax.oss.driver.api.core.cql.ResultSet; +import com.datastax.oss.driver.api.core.cql.Row; +import com.datastax.oss.driver.api.core.cql.SimpleStatement; +import com.datastax.oss.driver.api.core.cql.Statement; +import com.datastax.oss.driver.api.core.metadata.Node; +import com.datastax.oss.driver.api.core.metrics.DefaultNodeMetric; +import com.datastax.oss.driver.api.core.metrics.Metrics; +import com.datastax.oss.driver.api.querybuilder.QueryBuilder; +import com.datastax.oss.driver.internal.core.context.InternalDriverContext; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +import com.ericsson.bss.cassandra.ecchronos.core.exceptions.LockException; + +import net.jcip.annotations.NotThreadSafe; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@NotThreadSafe +@RunWith (Parameterized.class) +public class TestCASLockFactory extends AbstractCassandraContainerTest +{ + @Parameterized.Parameters + public static Collection keyspaceNames() + { + return Arrays.asList("ecchronos", "anotherkeyspace"); + } + + private static final String TABLE_LOCK = "lock"; + private static final String TABLE_LOCK_PRIORITY = "lock_priority"; + + private static final String DATA_CENTER = "DC1"; + private static CASLockFactory myLockFactory; + private static PreparedStatement myLockStatement; + private static PreparedStatement myRemoveLockStatement; + private static PreparedStatement myCompeteStatement; + private static 
PreparedStatement myGetPrioritiesStatement; + + private static HostStates hostStates; + + @Parameterized.Parameter + public String myKeyspaceName; + + @Before + public void startup() + { + mySession.execute(String.format( + "CREATE KEYSPACE IF NOT EXISTS %s WITH replication = {'class': 'NetworkTopologyStrategy', 'DC1': 1}", myKeyspaceName)); + mySession.execute(String.format( + "CREATE TABLE IF NOT EXISTS %s.lock (resource text, node uuid, metadata map, PRIMARY KEY(resource)) WITH default_time_to_live = 600 AND gc_grace_seconds = 0", + myKeyspaceName)); + mySession.execute(String.format( + "CREATE TABLE IF NOT EXISTS %s.lock_priority (resource text, node uuid, priority int, PRIMARY KEY(resource, node)) WITH default_time_to_live = 600 AND gc_grace_seconds = 0", + myKeyspaceName)); + + hostStates = mock(HostStates.class); + when(hostStates.isUp(any(Node.class))).thenReturn(true); + Node node = mock(Node.class); + when(node.getHostId()).thenReturn(UUID.randomUUID()); + myLockFactory = new CASLockFactoryBuilder() + .withNativeConnectionProvider(getNativeConnectionProvider()) + .withHostStates(hostStates) + .withStatementDecorator(s -> s) + .withKeyspaceName(myKeyspaceName) + .withNode(node) + .build(); + + myLockStatement = mySession.prepare(QueryBuilder.insertInto(myKeyspaceName, TABLE_LOCK) + .value("resource", bindMarker()) + .value("node", bindMarker()) + .value("metadata", bindMarker()) + .ifNotExists() + .build() + .setConsistencyLevel(ConsistencyLevel.LOCAL_QUORUM) + .setSerialConsistencyLevel(ConsistencyLevel.LOCAL_SERIAL)); + myRemoveLockStatement = + mySession.prepare(String.format("DELETE FROM %s.%s WHERE resource=? 
IF EXISTS", myKeyspaceName, TABLE_LOCK)); + myCompeteStatement = mySession.prepare( + String.format("INSERT INTO %s.%s (resource, node, priority) VALUES (?, ?, ?)", myKeyspaceName, TABLE_LOCK_PRIORITY)); + myGetPrioritiesStatement = + mySession.prepare(String.format("SELECT * FROM %s.%s WHERE resource=?", myKeyspaceName, TABLE_LOCK_PRIORITY)); + } + + @After + public void testCleanup() + { + execute(SimpleStatement.newInstance( + String.format("DELETE FROM %s.%s WHERE resource='%s'", myKeyspaceName, TABLE_LOCK_PRIORITY, "lock"))); + execute(myRemoveLockStatement.bind("lock")); + myLockFactory.close(); + } + + @Test + public void testGetDefaultTimeToLiveFromLockTable() throws LockException + { + String alterLockTable = String.format("ALTER TABLE %s.%s WITH default_time_to_live = 1200;", myKeyspaceName, TABLE_LOCK); + mySession.execute(alterLockTable); + Node node = mock(Node.class); + when(node.getHostId()).thenReturn(UUID.randomUUID()); + myLockFactory = new CASLockFactoryBuilder() + .withNativeConnectionProvider(getNativeConnectionProvider()) + .withHostStates(hostStates) + .withStatementDecorator(s -> s) + .withKeyspaceName(myKeyspaceName) + .withNode(node) + .build(); + assertThat(myLockFactory.getCasLockFactoryCacheContext().getFailedLockRetryAttempts()).isEqualTo(9); + assertThat(myLockFactory.getCasLockFactoryCacheContext().getLockUpdateTimeInSeconds()).isEqualTo(120); + } + + @Test + public void testGetLock() throws LockException + { + try (LockFactory.DistributedLock lock = myLockFactory.tryLock(DATA_CENTER, "lock", 1, new HashMap())) + { + } + + assertPriorityListEmpty("lock"); + assertThat(myLockFactory.getCachedFailure(DATA_CENTER, "lock")).isEmpty(); + } + + @Test + public void testGetGlobalLock() throws LockException + { + try (LockFactory.DistributedLock lock = myLockFactory.tryLock(null, "lock", 1, new HashMap())) + { + } + assertPriorityListEmpty("lock"); + assertThat(myLockFactory.getCachedFailure(null, "lock")).isEmpty(); + } + + @Test + public 
void testGlobalLockTakenThrowsException() + { + execute(myLockStatement.bind("lock", UUID.randomUUID(), new HashMap<>())); + + assertThatExceptionOfType(LockException.class).isThrownBy(() -> myLockFactory.tryLock(null, "lock", 1, new HashMap<>())); + assertPrioritiesInList("lock", 1); + assertThat(myLockFactory.getCachedFailure(null, "lock")).isNotEmpty(); + } + + @Test + public void testGlobalLockTakenIsCachedOnSecondTry() throws AttributeNotFoundException, + InstanceNotFoundException, + MalformedObjectNameException, + MBeanException, + ReflectionException, + UnsupportedOperationException, + IOException, + InterruptedException + { + execute(myLockStatement.bind("lock", UUID.randomUUID(), new HashMap<>())); + InternalDriverContext driverContext = (InternalDriverContext) mySession.getContext(); + //Check that no in-flight queries exist, we want all previous queries to complete before we proceed + Optional connectedNode = driverContext.getPoolManager().getPools().keySet().stream().findFirst(); + while (getInFlightQueries(connectedNode.get()) != 0) + { + Thread.sleep(100); + } + long expectedLockReadCount = getReadCount(TABLE_LOCK) + 2; // We do a read due to CAS and execCommandOnContainer + long expectedLockWriteCount = getWriteCount(TABLE_LOCK) + 1; // No writes as the lock is already held + long expectedLockPriorityReadCount = getReadCount(TABLE_LOCK_PRIORITY) + 2; // We read the priorities + long expectedLockPriorityWriteCount = getWriteCount(TABLE_LOCK_PRIORITY) + 1; // We update our local priority once + + assertThatExceptionOfType(LockException.class).isThrownBy(() -> myLockFactory.tryLock(null, "lock", 2, new HashMap<>())); + assertThatExceptionOfType(LockException.class).isThrownBy(() -> myLockFactory.tryLock(null, "lock", 1, new HashMap<>())); + + assertThat(getReadCount(TABLE_LOCK_PRIORITY)).isEqualTo(expectedLockPriorityReadCount); + assertThat(getWriteCount(TABLE_LOCK_PRIORITY)).isEqualTo(expectedLockPriorityWriteCount); + + 
assertThat(getReadCount(TABLE_LOCK)).isEqualTo(expectedLockReadCount); + assertThat(getWriteCount(TABLE_LOCK)).isEqualTo(expectedLockWriteCount); + assertPrioritiesInList("lock", 2); + assertThat(myLockFactory.getCachedFailure(null, "lock")).isNotEmpty(); + } + + private int getInFlightQueries(Node node) + { + int inFlightQueries = 0; + Optional metrics = mySession.getMetrics(); + if (metrics.isPresent()) + { + Optional inFlight = metrics.get().getNodeMetric(node, DefaultNodeMetric.IN_FLIGHT); + if (inFlight.isPresent()) + { + inFlightQueries = (int) ((Gauge) inFlight.get()).getValue(); + } + } + return inFlightQueries; + } + + @Test + public void testGetLockWithLowerPriority() + { + execute(myCompeteStatement.bind("lock", UUID.randomUUID(), 2)); + + assertThatExceptionOfType(LockException.class).isThrownBy(() -> myLockFactory.tryLock(DATA_CENTER, "lock", 1, new HashMap<>())); + assertPrioritiesInList("lock", 1, 2); + assertThat(myLockFactory.getCachedFailure(DATA_CENTER, "lock")).isNotEmpty(); + } + + @Test + public void testGetAlreadyTakenLock() + { + execute(myLockStatement.bind("lock", UUID.randomUUID(), new HashMap<>())); + + assertThatExceptionOfType(LockException.class).isThrownBy(() -> myLockFactory.tryLock(DATA_CENTER, "lock", 1, new HashMap<>())); + assertPrioritiesInList("lock", 1); + assertThat(myLockFactory.getCachedFailure(DATA_CENTER, "lock")).isNotEmpty(); + } + + @Test + public void testGetLockWithLocallyHigherPriority() throws LockException + { + UUID localHostId = getNativeConnectionProvider().getNodes().get(0).getHostId(); + execute(myCompeteStatement.bind("lock", localHostId, 2)); + CASLockFactory lockFactory = new CASLockFactoryBuilder() + .withNativeConnectionProvider(getNativeConnectionProvider()) + .withHostStates(hostStates) + .withStatementDecorator(s -> s) + .withKeyspaceName(myKeyspaceName) + .withNode(getNativeConnectionProvider().getNodes().get(0)) + .build(); + + try (LockFactory.DistributedLock lock = 
lockFactory.tryLock(DATA_CENTER, "lock", 1, new HashMap<>())) + { + } + + assertPrioritiesInList("lock", 2); + assertThat(myLockFactory.getCachedFailure(DATA_CENTER, "lock")).isEmpty(); + } + + @Test + public void testGetLockWithLocallyLowerPriority() throws LockException + { + UUID localHostId = getNativeConnectionProvider().getNodes().get(0).getHostId(); + execute(myCompeteStatement.bind("lock", localHostId, 1)); + CASLockFactory lockFactory = new CASLockFactoryBuilder() + .withNativeConnectionProvider(getNativeConnectionProvider()) + .withHostStates(hostStates) + .withStatementDecorator(s -> s) + .withKeyspaceName(myKeyspaceName) + .withNode(getNativeConnectionProvider().getNodes().get(0)) + .build(); + try (LockFactory.DistributedLock lock = lockFactory.tryLock(DATA_CENTER, "lock", 2, new HashMap<>())) + { + } + + assertPriorityListEmpty("lock"); + assertThat(lockFactory.getCachedFailure(DATA_CENTER, "lock")).isEmpty(); + } + + @Test + public void testReadMetadata() throws LockException + { + Map expectedMetadata = new HashMap<>(); + expectedMetadata.put("data", "something"); + + try (LockFactory.DistributedLock lock = myLockFactory.tryLock(DATA_CENTER, "lock", 1, expectedMetadata)) + { + Map actualMetadata = myLockFactory.getLockMetadata(DATA_CENTER, "lock"); + + assertThat(actualMetadata).isEqualTo(expectedMetadata); + } + + assertPriorityListEmpty("lock"); + assertThat(myLockFactory.getCachedFailure(DATA_CENTER, "lock")).isEmpty(); + } + + @Test + public void testInterruptCasLockUpdate() throws InterruptedException + { + Map metadata = new HashMap<>(); + + ExecutorService executorService = Executors.newFixedThreadPool(1); + + try + { + Future future = executorService.submit( + new CASLock( + DATA_CENTER, + "lock", + 1, + metadata, + myLockFactory.getHostId(), + myLockFactory.getCasLockStatement())); + + Thread.sleep(100); + + future.cancel(true); + + executorService.shutdown(); + assertThat(executorService.awaitTermination(1, TimeUnit.SECONDS)).isTrue(); + } 
+ finally + { + if (!executorService.isShutdown()) + { + executorService.shutdownNow(); + } + } + } + + @Test + public void testFailedLockRetryAttempts() + { + Map metadata = new HashMap<>(); + try (CASLock lockUpdateTask = new CASLock( + DATA_CENTER, + "lock", + 1, + metadata, + myLockFactory.getHostId(), + myLockFactory.getCasLockStatement())) + { + for (int i = 0; i < 10; i++) + { + lockUpdateTask.run(); + assertThat(lockUpdateTask.getFailedAttempts()).isEqualTo(i + 1); + } + + execute(myLockStatement.bind("lock", myLockFactory.getHostId(), new HashMap<>())); + lockUpdateTask.run(); + assertThat(lockUpdateTask.getFailedAttempts()).isEqualTo(0); + } + + assertThat(myLockFactory.getCachedFailure(DATA_CENTER, "lock")).isEmpty(); + } + + @Test + public void testActivateWithoutKeyspaceCausesIllegalStateException() + { + mySession.execute(String.format("DROP KEYSPACE %s", myKeyspaceName)); + + assertThatExceptionOfType(IllegalStateException.class) + .isThrownBy(() -> new CASLockFactoryBuilder() + .withNativeConnectionProvider(getNativeConnectionProvider()) + .withHostStates(hostStates) + .withStatementDecorator(s -> s) + .withKeyspaceName(myKeyspaceName) + .build()); + + mySession.execute(String.format( + "CREATE KEYSPACE IF NOT EXISTS %s WITH replication = {'class': 'NetworkTopologyStrategy', 'DC1': 1}", myKeyspaceName)); + mySession.execute(String.format( + "CREATE TABLE IF NOT EXISTS %s.%s (resource text, node uuid, metadata map, PRIMARY KEY(resource)) WITH default_time_to_live = 600 AND gc_grace_seconds = 0", + myKeyspaceName, TABLE_LOCK)); + mySession.execute(String.format( + "CREATE TABLE IF NOT EXISTS %s.%s (resource text, node uuid, priority int, PRIMARY KEY(resource, node)) WITH default_time_to_live = 600 AND gc_grace_seconds = 0", + myKeyspaceName, TABLE_LOCK_PRIORITY)); + } + + @Test + public void testActivateWithoutLockTableCausesIllegalStateException() + { + mySession.execute(String.format("DROP TABLE %s.%s", myKeyspaceName, TABLE_LOCK)); + + 
assertThatExceptionOfType(IllegalStateException.class) + .isThrownBy(() -> new CASLockFactoryBuilder() + .withNativeConnectionProvider(getNativeConnectionProvider()) + .withHostStates(hostStates) + .withStatementDecorator(s -> s) + .withKeyspaceName(myKeyspaceName) + .build()); + + mySession.execute(String.format( + "CREATE TABLE IF NOT EXISTS %s.%s (resource text, node uuid, metadata map, PRIMARY KEY(resource)) WITH default_time_to_live = 600 AND gc_grace_seconds = 0", + myKeyspaceName, TABLE_LOCK)); + } + + @Test + public void testActivateWithoutLockPriorityTableCausesIllegalStateException() + { + mySession.execute(String.format("DROP TABLE %s.%s", myKeyspaceName, TABLE_LOCK_PRIORITY)); + + assertThatExceptionOfType(IllegalStateException.class) + .isThrownBy(() -> new CASLockFactoryBuilder() + .withNativeConnectionProvider(getNativeConnectionProvider()) + .withHostStates(hostStates) + .withStatementDecorator(s -> s) + .withKeyspaceName(myKeyspaceName) + .build()); + + mySession.execute(String.format( + "CREATE TABLE IF NOT EXISTS %s.%s (resource text, node uuid, priority int, PRIMARY KEY(resource, node)) WITH default_time_to_live = 600 AND gc_grace_seconds = 0", + myKeyspaceName, TABLE_LOCK_PRIORITY)); + } + + @Test + public void testActivateWithoutCassandraCausesIllegalStateException() + { + // mock + CqlSession session = mock(CqlSession.class); + + doThrow(AllNodesFailedException.class).when(session).getMetadata(); + + // test + assertThatExceptionOfType(AllNodesFailedException.class) + .isThrownBy(() -> new CASLockFactoryBuilder() + .withNativeConnectionProvider(new DistributedNativeConnectionProvider() + { + @Override + public CqlSession getCqlSession() + { + return session; + } + + @Override + public List getNodes() + { + return null; + } + + @Override + public void addNode(Node myNode) + { + } + + @Override + public void removeNode(Node myNode) + { + } + + @Override + public Boolean confirmNodeValid(Node node) + { + return false; + } + }) + 
.withHostStates(hostStates) + .withStatementDecorator(s -> s) + .withKeyspaceName(myKeyspaceName) + .build()); + } + + @Test + public void testLocalSerialConsistency() + { + DistributedNativeConnectionProvider connectionProviderMock = mock(DistributedNativeConnectionProvider.class); + Node nodeMock = mock(Node.class); + when(nodeMock.getHostId()).thenReturn(UUID.randomUUID()); + when(connectionProviderMock.getCqlSession()).thenReturn(mySession); + when(connectionProviderMock.getNodes()).thenReturn(Arrays.asList(nodeMock)); + + myLockFactory = new CASLockFactoryBuilder() + .withNativeConnectionProvider(connectionProviderMock) + .withHostStates(hostStates) + .withStatementDecorator(s -> s) + .withKeyspaceName(myKeyspaceName) + .withConsistencySerial(ConsistencyType.LOCAL) + .withNode(nodeMock) + .build(); + + assertEquals(ConsistencyLevel.LOCAL_SERIAL, myLockFactory.getSerialConsistencyLevel()); + } + + @Test + public void testSerialConsistency() + { + DistributedNativeConnectionProvider connectionProviderMock = mock(DistributedNativeConnectionProvider.class); + Node nodeMock = mock(Node.class); + when(nodeMock.getHostId()).thenReturn(UUID.randomUUID()); + when(connectionProviderMock.getCqlSession()).thenReturn(mySession); + when(connectionProviderMock.getNodes()).thenReturn(Arrays.asList(nodeMock)); + + myLockFactory = new CASLockFactoryBuilder() + .withNativeConnectionProvider(connectionProviderMock) + .withHostStates(hostStates) + .withStatementDecorator(s -> s) + .withKeyspaceName(myKeyspaceName) + .withConsistencySerial(ConsistencyType.SERIAL) + .withNode(nodeMock) + .build(); + + assertEquals(ConsistencyLevel.SERIAL, myLockFactory.getSerialConsistencyLevel()); + } + + private void assertPriorityListEmpty(String resource) + { + assertThat(getPriorities(resource)).isEmpty(); + } + + private void assertPrioritiesInList(String resource, Integer... 
priorities) + { + assertThat(getPriorities(resource)).containsExactlyInAnyOrder(priorities); + } + + private Set getPriorities(String resource) + { + ResultSet resultSet = execute(myGetPrioritiesStatement.bind(resource)); + List rows = resultSet.all(); + + return rows.stream().map(r -> r.getInt("priority")).collect(Collectors.toSet()); + } + + private ResultSet execute(Statement statement) + { + return mySession.execute(statement); + } + + private long getReadCount(String tableName) throws AttributeNotFoundException, + InstanceNotFoundException, + MBeanException, + ReflectionException, + IOException, + MalformedObjectNameException, + UnsupportedOperationException, + InterruptedException + { + return getReadCountFromTableStats(tableName); + } + + private long getWriteCount(String tableName) throws AttributeNotFoundException, + InstanceNotFoundException, + MBeanException, + ReflectionException, + IOException, + MalformedObjectNameException, + UnsupportedOperationException, + InterruptedException + { + return getWriteCountFromTableStats(tableName); + } + + private long getReadCountFromTableStats(String tableName) throws UnsupportedOperationException, IOException, InterruptedException + { + String tableStatsOutput = + getContainerNode().execInContainer("nodetool", "tablestats", myKeyspaceName + "." + tableName).getStdout(); + long readCount = 0; + Pattern readCountPattern = Pattern.compile("Read Count:\\s+(\\d+)"); + Matcher readCountMatcher = readCountPattern.matcher(tableStatsOutput); + + if (readCountMatcher.find()) + { + readCount = Long.parseLong(readCountMatcher.group(1)); + } + + return readCount; + } + + private long getWriteCountFromTableStats(String tableName) throws UnsupportedOperationException, IOException, InterruptedException + { + String tableStatsOutput = + getContainerNode().execInContainer("nodetool", "tablestats", myKeyspaceName + "." 
+ tableName).getStdout(); + long writeCount = 0; + Pattern writeCountPattern = Pattern.compile("Write Count:\\s+(\\d+)"); + Matcher writeCountMatcher = writeCountPattern.matcher(tableStatsOutput); + + if (writeCountMatcher.find()) + { + writeCount = Long.parseLong(writeCountMatcher.group(1)); + } + + return writeCount; + } + +} diff --git a/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/TestLockCache.java b/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/TestLockCache.java new file mode 100644 index 00000000..9aad10bb --- /dev/null +++ b/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/TestLockCache.java @@ -0,0 +1,165 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.ericsson.bss.cassandra.ecchronos.core.impl.locks; + +import com.ericsson.bss.cassandra.ecchronos.core.exceptions.LockException; +import com.ericsson.bss.cassandra.ecchronos.core.locks.LockFactory.DistributedLock; +import nl.jqno.equalsverifier.EqualsVerifier; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.Mock; +import org.mockito.junit.MockitoJUnitRunner; + +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.TimeUnit; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +@RunWith(MockitoJUnitRunner.class) +public class TestLockCache +{ + private static final String DATA_CENTER = "DC1"; + private static final String RESOURCE = "RepairResource-91e32362-7af4-11e9-8f9e-2a86e4085a59-1"; + private static final int PRIORITY = 1; + private static final Map METADATA = new HashMap<>(); + + @Mock + private LockCache.LockSupplier mockedLockSupplier; + + private LockCache myLockCache; + + @Before + public void setup() + { + myLockCache = new LockCache(mockedLockSupplier, 30L); + } + + @Test + public void testGetLock() throws LockException + { + DistributedLock expectedLock = doReturnLockOnGetLock(); + + assertGetLockRetrievesExpectedLock(expectedLock); + } + + @Test + public void testGetThrowingLockIsCached() throws LockException + { + LockException expectedExcetion = doThrowOnGetLock(); + + assertGetLockThrowsException(expectedExcetion); + + // Reset return type, locking should still throw + doReturnLockOnGetLock(); + + assertGetLockThrowsException(expectedExcetion); + } + + @Test + public void testGetMultipleLocks() throws LockException + { + String otherResource = "RepairResource-b2e33e60-7af6-11e9-8f9e-2a86e4085a59-1"; + + DistributedLock expectedLock = 
doReturnLockOnGetLock(RESOURCE); + DistributedLock expectedOtherLock = doReturnLockOnGetLock(otherResource); + + assertGetLockRetrievesExpectedLock(RESOURCE, expectedLock); + assertGetLockRetrievesExpectedLock(otherResource, expectedOtherLock); + } + + @Test + public void testGetOtherLockAfterThrowingOnAnotherResource() throws LockException + { + String otherResource = "RepairResource-b2e33e60-7af6-11e9-8f9e-2a86e4085a59-1"; + + LockException expectedException = doThrowOnGetLock(RESOURCE); + DistributedLock expectedOtherLock = doReturnLockOnGetLock(otherResource); + + assertGetLockThrowsException(RESOURCE, expectedException); + assertGetLockRetrievesExpectedLock(otherResource, expectedOtherLock); + } + + @Test + public void testGetLockAfterCachedExceptionHasExpired() throws LockException, InterruptedException + { + myLockCache = new LockCache(mockedLockSupplier, 20, TimeUnit.MILLISECONDS); + + LockException expectedException = doThrowOnGetLock(); + assertGetLockThrowsException(expectedException); + + Thread.sleep(20); + + DistributedLock expectedLock = doReturnLockOnGetLock(); + assertGetLockRetrievesExpectedLock(expectedLock); + } + + @Test + public void testEqualsContract() + { + EqualsVerifier.forClass(LockCache.LockKey.class).usingGetClass().verify(); + } + + private void assertGetLockRetrievesExpectedLock(DistributedLock expectedLock) throws LockException + { + assertGetLockRetrievesExpectedLock(RESOURCE, expectedLock); + } + + private void assertGetLockRetrievesExpectedLock(String resource, DistributedLock expectedLock) throws LockException + { + assertThat(myLockCache.getLock(DATA_CENTER, resource, PRIORITY, METADATA)).isSameAs(expectedLock); + assertThat(myLockCache.getCachedFailure(DATA_CENTER, resource)).isEmpty(); + } + + private void assertGetLockThrowsException(LockException expectedException) + { + assertGetLockThrowsException(RESOURCE, expectedException); + } + + private void assertGetLockThrowsException(String resource, LockException 
expectedException) + { + assertThatThrownBy(() -> myLockCache.getLock(DATA_CENTER, resource, PRIORITY, METADATA)).isSameAs(expectedException); + assertThat(myLockCache.getCachedFailure(DATA_CENTER, resource)).isNotEmpty(); + } + + private DistributedLock doReturnLockOnGetLock() throws LockException + { + return doReturnLockOnGetLock(RESOURCE); + } + + private DistributedLock doReturnLockOnGetLock(String resource) throws LockException + { + DistributedLock expectedLock = mock(DistributedLock.class); + when(mockedLockSupplier.getLock(eq(DATA_CENTER), eq(resource), eq(PRIORITY), eq(METADATA))).thenReturn(expectedLock); + return expectedLock; + } + + private LockException doThrowOnGetLock() throws LockException + { + return doThrowOnGetLock(RESOURCE); + } + + private LockException doThrowOnGetLock(String resource) throws LockException + { + LockException expectedException = new LockException(""); + when(mockedLockSupplier.getLock(eq(DATA_CENTER), eq(resource), eq(PRIORITY), eq(METADATA))).thenThrow(expectedException); + return expectedException; + } +} \ No newline at end of file diff --git a/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/TestLockCollection.java b/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/TestLockCollection.java new file mode 100644 index 00000000..160096b4 --- /dev/null +++ b/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/TestLockCollection.java @@ -0,0 +1,91 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.ericsson.bss.cassandra.ecchronos.core.impl.locks; + +import static org.assertj.core.api.Assertions.assertThat; +import com.ericsson.bss.cassandra.ecchronos.core.locks.LockFactory; +import java.util.ArrayList; +import java.util.List; +import org.junit.Test; + +import com.ericsson.bss.cassandra.ecchronos.core.locks.LockFactory.DistributedLock; + +public class TestLockCollection +{ + + @Test + public void testCloseAllLocks() + { + List locks = new ArrayList<>(); + for (int i = 0; i < 10; i++) + { + locks.add(new DummyLock()); + } + + new LockCollection(locks).close(); + + for (DummyLock lock : locks) + { + assertThat(lock.closed).isTrue(); + } + } + + @Test + public void testCloseAllLocksOneThrowing() + { + List locks = new ArrayList<>(); + for (int i = 0; i < 4; i++) + { + locks.add(new DummyLock()); + } + + locks.add(new ThrowingLock()); + + for (int i = 0; i < 5; i++) + { + locks.add(new DummyLock()); + } + + new LockCollection(locks).close(); + + for (DistributedLock lock : locks) + { + if (lock instanceof DummyLock) + { + assertThat(((DummyLock) lock).closed).isTrue(); + } + } + } + + private class ThrowingLock implements DistributedLock + { + @Override + public void close() + { + throw new IllegalStateException(); + } + } +} + +class DummyLock implements LockFactory.DistributedLock +{ + public volatile boolean closed = false; + + @Override + public void close() + { + closed = true; + } +} From 888c138af3da3169c6284b2813d3766acf0ada74 Mon Sep 17 00:00:00 2001 From: sajid riaz Date: Mon, 4 Nov 2024 15:31:41 +0100 Subject: [PATCH 4/8] 
Cassandra based distributed locking mechanism # 741 - Cassandra tables called lock and lock_priority, to manage task execution and synchronization across multiple nodes. --- .../core/impl/locks/HostStatesImpl.java | 2 +- .../ecchronos/core/locks/DriverNode.java | 96 ------------------- .../ecchronos/core/locks/HostStates.java | 1 + 3 files changed, 2 insertions(+), 97 deletions(-) delete mode 100644 core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/locks/DriverNode.java diff --git a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/HostStatesImpl.java b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/HostStatesImpl.java index 90d652a3..0308eb41 100644 --- a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/HostStatesImpl.java +++ b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/HostStatesImpl.java @@ -19,8 +19,8 @@ import com.ericsson.bss.cassandra.ecchronos.core.impl.logging.ThrottlingLogger; import com.ericsson.bss.cassandra.ecchronos.core.jmx.DistributedJmxProxy; import com.ericsson.bss.cassandra.ecchronos.core.jmx.DistributedJmxProxyFactory; -import com.ericsson.bss.cassandra.ecchronos.core.locks.DriverNode; import com.ericsson.bss.cassandra.ecchronos.core.locks.HostStates; +import com.ericsson.bss.cassandra.ecchronos.core.metadata.DriverNode; import java.io.Closeable; import java.io.IOException; import java.net.InetAddress; diff --git a/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/locks/DriverNode.java b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/locks/DriverNode.java deleted file mode 100644 index 4cde5e3c..00000000 --- a/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/locks/DriverNode.java +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright 2020 Telefonaktiebolaget LM Ericsson - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in 
compliance with the License. - * You may obtain a copy of the License at - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package com.ericsson.bss.cassandra.ecchronos.core.locks; - -import com.datastax.oss.driver.api.core.metadata.Node; - -import java.net.InetAddress; -import java.util.Objects; -import java.util.UUID; - -/** - * An internal representation of a node. - * This class together with {@link com.ericsson.bss.cassandra.ecchronos.core.utils.NodeResolver} makes it easier to - * translate node IP to host ID and other way around. - */ -public class DriverNode -{ - private final Node node; - - public DriverNode(final Node aNode) - { - this.node = aNode; - } - - /** - * Get the host id of the node. - * - * @return The host id of the node. - */ - public UUID getId() - { - return node.getHostId(); - } - - /** - * Get the public ip address of the node. - * - * @return The public ip address of the node. - */ - public InetAddress getPublicAddress() - { - return node.getBroadcastAddress().get().getAddress(); - } - - /** - * Get the datacenter the node resides in. - * - * @return The datacenter of the node. - */ - public String getDatacenter() - { - return node.getDatacenter(); - } - - /** - * Check for equality. 
- */ - @Override - public boolean equals(final Object o) - { - if (this == o) - { - return true; - } - if (o == null || getClass() != o.getClass()) - { - return false; - } - DriverNode that = (DriverNode) o; - return node.equals(that.node); - } - - @Override - public final int hashCode() - { - return Objects.hash(node); - } - - @Override - public final String toString() - { - return String.format("Node(%s:%s:%s)", getId(), getDatacenter(), getPublicAddress()); - } -} diff --git a/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/locks/HostStates.java b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/locks/HostStates.java index 28756485..ed3626c4 100644 --- a/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/locks/HostStates.java +++ b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/locks/HostStates.java @@ -14,6 +14,7 @@ */ package com.ericsson.bss.cassandra.ecchronos.core.locks; +import com.ericsson.bss.cassandra.ecchronos.core.metadata.DriverNode; import java.net.InetAddress; import com.datastax.oss.driver.api.core.metadata.Node; From bdeaff760ef72ac2b325024ad6a532eb08cc1476 Mon Sep 17 00:00:00 2001 From: sajid riaz Date: Mon, 4 Nov 2024 15:31:41 +0100 Subject: [PATCH 5/8] Cassandra based distributed locking mechanism # 741 - Cassandra tables called lock and lock_priority, to manage task execution and synchronization across multiple nodes. 
--- .../config/connection/AgentConnectionConfig.java | 11 ----------- application/src/main/resources/ecc.yml | 7 ------- .../ecchronos/application/config/TestConfig.java | 6 ------ application/src/test/resources/all_set.yml | 1 - .../impl/builders/DistributedNativeBuilder.java | 1 - .../DistributedNativeConnectionProvider.java | 1 - .../cassandra/ecchronos/core/impl/locks/CASLock.java | 2 +- .../ecchronos/core/impl/locks/CASLockFactory.java | 2 +- .../ecchronos/core/impl/locks/CASLockProperties.java | 1 - .../ecchronos/core/impl/locks/CASLockStatement.java | 1 - .../ecchronos/core/impl/locks/LockCache.java | 2 +- .../ecchronos/core/impl/locks/TestCASLockFactory.java | 2 +- .../ecchronos/core/impl/locks/TestLockCache.java | 2 +- .../cassandra/ecchronos/core/locks/LockFactory.java | 2 +- .../core/{exceptions => utils}/LockException.java | 2 +- .../core/{exceptions => utils}/package-info.java | 5 ++--- 16 files changed, 9 insertions(+), 39 deletions(-) rename core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/{exceptions => utils}/LockException.java (94%) rename core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/{exceptions => utils}/package-info.java (85%) diff --git a/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/config/connection/AgentConnectionConfig.java b/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/config/connection/AgentConnectionConfig.java index 2234f289..c99ffe6c 100644 --- a/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/config/connection/AgentConnectionConfig.java +++ b/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/config/connection/AgentConnectionConfig.java @@ -48,17 +48,6 @@ public AgentConnectionConfig() } - @JsonProperty("remoteRouting") - public boolean getRemoteRouting() - { - return myRemoteRouting; - } - - @JsonProperty("remoteRouting") - public void setRemoteRouting(final boolean remoteRouting) - { - 
myRemoteRouting = remoteRouting; - } /** * Gets unique ecchronos instance name. * diff --git a/application/src/main/resources/ecc.yml b/application/src/main/resources/ecc.yml index 33751116..ed88f3cb 100644 --- a/application/src/main/resources/ecc.yml +++ b/application/src/main/resources/ecc.yml @@ -28,13 +28,6 @@ connection: ## (instanceName: unique identifier), that will be used ## as ecchronos_id (partition key in nodes_sync table). instanceName: unique_identifier - ## - ## Allow routing requests directly to a remote datacenter. - ## This allows locks for other datacenters to be taken in that datacenter instead of via the local datacenter. - ## If clients are prevented from connecting directly to Cassandra nodes in other sites this is not possible. - ## If remote routing is disabled, instead SERIAL consistency will be used for those request. - ## - remoteRouting: true ## Define the Agent strategy, it can be ## - datacenterAware; ## - rackAware; and diff --git a/application/src/test/java/com/ericsson/bss/cassandra/ecchronos/application/config/TestConfig.java b/application/src/test/java/com/ericsson/bss/cassandra/ecchronos/application/config/TestConfig.java index baf9e9ad..8089448c 100644 --- a/application/src/test/java/com/ericsson/bss/cassandra/ecchronos/application/config/TestConfig.java +++ b/application/src/test/java/com/ericsson/bss/cassandra/ecchronos/application/config/TestConfig.java @@ -317,11 +317,5 @@ public void testInstanceName() { assertThat(nativeConnection.getAgentConnectionConfig().getInstanceName()).isEqualTo("unique_identifier"); } - - @Test - public void testRemoteRouting() - { - assertThat(nativeConnection.getAgentConnectionConfig().getRemoteRouting()).isEqualTo(false); - } } diff --git a/application/src/test/resources/all_set.yml b/application/src/test/resources/all_set.yml index b5a7977c..5d18c36d 100644 --- a/application/src/test/resources/all_set.yml +++ b/application/src/test/resources/all_set.yml @@ -42,7 +42,6 @@ connection: port: 9042 
- host: 127.0.0.4 port: 9042 - remoteRouting: false provider: com.ericsson.bss.cassandra.ecchronos.application.providers.AgentNativeConnectionProvider connectionDelay: time: 45 diff --git a/connection.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/connection/impl/builders/DistributedNativeBuilder.java b/connection.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/connection/impl/builders/DistributedNativeBuilder.java index b858addd..dbf78ef5 100644 --- a/connection.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/connection/impl/builders/DistributedNativeBuilder.java +++ b/connection.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/connection/impl/builders/DistributedNativeBuilder.java @@ -74,7 +74,6 @@ public class DistributedNativeBuilder private SslEngineFactory mySslEngineFactory = null; private SchemaChangeListener mySchemaChangeListener = null; private NodeStateListener myNodeStateListener = null; - private boolean myRemoteRouting; /** * Sets the initial contact points for the distributed native connection. 
diff --git a/connection/src/main/java/com/ericsson/bss/cassandra/ecchronos/connection/DistributedNativeConnectionProvider.java b/connection/src/main/java/com/ericsson/bss/cassandra/ecchronos/connection/DistributedNativeConnectionProvider.java index 6dde865e..34c1e245 100644 --- a/connection/src/main/java/com/ericsson/bss/cassandra/ecchronos/connection/DistributedNativeConnectionProvider.java +++ b/connection/src/main/java/com/ericsson/bss/cassandra/ecchronos/connection/DistributedNativeConnectionProvider.java @@ -33,7 +33,6 @@ default void close() throws IOException { } void addNode(Node myNode); - void removeNode(Node myNode); Boolean confirmNodeValid(Node node); } diff --git a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/CASLock.java b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/CASLock.java index b76880ec..6a1cb0b8 100644 --- a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/CASLock.java +++ b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/CASLock.java @@ -30,7 +30,7 @@ import com.datastax.oss.driver.api.core.cql.ResultSet; import com.datastax.oss.driver.api.core.cql.Row; -import com.ericsson.bss.cassandra.ecchronos.core.exceptions.LockException; +import com.ericsson.bss.cassandra.ecchronos.core.utils.LockException; /** * Represents a container for builder configurations and state for the CASLock. 
diff --git a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/CASLockFactory.java b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/CASLockFactory.java index 1540fac5..9a5c1948 100644 --- a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/CASLockFactory.java +++ b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/CASLockFactory.java @@ -23,7 +23,7 @@ import com.datastax.oss.driver.api.core.metadata.TokenMap; import com.datastax.oss.driver.api.core.metadata.schema.KeyspaceMetadata; import com.datastax.oss.driver.api.core.metadata.schema.TableMetadata; -import com.ericsson.bss.cassandra.ecchronos.core.exceptions.LockException; +import com.ericsson.bss.cassandra.ecchronos.core.utils.LockException; import com.ericsson.bss.cassandra.ecchronos.core.locks.HostStates; import com.ericsson.bss.cassandra.ecchronos.core.locks.LockFactory; import com.google.common.annotations.VisibleForTesting; diff --git a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/CASLockProperties.java b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/CASLockProperties.java index 5adebf2f..8efe5aa9 100644 --- a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/CASLockProperties.java +++ b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/CASLockProperties.java @@ -49,7 +49,6 @@ public class CASLockProperties public final ConsistencyLevel defineSerialConsistencyLevel(final ConsistencyType consistencyType) { ConsistencyLevel serialConsistencyLevel; - serialConsistencyLevel = ConsistencyType.LOCAL.equals(consistencyType) ? 
ConsistencyLevel.LOCAL_SERIAL : ConsistencyLevel.SERIAL; diff --git a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/CASLockStatement.java b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/CASLockStatement.java index e9b2d18a..b4bbd52b 100644 --- a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/CASLockStatement.java +++ b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/CASLockStatement.java @@ -221,5 +221,4 @@ public final CASLockProperties getCasLockProperties() { return myCasLockProperties; } - } diff --git a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/LockCache.java b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/LockCache.java index 73751788..e34f6fc0 100644 --- a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/LockCache.java +++ b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/LockCache.java @@ -16,7 +16,7 @@ import static com.ericsson.bss.cassandra.ecchronos.core.locks.LockFactory.DistributedLock; -import com.ericsson.bss.cassandra.ecchronos.core.exceptions.LockException; +import com.ericsson.bss.cassandra.ecchronos.core.utils.LockException; import com.github.benmanes.caffeine.cache.Cache; import com.github.benmanes.caffeine.cache.Caffeine; import org.slf4j.Logger; diff --git a/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/TestCASLockFactory.java b/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/TestCASLockFactory.java index 60646cfb..1cbc449f 100644 --- a/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/TestCASLockFactory.java +++ b/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/TestCASLockFactory.java @@ -69,7 +69,7 @@ import org.junit.Before; import org.junit.Test; -import 
com.ericsson.bss.cassandra.ecchronos.core.exceptions.LockException; +import com.ericsson.bss.cassandra.ecchronos.core.utils.LockException; import net.jcip.annotations.NotThreadSafe; import org.junit.runner.RunWith; diff --git a/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/TestLockCache.java b/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/TestLockCache.java index 9aad10bb..112365ea 100644 --- a/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/TestLockCache.java +++ b/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/TestLockCache.java @@ -14,7 +14,7 @@ */ package com.ericsson.bss.cassandra.ecchronos.core.impl.locks; -import com.ericsson.bss.cassandra.ecchronos.core.exceptions.LockException; +import com.ericsson.bss.cassandra.ecchronos.core.utils.LockException; import com.ericsson.bss.cassandra.ecchronos.core.locks.LockFactory.DistributedLock; import nl.jqno.equalsverifier.EqualsVerifier; import org.junit.Before; diff --git a/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/locks/LockFactory.java b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/locks/LockFactory.java index cfc7eaea..c6b09c3b 100644 --- a/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/locks/LockFactory.java +++ b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/locks/LockFactory.java @@ -18,7 +18,7 @@ import java.util.Map; import java.util.Optional; -import com.ericsson.bss.cassandra.ecchronos.core.exceptions.LockException; +import com.ericsson.bss.cassandra.ecchronos.core.utils.LockException; /** * Interface for distributed lock factories. 
diff --git a/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/exceptions/LockException.java b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/utils/LockException.java similarity index 94% rename from core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/exceptions/LockException.java rename to core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/utils/LockException.java index 3a7e4d77..bae8ce59 100644 --- a/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/exceptions/LockException.java +++ b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/utils/LockException.java @@ -12,7 +12,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package com.ericsson.bss.cassandra.ecchronos.core.exceptions; +package com.ericsson.bss.cassandra.ecchronos.core.utils; /** * Exception thrown when a lock factory is unable to get a lock. diff --git a/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/exceptions/package-info.java b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/utils/package-info.java similarity index 85% rename from core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/exceptions/package-info.java rename to core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/utils/package-info.java index b8ca0e69..ea490574 100644 --- a/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/exceptions/package-info.java +++ b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/utils/package-info.java @@ -13,7 +13,6 @@ * limitations under the License. */ /** - * Contains locks related exceptions. + * Contains utilities classes. 
*/ -package com.ericsson.bss.cassandra.ecchronos.core.exceptions; - +package com.ericsson.bss.cassandra.ecchronos.core.utils; From 67fbcb2ba2c4c981023567e6122619a9ded93f56 Mon Sep 17 00:00:00 2001 From: sajid riaz Date: Mon, 4 Nov 2024 15:31:41 +0100 Subject: [PATCH 6/8] Cassandra based distributed locking mechanism # 741 - Cassandra tables called lock and lock_priority, to manage task execution and synchronization across multiple nodes. --- CHANGES.md | 2 ++ .../connection/AgentConnectionConfig.java | 1 - .../ecchronos/core/impl/locks/CASLock.java | 3 +-- .../core/impl/locks/CASLockFactory.java | 10 +++++----- .../ecchronos/core/impl/locks/LockCache.java | 3 +-- .../core/impl/utils/package-info.java | 1 + .../core/impl/locks/TestCASLockFactory.java | 3 +-- .../core/impl/locks/TestLockCache.java | 2 +- .../ecchronos/core/locks/LockFactory.java | 3 +-- .../ecchronos/core/utils/package-info.java | 18 ------------------ .../utils/exceptions}/LockException.java | 2 +- 11 files changed, 14 insertions(+), 34 deletions(-) delete mode 100644 core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/utils/package-info.java rename {core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/utils => utils/src/main/java/com/ericsson/bss/cassandra/ecchronos/utils/exceptions}/LockException.java (94%) diff --git a/CHANGES.md b/CHANGES.md index 411d4188..566b8232 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -2,9 +2,11 @@ ## Version 1.0.0 (Not yet Released) +* Cassandra-Based Distributed Locks #741 * Create New Repair Type Called "VNODE" - Issue #755 * Create ReplicaRepairGroup Class for Grouping Replicas and Token Ranges - Issue #721 * Hot Reload of Nodes List - Issue #699 +* Update nodes when cluster changes, nodes removed or added #699 * Investigate Creation of RepairScheduler and ScheduleManager #714 * Implement ScheduledJobQueue for Prioritized Job Management and Execution - Issue #740 * Implement RepairGroup Class for Managing and Executing Repair Tasks - Issue #738 diff 
--git a/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/config/connection/AgentConnectionConfig.java b/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/config/connection/AgentConnectionConfig.java index c99ffe6c..b6e9f797 100644 --- a/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/config/connection/AgentConnectionConfig.java +++ b/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/config/connection/AgentConnectionConfig.java @@ -38,7 +38,6 @@ public final class AgentConnectionConfig private HostAware myHostAware = new HostAware(); private Class myDatacenterAwarePolicy = DataCenterAwarePolicy.class; private String myInstanceName; - private boolean myRemoteRouting = true; /** * Default constructor for AgentConnectionConfig. diff --git a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/CASLock.java b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/CASLock.java index 6a1cb0b8..34e610fe 100644 --- a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/CASLock.java +++ b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/CASLock.java @@ -30,7 +30,7 @@ import com.datastax.oss.driver.api.core.cql.ResultSet; import com.datastax.oss.driver.api.core.cql.Row; -import com.ericsson.bss.cassandra.ecchronos.core.utils.LockException; +import com.ericsson.bss.cassandra.ecchronos.utils.exceptions.LockException; /** * Represents a container for builder configurations and state for the CASLock. 
@@ -213,5 +213,4 @@ int getFailedAttempts() { return myFailedUpdateAttempts.get(); } - } diff --git a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/CASLockFactory.java b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/CASLockFactory.java index 9a5c1948..51fb3f5c 100644 --- a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/CASLockFactory.java +++ b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/CASLockFactory.java @@ -23,9 +23,9 @@ import com.datastax.oss.driver.api.core.metadata.TokenMap; import com.datastax.oss.driver.api.core.metadata.schema.KeyspaceMetadata; import com.datastax.oss.driver.api.core.metadata.schema.TableMetadata; -import com.ericsson.bss.cassandra.ecchronos.core.utils.LockException; -import com.ericsson.bss.cassandra.ecchronos.core.locks.HostStates; import com.ericsson.bss.cassandra.ecchronos.core.locks.LockFactory; +import com.ericsson.bss.cassandra.ecchronos.utils.exceptions.LockException; +import com.ericsson.bss.cassandra.ecchronos.core.locks.HostStates; import com.google.common.annotations.VisibleForTesting; import com.google.common.util.concurrent.ThreadFactoryBuilder; import org.slf4j.Logger; @@ -140,9 +140,9 @@ private int getDefaultTimeToLiveFromLockTable() @Override public DistributedLock tryLock(final String dataCenter, - final String resource, - final int priority, - final Map metadata) + final String resource, + final int priority, + final Map metadata) throws LockException { return myCasLockFactoryCacheContext.getLockCache() diff --git a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/LockCache.java b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/LockCache.java index e34f6fc0..7f8f76a6 100644 --- a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/LockCache.java +++ 
b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/LockCache.java @@ -15,8 +15,7 @@ package com.ericsson.bss.cassandra.ecchronos.core.impl.locks; import static com.ericsson.bss.cassandra.ecchronos.core.locks.LockFactory.DistributedLock; - -import com.ericsson.bss.cassandra.ecchronos.core.utils.LockException; +import com.ericsson.bss.cassandra.ecchronos.utils.exceptions.LockException; import com.github.benmanes.caffeine.cache.Cache; import com.github.benmanes.caffeine.cache.Caffeine; import org.slf4j.Logger; diff --git a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/utils/package-info.java b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/utils/package-info.java index 796be8fd..7f486977 100644 --- a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/utils/package-info.java +++ b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/utils/package-info.java @@ -16,3 +16,4 @@ * Contains utilities classes. 
*/ package com.ericsson.bss.cassandra.ecchronos.core.impl.utils; + diff --git a/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/TestCASLockFactory.java b/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/TestCASLockFactory.java index 1cbc449f..91758a8d 100644 --- a/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/TestCASLockFactory.java +++ b/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/TestCASLockFactory.java @@ -28,6 +28,7 @@ import com.ericsson.bss.cassandra.ecchronos.core.impl.utils.ConsistencyType; import com.ericsson.bss.cassandra.ecchronos.core.locks.HostStates; import com.ericsson.bss.cassandra.ecchronos.core.locks.LockFactory; +import com.ericsson.bss.cassandra.ecchronos.utils.exceptions.LockException; import java.io.IOException; import java.util.Arrays; import java.util.Collection; @@ -69,8 +70,6 @@ import org.junit.Before; import org.junit.Test; -import com.ericsson.bss.cassandra.ecchronos.core.utils.LockException; - import net.jcip.annotations.NotThreadSafe; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; diff --git a/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/TestLockCache.java b/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/TestLockCache.java index 112365ea..2f88964a 100644 --- a/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/TestLockCache.java +++ b/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/TestLockCache.java @@ -14,7 +14,7 @@ */ package com.ericsson.bss.cassandra.ecchronos.core.impl.locks; -import com.ericsson.bss.cassandra.ecchronos.core.utils.LockException; +import com.ericsson.bss.cassandra.ecchronos.utils.exceptions.LockException; import com.ericsson.bss.cassandra.ecchronos.core.locks.LockFactory.DistributedLock; import nl.jqno.equalsverifier.EqualsVerifier; import 
org.junit.Before; diff --git a/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/locks/LockFactory.java b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/locks/LockFactory.java index c6b09c3b..39afa9ac 100644 --- a/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/locks/LockFactory.java +++ b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/locks/LockFactory.java @@ -17,8 +17,7 @@ import java.io.Closeable; import java.util.Map; import java.util.Optional; - -import com.ericsson.bss.cassandra.ecchronos.core.utils.LockException; +import com.ericsson.bss.cassandra.ecchronos.utils.exceptions.LockException; /** * Interface for distributed lock factories. diff --git a/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/utils/package-info.java b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/utils/package-info.java deleted file mode 100644 index ea490574..00000000 --- a/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/utils/package-info.java +++ /dev/null @@ -1,18 +0,0 @@ -/* - * Copyright 2024 Telefonaktiebolaget LM Ericsson - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/** - * Contains utilities classes. 
- */ -package com.ericsson.bss.cassandra.ecchronos.core.utils; diff --git a/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/utils/LockException.java b/utils/src/main/java/com/ericsson/bss/cassandra/ecchronos/utils/exceptions/LockException.java similarity index 94% rename from core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/utils/LockException.java rename to utils/src/main/java/com/ericsson/bss/cassandra/ecchronos/utils/exceptions/LockException.java index bae8ce59..e968341b 100644 --- a/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/utils/LockException.java +++ b/utils/src/main/java/com/ericsson/bss/cassandra/ecchronos/utils/exceptions/LockException.java @@ -12,7 +12,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package com.ericsson.bss.cassandra.ecchronos.core.utils; +package com.ericsson.bss.cassandra.ecchronos.utils.exceptions; /** * Exception thrown when a lock factory is unable to get a lock. From e735a23ceadc71f12142bfb017d5b303169e9353 Mon Sep 17 00:00:00 2001 From: Paul Chandler Date: Mon, 16 Sep 2024 11:18:12 +0100 Subject: [PATCH 7/8] Cassandra based distributed locking mechanism # 741 - Cassandra tables called lock and lock_priority, to manage task execution and synchronization across multiple nodes. 
--- .gitignore | 22 -- CHANGES.md | 3 +- .../AgentNativeConnectionProvider.java | 15 ++ application/src/main/resources/ecc.yml | 24 +- .../builders/DistributedNativeBuilder.java | 2 +- ...stributedNativeConnectionProviderImpl.java | 26 ++- connection/pom.xml | 7 + .../DistributedNativeConnectionProvider.java | 2 + .../core/impl/locks/CASLockFactory.java | 6 +- .../impl/locks/CASLockFactoryBuilder.java | 2 +- .../core/impl/locks/CASLockProperties.java | 17 +- .../core/impl/locks/HostStatesImpl.java | 216 ------------------ .../impl/AbstractCassandraContainerTest.java | 7 + .../core/impl/locks/TestCASLockFactory.java | 93 +++++++- .../ecchronos/core/locks/HostStates.java | 51 ----- .../data/utils/AbstractCassandraTest.java | 7 + 16 files changed, 185 insertions(+), 315 deletions(-) delete mode 100644 .gitignore delete mode 100644 core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/HostStatesImpl.java delete mode 100644 core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/locks/HostStates.java diff --git a/.gitignore b/.gitignore deleted file mode 100644 index 94278c9f..00000000 --- a/.gitignore +++ /dev/null @@ -1,22 +0,0 @@ -target/ -META-INF/ -OSGI-INF/ -build.properties -pom.xml.versionsBackup -dependency-reduced-pom.xml -*~ -*.bak -.checkstyle -.classpath -.project -.settings/ -.toDelete -*.pyc -.idea/ -*.iml -.coverage -*htmlcov -application/statistics/ -statistics/ -.vscode/ - diff --git a/CHANGES.md b/CHANGES.md index 566b8232..4fe0e8c7 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -2,11 +2,10 @@ ## Version 1.0.0 (Not yet Released) -* Cassandra-Based Distributed Locks #741 +* Cassandra-Based Distributed Locks - Issue #741 * Create New Repair Type Called "VNODE" - Issue #755 * Create ReplicaRepairGroup Class for Grouping Replicas and Token Ranges - Issue #721 * Hot Reload of Nodes List - Issue #699 -* Update nodes when cluster changes, nodes removed or added #699 * Investigate Creation of RepairScheduler and ScheduleManager #714 * 
Implement ScheduledJobQueue for Prioritized Job Management and Execution - Issue #740 * Implement RepairGroup Class for Managing and Executing Repair Tasks - Issue #738 diff --git a/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/providers/AgentNativeConnectionProvider.java b/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/providers/AgentNativeConnectionProvider.java index 452a5661..e9194120 100644 --- a/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/providers/AgentNativeConnectionProvider.java +++ b/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/providers/AgentNativeConnectionProvider.java @@ -28,6 +28,7 @@ import com.ericsson.bss.cassandra.ecchronos.connection.impl.builders.DistributedNativeBuilder; import com.ericsson.bss.cassandra.ecchronos.connection.impl.providers.DistributedNativeConnectionProviderImpl; import com.ericsson.bss.cassandra.ecchronos.core.impl.repair.DefaultRepairConfigurationProvider; +import com.ericsson.bss.cassandra.ecchronos.utils.enums.connection.ConnectionType; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -297,4 +298,18 @@ public Boolean confirmNodeValid(final Node node) { return myDistributedNativeConnectionProviderImpl.confirmNodeValid(node); } + + /** + * Retrieves the type of connection being used by this connection provider. + * This method delegates the call to the underlying {@code DistributedNativeConnectionProviderImpl} + * to determine the current {@link ConnectionType}. + * + * @return The {@link ConnectionType} of the connection managed by + * {@code myDistributedNativeConnectionProviderImpl}. 
+ */ + @Override + public ConnectionType getConnectionType() + { + return myDistributedNativeConnectionProviderImpl.getConnectionType(); + } } diff --git a/application/src/main/resources/ecc.yml b/application/src/main/resources/ecc.yml index ed88f3cb..efe4f39c 100644 --- a/application/src/main/resources/ecc.yml +++ b/application/src/main/resources/ecc.yml @@ -291,14 +291,16 @@ rest_server: ## the cache expiration time is reached. ## cache_expiry_time_in_seconds: 30 - ## - ## Allow to override consistency level for LWT (lightweight transactions). Possible values are: - ## "DEFAULT" - Use consistency level based on remoteRouting. - ## "SERIAL" - Use SERIAL consistency for LWT regardless of remoteRouting. - ## "LOCAL" - Use LOCAL_SERIAL consistency for LWT regardless of remoteRouting. - ## - ## if you use remoteRouting: false and LOCAL then all locks will be taken locally - ## in DC. I.e There's a risk that multiple nodes in different datacenters will be able to lock the - ## same nodes causing multiple repairs on the same range/node at the same time. - ## - consistencySerial: "DEFAULT" + ## + ## Allow to override consistency level for LWT (lightweight transactions). Possible values are: + ## "DEFAULT" - Use consistency level based on the `datacenterAware` agent type. + ## If the agent type is `datacenterAware`, LOCAL_SERIAL consistency will be used. Otherwise, + ## SERIAL consistency will be applied. + ## "SERIAL" - Use SERIAL consistency for LWT regardless of agent type. + ## "LOCAL" - Use LOCAL_SERIAL consistency for LWT regardless of agent type. + ## + ## If an agent type other than datacenterAware is combined with LOCAL, all locks will be managed locally within each datacenter. + ## I.e. there's a risk that multiple nodes in different datacenters will be able to lock the + ## same nodes causing multiple repairs on the same range/node at the same time. 
+ ## + consistencySerial: "DEFAULT" diff --git a/connection.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/connection/impl/builders/DistributedNativeBuilder.java b/connection.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/connection/impl/builders/DistributedNativeBuilder.java index dbf78ef5..0316dcdc 100644 --- a/connection.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/connection/impl/builders/DistributedNativeBuilder.java +++ b/connection.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/connection/impl/builders/DistributedNativeBuilder.java @@ -244,7 +244,7 @@ public final DistributedNativeConnectionProviderImpl build() LOG.info("Requesting Nodes List"); List nodesList = createNodesList(session); LOG.info("Nodes list was created with success"); - return new DistributedNativeConnectionProviderImpl(session, nodesList, this); + return new DistributedNativeConnectionProviderImpl(session, nodesList, this, myType); } /** diff --git a/connection.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/connection/impl/providers/DistributedNativeConnectionProviderImpl.java b/connection.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/connection/impl/providers/DistributedNativeConnectionProviderImpl.java index 9f32b138..d95bd49c 100644 --- a/connection.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/connection/impl/providers/DistributedNativeConnectionProviderImpl.java +++ b/connection.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/connection/impl/providers/DistributedNativeConnectionProviderImpl.java @@ -19,6 +19,7 @@ import com.ericsson.bss.cassandra.ecchronos.connection.DistributedNativeConnectionProvider; import com.ericsson.bss.cassandra.ecchronos.connection.impl.builders.DistributedNativeBuilder; +import com.ericsson.bss.cassandra.ecchronos.utils.enums.connection.ConnectionType; import java.io.IOException; import java.util.List; @@ -27,6 +28,7 @@ public class DistributedNativeConnectionProviderImpl implements 
DistributedNativ private final CqlSession mySession; private final List myNodes; private final DistributedNativeBuilder myDistributedNativeBuilder; + private final ConnectionType myConnectionType; /** * Constructs a new {@code DistributedNativeConnectionProviderImpl} with the specified {@link CqlSession} and list @@ -38,14 +40,15 @@ public class DistributedNativeConnectionProviderImpl implements DistributedNativ * the list of {@link Node} instances representing the nodes in the cluster. */ public DistributedNativeConnectionProviderImpl( - final CqlSession session, - final List nodesList, - final DistributedNativeBuilder distributedNativeBuilder - ) + final CqlSession session, + final List nodesList, + final DistributedNativeBuilder distributedNativeBuilder, + final ConnectionType connectionType) { mySession = session; myNodes = nodesList; myDistributedNativeBuilder = distributedNativeBuilder; + myConnectionType = connectionType; } /** @@ -70,8 +73,6 @@ public List getNodes() return myNodes; } - - /** * Closes the {@link CqlSession} associated with this connection provider. * @@ -125,4 +126,17 @@ public Boolean confirmNodeValid(final Node node) { return myDistributedNativeBuilder.confirmNodeValid(node); } + + /** + * Retrieves the {@link ConnectionType} being used by this connection provider, + * as supplied when the provider was constructed. + * + * @return The {@link ConnectionType} of the connection managed by + * this provider. 
+ */ + @Override + public ConnectionType getConnectionType() + { + return myConnectionType; + } } diff --git a/connection/pom.xml b/connection/pom.xml index 5ce8de64..1cad2e4b 100644 --- a/connection/pom.xml +++ b/connection/pom.xml @@ -32,6 +32,13 @@ EcChronos Connection + + + com.ericsson.bss.cassandra.ecchronos + utils + ${project.version} + + com.datastax.oss diff --git a/connection/src/main/java/com/ericsson/bss/cassandra/ecchronos/connection/DistributedNativeConnectionProvider.java b/connection/src/main/java/com/ericsson/bss/cassandra/ecchronos/connection/DistributedNativeConnectionProvider.java index 34c1e245..2ec7c598 100644 --- a/connection/src/main/java/com/ericsson/bss/cassandra/ecchronos/connection/DistributedNativeConnectionProvider.java +++ b/connection/src/main/java/com/ericsson/bss/cassandra/ecchronos/connection/DistributedNativeConnectionProvider.java @@ -14,6 +14,7 @@ */ package com.ericsson.bss.cassandra.ecchronos.connection; +import com.ericsson.bss.cassandra.ecchronos.utils.enums.connection.ConnectionType; import java.io.Closeable; import java.io.IOException; import java.util.List; @@ -35,4 +36,5 @@ default void close() throws IOException void addNode(Node myNode); void removeNode(Node myNode); Boolean confirmNodeValid(Node node); + ConnectionType getConnectionType(); } diff --git a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/CASLockFactory.java b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/CASLockFactory.java index 51fb3f5c..4ad9727c 100644 --- a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/CASLockFactory.java +++ b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/CASLockFactory.java @@ -24,8 +24,8 @@ import com.datastax.oss.driver.api.core.metadata.schema.KeyspaceMetadata; import com.datastax.oss.driver.api.core.metadata.schema.TableMetadata; import com.ericsson.bss.cassandra.ecchronos.core.locks.LockFactory; +import 
com.ericsson.bss.cassandra.ecchronos.core.state.HostStates; import com.ericsson.bss.cassandra.ecchronos.utils.exceptions.LockException; -import com.ericsson.bss.cassandra.ecchronos.core.locks.HostStates; import com.google.common.annotations.VisibleForTesting; import com.google.common.util.concurrent.ThreadFactoryBuilder; import org.slf4j.Logger; @@ -84,7 +84,9 @@ public final class CASLockFactory implements LockFactory, Closeable CASLockFactory(final CASLockFactoryBuilder builder) { - myCasLockProperties = new CASLockProperties(builder.getKeyspaceName(), + myCasLockProperties = new CASLockProperties( + builder.getNativeConnectionProvider().getConnectionType(), + builder.getKeyspaceName(), Executors.newSingleThreadScheduledExecutor( new ThreadFactoryBuilder().setNameFormat("LockRefresher-%d").build()), builder.getConsistencyType(), diff --git a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/CASLockFactoryBuilder.java b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/CASLockFactoryBuilder.java index bd3ad7e4..0e60d496 100644 --- a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/CASLockFactoryBuilder.java +++ b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/CASLockFactoryBuilder.java @@ -18,7 +18,7 @@ import com.ericsson.bss.cassandra.ecchronos.connection.DistributedNativeConnectionProvider; import com.ericsson.bss.cassandra.ecchronos.connection.StatementDecorator; import com.ericsson.bss.cassandra.ecchronos.core.impl.utils.ConsistencyType; -import com.ericsson.bss.cassandra.ecchronos.core.locks.HostStates; +import com.ericsson.bss.cassandra.ecchronos.core.state.HostStates; /** * Represents a container for builder configurations and state for the CASLockFactory. 
diff --git a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/CASLockProperties.java b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/CASLockProperties.java index 8efe5aa9..add371c1 100644 --- a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/CASLockProperties.java +++ b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/CASLockProperties.java @@ -15,6 +15,7 @@ package com.ericsson.bss.cassandra.ecchronos.core.impl.locks; import com.ericsson.bss.cassandra.ecchronos.core.impl.utils.ConsistencyType; +import com.ericsson.bss.cassandra.ecchronos.utils.enums.connection.ConnectionType; import java.util.concurrent.ScheduledExecutorService; import com.datastax.oss.driver.api.core.ConsistencyLevel; @@ -27,18 +28,21 @@ */ public class CASLockProperties { + private final ConnectionType myConnectionType; private final String myKeyspaceName; private final ScheduledExecutorService myExecutor; private final ConsistencyLevel mySerialConsistencyLevel; private final CqlSession mySession; private final StatementDecorator myStatementDecorator; - CASLockProperties(final String keyspaceName, + CASLockProperties(final ConnectionType connectionType, + final String keyspaceName, final ScheduledExecutorService executor, final ConsistencyType consistencyType, final CqlSession session, final StatementDecorator statementDecorator) { + myConnectionType = connectionType; myKeyspaceName = keyspaceName; myExecutor = executor; mySerialConsistencyLevel = defineSerialConsistencyLevel(consistencyType); @@ -49,9 +53,18 @@ public class CASLockProperties public final ConsistencyLevel defineSerialConsistencyLevel(final ConsistencyType consistencyType) { ConsistencyLevel serialConsistencyLevel; - serialConsistencyLevel = ConsistencyType.LOCAL.equals(consistencyType) + if (ConsistencyType.DEFAULT.equals(consistencyType)) + { + serialConsistencyLevel = myConnectionType == 
ConnectionType.datacenterAware ? ConsistencyLevel.LOCAL_SERIAL : ConsistencyLevel.SERIAL; + } + else + { + serialConsistencyLevel = ConsistencyType.LOCAL.equals(consistencyType) + ? ConsistencyLevel.LOCAL_SERIAL + : ConsistencyLevel.SERIAL; + } return serialConsistencyLevel; } diff --git a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/HostStatesImpl.java b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/HostStatesImpl.java deleted file mode 100644 index 0308eb41..00000000 --- a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/HostStatesImpl.java +++ /dev/null @@ -1,216 +0,0 @@ -/* - * Copyright 2024 Telefonaktiebolaget LM Ericsson - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package com.ericsson.bss.cassandra.ecchronos.core.impl.locks; - -import com.datastax.oss.driver.api.core.CqlSession; -import com.datastax.oss.driver.api.core.metadata.Metadata; -import com.ericsson.bss.cassandra.ecchronos.core.impl.logging.ThrottlingLogger; -import com.ericsson.bss.cassandra.ecchronos.core.jmx.DistributedJmxProxy; -import com.ericsson.bss.cassandra.ecchronos.core.jmx.DistributedJmxProxyFactory; -import com.ericsson.bss.cassandra.ecchronos.core.locks.HostStates; -import com.ericsson.bss.cassandra.ecchronos.core.metadata.DriverNode; -import java.io.Closeable; -import java.io.IOException; -import java.net.InetAddress; -import java.util.Optional; -import java.util.UUID; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.TimeUnit; - -import com.datastax.oss.driver.api.core.metadata.Node; -import com.google.common.annotations.VisibleForTesting; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Implementation of the {@link HostStates} interface using JMX to retrieve node statuses and then caches the retrieved - * statuses for some time. 
- */ -public final class HostStatesImpl implements HostStates, Closeable -{ - private static final Logger LOG = LoggerFactory.getLogger(HostStatesImpl.class); - private static final ThrottlingLogger THROTTLED_LOGGER = new ThrottlingLogger(LOG, 1, TimeUnit.MINUTES); - - private static final long DEFAULT_REFRESH_INTERVAL_IN_MS = TimeUnit.SECONDS.toMillis(10); - - private final ConcurrentHashMap myHostStates = new ConcurrentHashMap<>(); - private final Object myRefreshLock = new Object(); - private final long myRefreshIntervalInMs; - private final CqlSession myCqlSession; - - private volatile long myLastRefresh = -1; - - private final DistributedJmxProxyFactory myJmxProxyFactory; - - private HostStatesImpl(final Builder builder) - { - myRefreshIntervalInMs = builder.myRefreshIntervalInMs; - myJmxProxyFactory = builder.myJmxProxyFactory; - myCqlSession = builder.myCqlSession; - } - - @Override - public boolean isUp(final InetAddress address) - { - refreshNodeStatus(address); - - Boolean status = myHostStates.get(address); - return status != null && status; - } - - @Override - public boolean isUp(final Node node) - { - return isUp(node.getBroadcastAddress().get().getAddress()); - } - - @Override - public boolean isUp(final DriverNode node) - { - return isUp(node.getPublicAddress()); - } - - @Override - public void close() - { - myHostStates.clear(); - } - - private void refreshNodeStatus(final InetAddress address) - { - if (shouldRefreshNodeStatus()) - { - synchronized (myRefreshLock) - { - if (shouldRefreshNodeStatus() && !tryRefreshHostStates(address)) - { - myHostStates.clear(); - } - } - } - } - - @VisibleForTesting - void resetLastRefresh() - { - myLastRefresh = -1; - } - - private boolean shouldRefreshNodeStatus() - { - return myLastRefresh == -1 || myLastRefresh < (System.currentTimeMillis() - myRefreshIntervalInMs); - } - - private synchronized boolean tryRefreshHostStates(final InetAddress address) - { - if (myJmxProxyFactory == null) - { - return false; - } - 
- UUID hostId = getHostIdForAddress(address); - try (DistributedJmxProxy proxy = myJmxProxyFactory.connect()) - { - for (String liveHost : proxy.getLiveNodes(hostId)) - { - InetAddress host = InetAddress.getByName(liveHost); - - if (changeHostState(host, true)) - { - LOG.debug("Host {} marked as UP", host); - } - } - - for (String unreachableHost : proxy.getUnreachableNodes(hostId)) - { - InetAddress host = InetAddress.getByName(unreachableHost); - - if (changeHostState(host, false)) - { - LOG.debug("Host {} marked as DOWN", host); - } - } - - myLastRefresh = System.currentTimeMillis(); - return true; - } - catch (IOException e) - { - THROTTLED_LOGGER.warn("Unable to retrieve host states", e); - } - - return false; - } - - private boolean changeHostState(final InetAddress host, final boolean newValue) - { - Boolean oldValue = myHostStates.put(host, newValue); - - return oldValue == null || oldValue != newValue; - } - - private UUID getHostIdForAddress(final InetAddress address) - { - Metadata metadata = myCqlSession.getMetadata(); - Optional nodeOptional = metadata.getNodes() - .values() - .stream() - .filter(node -> node.getBroadcastAddress().isPresent() - && node.getBroadcastAddress().get().getAddress().equals(address)) - .findFirst(); - - return nodeOptional.map(Node::getHostId).orElse(null); - } - - public static Builder builder() - { - return new Builder(); - } - - public static class Builder - { - private DistributedJmxProxyFactory myJmxProxyFactory; - private long myRefreshIntervalInMs = DEFAULT_REFRESH_INTERVAL_IN_MS; - private CqlSession myCqlSession; - - public final Builder withJmxProxyFactory(final DistributedJmxProxyFactory jmxProxyFactory) - { - myJmxProxyFactory = jmxProxyFactory; - return this; - } - - public final Builder withRefreshIntervalInMs(final long refreshIntervalInMs) - { - myRefreshIntervalInMs = refreshIntervalInMs; - return this; - } - - public final Builder withCqlSession(final CqlSession session) - { - myCqlSession = session; - return 
this; - } - - public final HostStatesImpl build() - { - if (myJmxProxyFactory == null) - { - throw new IllegalArgumentException("JMX Proxy Factory must be set"); - } - - return new HostStatesImpl(this); - } - } -} diff --git a/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/AbstractCassandraContainerTest.java b/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/AbstractCassandraContainerTest.java index db7a495e..9fb8c68b 100644 --- a/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/AbstractCassandraContainerTest.java +++ b/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/AbstractCassandraContainerTest.java @@ -15,6 +15,7 @@ package com.ericsson.bss.cassandra.ecchronos.core.impl; import com.ericsson.bss.cassandra.ecchronos.connection.DistributedNativeConnectionProvider; +import com.ericsson.bss.cassandra.ecchronos.utils.enums.connection.ConnectionType; import java.net.InetSocketAddress; import java.util.List; @@ -89,6 +90,12 @@ public Boolean confirmNodeValid(Node node) { return false; } + + @Override + public ConnectionType getConnectionType() + { + return ConnectionType.hostAware; + } }; } diff --git a/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/TestCASLockFactory.java b/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/TestCASLockFactory.java index 91758a8d..ba6069ad 100644 --- a/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/TestCASLockFactory.java +++ b/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/TestCASLockFactory.java @@ -26,8 +26,9 @@ import com.ericsson.bss.cassandra.ecchronos.connection.DistributedNativeConnectionProvider; import com.ericsson.bss.cassandra.ecchronos.core.impl.AbstractCassandraContainerTest; import com.ericsson.bss.cassandra.ecchronos.core.impl.utils.ConsistencyType; -import com.ericsson.bss.cassandra.ecchronos.core.locks.HostStates; 
import com.ericsson.bss.cassandra.ecchronos.core.locks.LockFactory; +import com.ericsson.bss.cassandra.ecchronos.core.state.HostStates; +import com.ericsson.bss.cassandra.ecchronos.utils.enums.connection.ConnectionType; import com.ericsson.bss.cassandra.ecchronos.utils.exceptions.LockException; import java.io.IOException; import java.util.Arrays; @@ -484,6 +485,12 @@ public Boolean confirmNodeValid(Node node) { return false; } + + @Override + public ConnectionType getConnectionType() + { + return null; + } }) .withHostStates(hostStates) .withStatementDecorator(s -> s) @@ -491,6 +498,51 @@ public Boolean confirmNodeValid(Node node) .build()); } + @Test + public void testDataCenterAwareAgentTypeWithDefaultSerialConsistency() + { + Node nodeMock = mock(Node.class); + DistributedNativeConnectionProvider connectionProviderMock = mock(DistributedNativeConnectionProvider.class); + + when(nodeMock.getHostId()).thenReturn(UUID.randomUUID()); + when(connectionProviderMock.getCqlSession()).thenReturn(mySession); + when(connectionProviderMock.getNodes()).thenReturn(Arrays.asList(nodeMock)); + when(connectionProviderMock.getConnectionType()).thenReturn(ConnectionType.datacenterAware); + + myLockFactory = new CASLockFactoryBuilder() + .withNativeConnectionProvider(getDataCenterAwareConnectionTypeProvider()) + .withHostStates(hostStates) + .withStatementDecorator(s -> s) + .withKeyspaceName(myKeyspaceName) + .withConsistencySerial(ConsistencyType.DEFAULT) + .withNode(nodeMock) + .build(); + + assertEquals(ConsistencyLevel.LOCAL_SERIAL, myLockFactory.getSerialConsistencyLevel()); + } + + @Test + public void testOtherThanDataCenterAwareAgentTypeWithDefaultSerialConsistency() + { + Node nodeMock = mock(Node.class); + DistributedNativeConnectionProvider connectionProviderMock = mock(DistributedNativeConnectionProvider.class); + + when(nodeMock.getHostId()).thenReturn(UUID.randomUUID()); + when(connectionProviderMock.getCqlSession()).thenReturn(mySession); + 
when(connectionProviderMock.getNodes()).thenReturn(Arrays.asList(nodeMock)); + + myLockFactory = new CASLockFactoryBuilder() + .withNativeConnectionProvider(connectionProviderMock) + .withHostStates(hostStates) + .withStatementDecorator(s -> s) + .withKeyspaceName(myKeyspaceName) + .withConsistencySerial(ConsistencyType.DEFAULT) + .withNode(nodeMock) + .build(); + + assertEquals(ConsistencyLevel.SERIAL, myLockFactory.getSerialConsistencyLevel()); + } + @Test public void testLocalSerialConsistency() { @@ -612,4 +664,43 @@ private long getWriteCountFromTableStats(String tableName) throws UnsupportedOpe return writeCount; } + private DistributedNativeConnectionProvider getDataCenterAwareConnectionTypeProvider() + { + return new DistributedNativeConnectionProvider() + { + @Override + public CqlSession getCqlSession() + { + return mySession; + } + + @Override + public List getNodes() + { + return mySession.getMetadata().getNodes().values().stream().toList(); + } + + @Override + public void addNode(Node myNode) + { + } + + @Override + public void removeNode(Node myNode) + { + } + + @Override + public Boolean confirmNodeValid(Node node) + { + return false; + } + + @Override + public ConnectionType getConnectionType() + { + return ConnectionType.datacenterAware; + } + }; + } } diff --git a/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/locks/HostStates.java b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/locks/HostStates.java deleted file mode 100644 index ed3626c4..00000000 --- a/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/locks/HostStates.java +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright 2024 Telefonaktiebolaget LM Ericsson - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package com.ericsson.bss.cassandra.ecchronos.core.locks; - -import com.ericsson.bss.cassandra.ecchronos.core.metadata.DriverNode; -import java.net.InetAddress; - -import com.datastax.oss.driver.api.core.metadata.Node; - -/** - * Interface used to determine node statuses. - */ -public interface HostStates -{ - /** - * Check if a host is up. - * - * @param address - * The broadcast address of the host. - * @return True if the node is up. False will be returned if the state is unknown or if the host is down. - */ - boolean isUp(InetAddress address); - - /** - * Check if a host is up. - * - * @param node The node. - * @return True if the host is up. False will be returned if the state is unknown or if the host is down. - */ - boolean isUp(Node node); - - /** - * Check if a node is up. - * - * @param node The node. - * @return True if the node is up. False will be returned if the state is unknown or if the node is down. 
- */ - boolean isUp(DriverNode node); -} diff --git a/data/src/test/java/com/ericsson/bss/cassandra/ecchronos/data/utils/AbstractCassandraTest.java b/data/src/test/java/com/ericsson/bss/cassandra/ecchronos/data/utils/AbstractCassandraTest.java index ce2dd65f..2c0cd6bf 100644 --- a/data/src/test/java/com/ericsson/bss/cassandra/ecchronos/data/utils/AbstractCassandraTest.java +++ b/data/src/test/java/com/ericsson/bss/cassandra/ecchronos/data/utils/AbstractCassandraTest.java @@ -17,6 +17,7 @@ import com.datastax.oss.driver.api.core.CqlSession; import com.datastax.oss.driver.api.core.metadata.Node; import com.ericsson.bss.cassandra.ecchronos.connection.DistributedNativeConnectionProvider; +import com.ericsson.bss.cassandra.ecchronos.utils.enums.connection.ConnectionType; import org.junit.AfterClass; import org.junit.BeforeClass; import org.testcontainers.containers.CassandraContainer; @@ -75,6 +76,12 @@ public void removeNode(Node myNode) { public Boolean confirmNodeValid(Node node) { return false; } + + @Override + public ConnectionType getConnectionType() + { + return ConnectionType.datacenterAware; + } }; } From 6b11c9964c49573ce7b9fc5fe7dddceba87a02c7 Mon Sep 17 00:00:00 2001 From: sajid riaz Date: Fri, 8 Nov 2024 11:45:07 +0100 Subject: [PATCH 8/8] Add check for datacenteraware agent type - add check for CasLockStatement to check if agent type is datacenteraware and execute statement accordingly. 
--- .../ecchronos/core/impl/locks/CASLockProperties.java | 6 ++++++ .../ecchronos/core/impl/locks/CASLockStatement.java | 2 +- .../bss/cassandra/ecchronos/data/sync/EccNodesSync.java | 3 ++- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/CASLockProperties.java b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/CASLockProperties.java index add371c1..5395eaf3 100644 --- a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/CASLockProperties.java +++ b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/CASLockProperties.java @@ -92,4 +92,10 @@ public final StatementDecorator getStatementDecorator() { return myStatementDecorator; } + + public final boolean isDatacenterAwareAgentType() + + { + return myConnectionType == ConnectionType.datacenterAware; + } } diff --git a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/CASLockStatement.java b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/CASLockStatement.java index b4bbd52b..cb121c15 100644 --- a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/CASLockStatement.java +++ b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/locks/CASLockStatement.java @@ -69,7 +69,7 @@ public final ResultSet execute(final String dataCenter, final BoundStatement sta { Statement executeStatement; - if (dataCenter != null) + if (dataCenter != null && myCasLockProperties.isDatacenterAwareAgentType()) { executeStatement = new DataCenterAwareStatement(statement, dataCenter); } diff --git a/data/src/main/java/com/ericsson/bss/cassandra/ecchronos/data/sync/EccNodesSync.java b/data/src/main/java/com/ericsson/bss/cassandra/ecchronos/data/sync/EccNodesSync.java index 5e453990..03fcefec 100644 --- 
a/data/src/main/java/com/ericsson/bss/cassandra/ecchronos/data/sync/EccNodesSync.java +++ b/data/src/main/java/com/ericsson/bss/cassandra/ecchronos/data/sync/EccNodesSync.java @@ -106,7 +106,8 @@ private EccNodesSync(final Builder builder) throws UnknownHostException mySelectStatusStatement = mySession.prepare(selectFrom(KEYSPACE_NAME, TABLE_NAME) .columns(COLUMN_NODE_ID, COLUMN_NODE_ENDPOINT, COLUMN_DC_NAME, COLUMN_NODE_STATUS) .whereColumn(COLUMN_ECCHRONOS_ID).isEqualTo(bindMarker()) - .build()); + .build() + .setConsistencyLevel(ConsistencyLevel.LOCAL_QUORUM)); ecChronosID = builder.myEcchronosID; connectionDelayValue = builder.myConnectionDelayValue;