diff --git a/.mvn/extensions.xml b/.mvn/extensions.xml
index 1ca3828834..98c3571185 100644
--- a/.mvn/extensions.xml
+++ b/.mvn/extensions.xml
@@ -24,7 +24,7 @@
com.gradle
develocity-maven-extension
- 1.21.4
+ 1.23
com.gradle
diff --git a/curator-recipes/src/main/java/org/apache/curator/framework/recipes/leader/LeaderLatch.java b/curator-recipes/src/main/java/org/apache/curator/framework/recipes/leader/LeaderLatch.java
index 4d20f9afde..99e0fb9a43 100644
--- a/curator-recipes/src/main/java/org/apache/curator/framework/recipes/leader/LeaderLatch.java
+++ b/curator-recipes/src/main/java/org/apache/curator/framework/recipes/leader/LeaderLatch.java
@@ -509,7 +509,7 @@ public void processResult(CuratorFramework client, CuratorEvent event) throws Ex
getChildren();
}
} else {
- log.error("getChildren() failed. rc = {}", event.getResultCode());
+ log.error("[id={}] creatingParentContainersIfNeeded() failed (rc = {})", id, event.getResultCode());
}
}
};
@@ -528,7 +528,7 @@ private synchronized void internalStart() {
reset();
} catch (Exception e) {
ThreadUtils.checkInterrupted(e);
- log.error("An error occurred checking resetting leadership.", e);
+ log.error("[id={}] failed to check resetting leadership.", id, e);
}
}
}
@@ -545,10 +545,10 @@ private void checkLeadership(List children) throws Exception {
List sortedChildren = LockInternals.getSortedChildren(LOCK_NAME, sorter, children);
int ourIndex = (localOurPath != null) ? sortedChildren.indexOf(ZKPaths.getNodeFromPath(localOurPath)) : -1;
- log.debug("checkLeadership with id: {}, ourPath: {}, children: {}", id, localOurPath, sortedChildren);
+ log.debug("[id={}] checkLeadership with ourPath: {}, children: {}", id, localOurPath, sortedChildren);
if (ourIndex < 0) {
- log.error("Can't find our node. Resetting. Index: {}", ourIndex);
+ log.error("[id={}] failed to find our node; resetting (index: {})", id, ourIndex);
reset();
return;
}
@@ -582,7 +582,7 @@ public void process(WatchedEvent event) {
getChildren();
} catch (Exception ex) {
ThreadUtils.checkInterrupted(ex);
- log.error("An error occurred checking the leadership.", ex);
+ log.error("[id={}] failed to check the leadership.", id, ex);
}
}
}
@@ -592,7 +592,7 @@ public void process(WatchedEvent event) {
@Override
public void processResult(CuratorFramework client, CuratorEvent event) throws Exception {
if (event.getResultCode() == KeeperException.Code.NONODE.intValue()) {
- // previous node is gone - retry getChildren
+ log.debug("[id={}] previous node is gone; retry getChildren", id);
getChildren();
}
}
@@ -607,29 +607,37 @@ private void getChildren() throws Exception {
public void processResult(CuratorFramework client, CuratorEvent event) throws Exception {
if (event.getResultCode() == KeeperException.Code.OK.intValue()) {
checkLeadership(event.getChildren());
+ } else if (event.getResultCode() == KeeperException.Code.NONODE.intValue()) {
+ log.debug("[id={}] latchPath has gone; reset", id);
+ // This is possible when RECONNECTED during:
+ // (1) Scale the zk cluster to 0 nodes.
+ // (2) Scale it back.
+ //
+ // See also https://issues.apache.org/jira/browse/CURATOR-724
+ reset();
+ } else {
+ log.error("[id={}] getChildren() failed (rc = {})", id, event.getResultCode());
}
}
};
client.getChildren().inBackground(callback).forPath(ZKPaths.makePath(latchPath, null));
}
+ @VisibleForTesting
+ volatile CountDownLatch debugHandleReconnectedLatch = null;
+
@VisibleForTesting
protected void handleStateChange(ConnectionState newState) {
switch (newState) {
- default: {
- // NOP
- break;
- }
-
case RECONNECTED: {
try {
- if (client.getConnectionStateErrorPolicy().isErrorState(ConnectionState.SUSPENDED)
- || !hasLeadership.get()) {
- getChildren();
+ if (debugHandleReconnectedLatch != null) {
+ debugHandleReconnectedLatch.await();
}
+ getChildren();
} catch (Exception e) {
ThreadUtils.checkInterrupted(e);
- log.error("Could not reset leader latch", e);
+ log.error("[id={}] failed to recheck leadership on reconnected", id, e);
setLeadership(false);
}
break;
@@ -646,6 +654,11 @@ protected void handleStateChange(ConnectionState newState) {
setLeadership(false);
break;
}
+
+ default: {
+ // NOP
+ break;
+ }
}
}
@@ -662,7 +675,7 @@ private synchronized void setLeadership(boolean newValue) {
private void setNode(String newValue) throws Exception {
String oldPath = ourPath.getAndSet(newValue);
- log.debug("setNode with id: {}, oldPath: {}, newValue: {}", id, oldPath, newValue);
+ log.debug("[id={}] setNode with oldPath: {}, newValue: {}", id, oldPath, newValue);
if (oldPath != null) {
client.delete().guaranteed().inBackground().forPath(oldPath);
}
diff --git a/curator-recipes/src/test/java/org/apache/curator/framework/recipes/leader/TestLeaderLatch.java b/curator-recipes/src/test/java/org/apache/curator/framework/recipes/leader/TestLeaderLatch.java
index 528b317ff1..0dc375d16f 100644
--- a/curator-recipes/src/test/java/org/apache/curator/framework/recipes/leader/TestLeaderLatch.java
+++ b/curator-recipes/src/test/java/org/apache/curator/framework/recipes/leader/TestLeaderLatch.java
@@ -179,6 +179,63 @@ public void testUncreatedPathGetLeader() throws Exception {
}
}
+ // @see https://issues.apache.org/jira/browse/CURATOR-724
+ @Test
+ public void testGetChildrenHitsNoNode() throws Exception {
+ final String latchPath = "/testGetChildrenHitsNoNode";
+ final Timing2 timing = new Timing2();
+ final BlockingQueue events0 = new LinkedBlockingQueue<>();
+ final BlockingQueue events1 = new LinkedBlockingQueue<>();
+
+ final List closeableResources = new ArrayList<>();
+ try {
+ final String id0 = "id0";
+ final CuratorFramework client0 = createAndStartClient(server.getConnectString(), timing, id0, null);
+ closeableResources.add(client0);
+ final LeaderLatch latch0 = createAndStartLeaderLatch(client0, latchPath, id0, events0);
+ closeableResources.add(latch0);
+
+ assertThat(events0.poll(timing.forWaiting().milliseconds(), TimeUnit.MILLISECONDS))
+ .isNotNull()
+ .isEqualTo(new TestEvent(id0, TestEventType.GAINED_LEADERSHIP));
+
+ final String id1 = "id1";
+ final CuratorFramework client1 = createAndStartClient(server.getConnectString(), timing, id1, null);
+ closeableResources.add(client1);
+ final LeaderLatch latch1 = createAndStartLeaderLatch(client1, latchPath, id1, events1);
+ closeableResources.add(latch1);
+
+ // wait for the non-leading LeaderLatch (i.e. latch1) instance to be done with its creation
+ // this call is time-consuming but necessary because we don't have a handle to detect the end of the reset
+ // call
+ timing.forWaiting().sleepABit();
+
+ assertTrue(latch0.hasLeadership());
+ assertFalse(latch1.hasLeadership());
+
+ // ensure we can observe the leadership transferred to latch1
+ latch0.debugHandleReconnectedLatch = new CountDownLatch(1);
+
+ // scale to zero - recreate the cluster
+ final int port = server.getPort();
+ server.close();
+ server = new TestingServer(port, true);
+
+ assertThat(events1.poll(timing.forWaiting().milliseconds(), TimeUnit.MILLISECONDS))
+ .isNotNull()
+ .isEqualTo(new TestEvent(id1, TestEventType.GAINED_LEADERSHIP));
+
+ latch0.debugHandleReconnectedLatch.countDown();
+ assertThat(events0.poll(timing.forWaiting().milliseconds(), TimeUnit.MILLISECONDS))
+ .isNotNull()
+ .isEqualTo(new TestEvent(id0, TestEventType.LOST_LEADERSHIP));
+ } finally {
+ // reverse is necessary for closing the LeaderLatch instances before closing the corresponding client
+ Collections.reverse(closeableResources);
+ closeableResources.forEach(CloseableUtils::closeQuietly);
+ }
+ }
+
@Test
public void testWatchedNodeDeletedOnReconnect() throws Exception {
final String latchPath = "/foo/bar";
diff --git a/curator-recipes/src/test/resources/log4j.properties b/curator-recipes/src/test/resources/log4j.properties
index 706484ce59..98720873e3 100644
--- a/curator-recipes/src/test/resources/log4j.properties
+++ b/curator-recipes/src/test/resources/log4j.properties
@@ -22,4 +22,4 @@ log4j.additivity.org.apache.curator=false
log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.layout=org.apache.log4j.PatternLayout
-log4j.appender.console.layout.ConversionPattern=%-5p %c %x %m [%t]%n
+log4j.appender.console.layout.ConversionPattern=%d{ISO8601} %-5p %c %x %m [%t]%n