Skip to content

Commit

Permalink
Prevent premature removal of agent because of race condition in DockerSwarmAgentRetentionStrategy (#115)
Browse files Browse the repository at this point in the history

Co-authored-by: ue56923 <[email protected]>
Co-authored-by: Roman <[email protected]>
  • Loading branch information
3 people authored Jan 4, 2023
1 parent 2b768b1 commit d3f18e9
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,9 @@ public long getTimeout() {
public long check(@Nonnull DockerSwarmComputer c) {
if (c.isIdle() && c.isOnline()) {
final long connectTime = System.currentTimeMillis() - c.getConnectTime();
final long onlineTime = System.currentTimeMillis() - c.getOnlineTime();
final long idleTime = System.currentTimeMillis() - c.getIdleStartMilliseconds();
final boolean isTimeout = connectTime > timeout && idleTime > timeout;
final boolean isTimeout = connectTime > timeout && onlineTime > timeout && idleTime > timeout;
if (isTimeout && (!isTaskAccepted || isTaskCompleted ) && !Jenkins.getInstance().isQuietingDown()) {
LOGGER.log(Level.INFO, "Disconnecting due to idle {0}", c.getName());
done(c);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,19 +1,29 @@

package org.jenkinsci.plugins.docker.swarm;

import java.io.IOException;
import java.io.OutputStream;

import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.Future;
import java.util.logging.Level;
import java.util.logging.Logger;

import com.google.common.collect.Iterables;

import hudson.model.Executor;
import hudson.model.Queue;
import hudson.remoting.Channel;
import hudson.remoting.Channel.Listener;
import hudson.slaves.AbstractCloudComputer;
import hudson.slaves.OfflineCause;

public class DockerSwarmComputer extends AbstractCloudComputer<DockerSwarmAgent> {
private static final Logger LOGGER = Logger.getLogger(DockerSwarmComputer.class.getName());

// Timestamp taken from System.currentTimeMillis() when setChannel is invoked;
// remains 0 until then (as far as the visible code shows). Exposed through getOnlineTime().
private long onlineTime = 0L;

public DockerSwarmComputer(final DockerSwarmAgent dockerSwarmAgent) {
super(dockerSwarmAgent);
Expand Down Expand Up @@ -55,4 +65,17 @@ public Future<?> disconnect(OfflineCause cause) {
/**
 * Derives the volume name from this computer's name.
 *
 * <p>The name returned by {@code getName()} is split on {@code "-"} and the
 * second segment (index 1) is returned. If the split yields fewer than two
 * segments, the array access fails with an
 * {@link ArrayIndexOutOfBoundsException}.
 *
 * @return the second dash-separated segment of the computer name
 */
public String getVolumeName() {
    final String[] nameSegments = getName().split("-");
    return nameSegments[1];
}

/**
 * Returns the value currently held in the {@code onlineTime} field.
 *
 * @return the stored timestamp in milliseconds, as produced by
 *         {@code System.currentTimeMillis()} when it was recorded
 */
public final long getOnlineTime() {
    return this.onlineTime;
}

/**
 * Records the current wall-clock time in {@code onlineTime}, then delegates
 * to the superclass implementation and logs that the agent got online.
 *
 * @param channel   passed through unchanged to {@code super.setChannel}
 * @param launchLog passed through unchanged to {@code super.setChannel}
 * @param listener  passed through unchanged to {@code super.setChannel}
 * @throws IOException          declared by this override; presumably propagated from {@code super.setChannel}
 * @throws InterruptedException declared by this override; presumably propagated from {@code super.setChannel}
 */
@Override
public void setChannel(Channel channel, OutputStream launchLog, Listener listener) throws IOException, InterruptedException {
// The timestamp is written BEFORE delegating to the superclass.
// NOTE(review): presumably deliberate, so that a retention check that already sees the
// computer as online never reads a stale onlineTime — keep this ordering; confirm against the caller.
this.onlineTime = System.currentTimeMillis();

super.setChannel(channel, launchLog, listener);

// Only reached when super.setChannel returned without throwing.
LOGGER.log(Level.INFO, "Agent {0} got online", getName());
}
}

0 comments on commit d3f18e9

Please sign in to comment.