Skip to content

Commit

Permalink
Add more information to NoNodeAvailableException and AllNodesFailedEx…
Browse files Browse the repository at this point in the history
…ception

Sometimes users see these exceptions on production in elusive rare
cases.
Since it is PROD it is imposible to enable DEBUG logs.
In order to debug these cases we need these errors to contain
all the information that can help to investigate what is happening.
This information needs to be as recent as it is possible and be
collected as close as possible to the place when it is being read by
driver.
  • Loading branch information
dkropachev committed Sep 26, 2024
1 parent 3446aae commit 18670c9
Show file tree
Hide file tree
Showing 27 changed files with 820 additions and 30 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@
import com.datastax.oss.driver.internal.core.session.DefaultSession;
import com.datastax.oss.driver.internal.core.session.RepreparePayload;
import com.datastax.oss.driver.internal.core.util.Loggers;
import com.datastax.oss.driver.internal.core.util.collection.DebugQueryPlan;
import com.datastax.oss.driver.internal.core.util.collection.SimpleQueryPlan;
import com.datastax.oss.driver.shaded.guava.common.annotations.VisibleForTesting;
import com.datastax.oss.protocol.internal.Frame;
Expand Down Expand Up @@ -360,7 +361,11 @@ private void sendRequest(
// We've reached the end of the query plan without finding any node to write to; abort the
// continuous paging session.
if (activeExecutionsCount.decrementAndGet() == 0) {
abortGlobalRequestOrChosenCallback(AllNodesFailedException.fromErrors(errors));
if (queryPlan instanceof DebugQueryPlan) {
abortGlobalRequestOrChosenCallback(AllNodesFailedException.fromErrors(errors, queryPlan));
} else {
abortGlobalRequestOrChosenCallback(AllNodesFailedException.fromErrors(errors));
}
}
} else if (!chosenCallback.isDone()) {
NodeResponseCallback nodeResponseCallback =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@
import com.datastax.oss.driver.internal.core.tracker.NoopRequestTracker;
import com.datastax.oss.driver.internal.core.tracker.RequestLogger;
import com.datastax.oss.driver.internal.core.util.Loggers;
import com.datastax.oss.driver.internal.core.util.collection.DebugQueryPlan;
import com.datastax.oss.driver.internal.core.util.collection.SimpleQueryPlan;
import com.datastax.oss.protocol.internal.Frame;
import com.datastax.oss.protocol.internal.Message;
Expand Down Expand Up @@ -265,11 +266,13 @@ private void sendRequest(
// We've reached the end of the query plan without finding any node to write to
if (!result.isDone() && activeExecutionsCount.decrementAndGet() == 0) {
// We're the last execution so fail the result
setFinalError(
statement,
AllNodesFailedException.fromErrors(this.errors),
null,
NO_SUCCESSFUL_EXECUTION);
AllNodesFailedException exception;
if (queryPlan instanceof DebugQueryPlan) {
exception = AllNodesFailedException.fromErrors(this.errors, queryPlan);
} else {
exception = AllNodesFailedException.fromErrors(this.errors);
}
setFinalError(statement, exception, null, NO_SUCCESSFUL_EXECUTION);
}
} else {
NodeResponseCallback nodeResponseCallback =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,22 +30,32 @@
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Queue;

/**
* Thrown when a query failed on all the coordinators it was tried on. This exception may wrap
* multiple errors, that are available either as {@linkplain #getSuppressed() suppressed
* exceptions}, or via {@link #getAllErrors()} where they are grouped by node.
*/
public class AllNodesFailedException extends DriverException {

/** @deprecated Use {@link #fromErrors(List)} instead. */
@NonNull
@Deprecated
public static AllNodesFailedException fromErrors(@Nullable Map<Node, Throwable> errors) {
if (errors == null || errors.isEmpty()) {
return new NoNodeAvailableException();
} else {
return new AllNodesFailedException(groupByNode(errors));
return new AllNodesFailedException(groupByNode(errors), null);
}
}

@NonNull
public static AllNodesFailedException fromErrors(
@Nullable List<Entry<Node, Throwable>> errors, Queue<Node> queryPlan) {
if (errors == null || errors.isEmpty()) {
return new NoNodeAvailableException(queryPlan);
} else {
return new AllNodesFailedException(groupByNode(errors), queryPlan);
}
}

Expand All @@ -59,6 +69,7 @@ public static AllNodesFailedException fromErrors(@Nullable List<Entry<Node, Thro
}

private final Map<Node, List<Throwable>> errors;
private final Queue<Node> queryPlan;

/** @deprecated Use {@link #AllNodesFailedException(String, ExecutionInfo, Iterable)} instead. */
@Deprecated
Expand All @@ -68,6 +79,7 @@ protected AllNodesFailedException(
@NonNull Map<Node, Throwable> errors) {
super(message, executionInfo, null, true);
this.errors = toDeepImmutableMap(groupByNode(errors));
this.queryPlan = null;
addSuppressedErrors();
}

Expand All @@ -77,6 +89,18 @@ protected AllNodesFailedException(
@NonNull Iterable<Entry<Node, List<Throwable>>> errors) {
super(message, executionInfo, null, true);
this.errors = toDeepImmutableMap(errors);
this.queryPlan = null;
addSuppressedErrors();
}

protected AllNodesFailedException(
@NonNull String message,
@Nullable ExecutionInfo executionInfo,
@NonNull Iterable<Entry<Node, List<Throwable>>> errors,
@Nullable Queue<Node> queryPlan) {
super(message, executionInfo, null, true);
this.errors = toDeepImmutableMap(errors);
this.queryPlan = queryPlan;
addSuppressedErrors();
}

Expand All @@ -91,12 +115,26 @@ private void addSuppressedErrors() {
private AllNodesFailedException(Map<Node, List<Throwable>> errors) {
this(
buildMessage(
String.format("All %d node(s) tried for the query failed", errors.size()), errors),
String.format("All %d node(s) tried for the query failed", errors.size()),
errors,
null),
null,
errors.entrySet());
}

private static String buildMessage(String baseMessage, Map<Node, List<Throwable>> errors) {
private AllNodesFailedException(Map<Node, List<Throwable>> errors, Queue<Node> queryPlan) {
this(
buildMessage(
String.format("All %d node(s) tried for the query failed", errors.size()),
errors,
queryPlan),
null,
errors.entrySet(),
queryPlan);
}

private static String buildMessage(
String baseMessage, Map<Node, List<Throwable>> errors, Queue<Node> queryPlan) {
int limit = Math.min(errors.size(), 3);
Iterator<Entry<Node, List<Throwable>>> iterator =
Iterables.limit(errors.entrySet(), limit).iterator();
Expand All @@ -108,9 +146,14 @@ private static String buildMessage(String baseMessage, Map<Node, List<Throwable>
details.append(", ");
}
}
if (queryPlan == null) {
return String.format(
"%s (showing first %d nodes, use getAllErrors() for more): %s",
baseMessage, limit, details);
}
return String.format(
"%s (showing first %d nodes, use getAllErrors() for more): %s",
baseMessage, limit, details);
"%s\nQuery Plan: %s\n(showing first %d nodes, use getAllErrors() for more): %s",
baseMessage, queryPlan, limit, details);
}

/**
Expand All @@ -131,6 +174,10 @@ public Map<Node, Throwable> getErrors() {
return builder.build();
}

protected Queue<Node> getQueryPlan() {
return this.queryPlan;
}

/** An immutable map containing all errors on each tried node. */
@NonNull
public Map<Node, List<Throwable>> getAllErrors() {
Expand All @@ -146,7 +193,7 @@ public DriverException copy() {
@NonNull
public AllNodesFailedException reword(String newMessage) {
return new AllNodesFailedException(
buildMessage(newMessage, errors), getExecutionInfo(), errors.entrySet());
buildMessage(newMessage, errors, queryPlan), getExecutionInfo(), errors.entrySet());
}

private static Map<Node, List<Throwable>> groupByNode(Map<Node, Throwable> errors) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,10 @@
package com.datastax.oss.driver.api.core;

import com.datastax.oss.driver.api.core.cql.ExecutionInfo;
import com.datastax.oss.driver.api.core.metadata.Node;
import edu.umd.cs.findbugs.annotations.NonNull;
import java.util.Collections;
import java.util.Queue;

/**
* Specialization of {@code AllNodesFailedException} when no coordinators were tried.
Expand All @@ -32,13 +34,25 @@ public NoNodeAvailableException() {
this(null);
}

private NoNodeAvailableException(ExecutionInfo executionInfo) {
super("No node was available to execute the query", executionInfo, Collections.emptySet());
private NoNodeAvailableException(
String message, ExecutionInfo executionInfo, Queue<Node> queryPlan) {
super(message, executionInfo, Collections.emptySet(), queryPlan);
}

public NoNodeAvailableException(Queue<Node> queryPlan) {
this(buildMessage(queryPlan), null, queryPlan);
}

private static String buildMessage(Queue<Node> queryPlan) {
if (queryPlan == null) {
return "No node was available to execute the query";
}
return "No node was available to execute the query. Query Plan: " + queryPlan;
}

@NonNull
@Override
public DriverException copy() {
return new NoNodeAvailableException(getExecutionInfo());
return new NoNodeAvailableException(getMessage(), getExecutionInfo(), getQueryPlan());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,12 @@ public enum DefaultDriverOption implements DriverOption {
* <p>Value-type: boolean
*/
CONNECTION_WARN_INIT_ERROR("advanced.connection.warn-on-init-error"),
/**
* Provide more details when query execution has failed.
*
* <p>Value-Type: boolean
*/
CONNECTION_QUERY_PLAN_EXCEPTIONS("advanced.connection.detailed-query-plan-exceptions"),
/**
* The number of connections in the LOCAL pool.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,7 @@ protected static void fillWithDriverDefaults(OptionsMap map) {
map.put(TypedDriverOption.LOAD_BALANCING_DC_FAILOVER_MAX_NODES_PER_REMOTE_DC, 0);
map.put(TypedDriverOption.LOAD_BALANCING_DC_FAILOVER_ALLOW_FOR_LOCAL_CONSISTENCY_LEVELS, false);
map.put(TypedDriverOption.METRICS_GENERATE_AGGREGABLE_HISTOGRAMS, true);
map.put(TypedDriverOption.CONNECTION_QUERY_PLAN_EXCEPTIONS, false);
}

@Immutable
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -889,6 +889,11 @@ public String toString() {
DefaultDriverOption.LOAD_BALANCING_DC_FAILOVER_ALLOW_FOR_LOCAL_CONSISTENCY_LEVELS,
GenericType.BOOLEAN);

/** TBD. */
public static final TypedDriverOption<Boolean> CONNECTION_QUERY_PLAN_EXCEPTIONS =
new TypedDriverOption<>(
DefaultDriverOption.CONNECTION_QUERY_PLAN_EXCEPTIONS, GenericType.BOOLEAN);

private static Iterable<TypedDriverOption<?>> introspectBuiltInValues() {
try {
ImmutableList.Builder<TypedDriverOption<?>> result = ImmutableList.builder();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
import com.datastax.oss.driver.internal.core.metadata.NodeStateEvent;
import com.datastax.oss.driver.internal.core.metadata.TopologyEvent;
import com.datastax.oss.driver.internal.core.util.Loggers;
import com.datastax.oss.driver.internal.core.util.collection.DebugQueryPlan;
import com.datastax.oss.driver.internal.core.util.concurrent.CompletableFutures;
import com.datastax.oss.driver.internal.core.util.concurrent.Reconnection;
import com.datastax.oss.driver.internal.core.util.concurrent.RunOrSchedule;
Expand Down Expand Up @@ -357,7 +358,11 @@ private void connect(
assert adminExecutor.inEventLoop();
Node node = nodes.poll();
if (node == null) {
onFailure.accept(AllNodesFailedException.fromErrors(errors));
if (nodes instanceof DebugQueryPlan) {
onFailure.accept(AllNodesFailedException.fromErrors(errors, nodes));
} else {
onFailure.accept(AllNodesFailedException.fromErrors(errors));
}
} else {
LOG.debug("[{}] Trying to establish a connection to {}", logPrefix, node);
context
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
import com.datastax.oss.driver.internal.core.context.InternalDriverContext;
import com.datastax.oss.driver.internal.core.session.DefaultSession;
import com.datastax.oss.driver.internal.core.util.Loggers;
import com.datastax.oss.driver.internal.core.util.collection.DebugQueryPlan;
import com.datastax.oss.driver.internal.core.util.concurrent.CompletableFutures;
import com.datastax.oss.protocol.internal.Frame;
import com.datastax.oss.protocol.internal.Message;
Expand Down Expand Up @@ -197,7 +198,11 @@ private void sendRequest(PrepareRequest request, Node node, int retryCount) {
}
}
if (channel == null) {
setFinalError(AllNodesFailedException.fromErrors(this.errors));
if (queryPlan instanceof DebugQueryPlan) {
setFinalError(AllNodesFailedException.fromErrors(this.errors, queryPlan));
} else {
setFinalError(AllNodesFailedException.fromErrors(this.errors));
}
} else {
InitialPrepareCallback initialPrepareCallback =
new InitialPrepareCallback(request, node, channel, retryCount);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@
import com.datastax.oss.driver.internal.core.tracker.NoopRequestTracker;
import com.datastax.oss.driver.internal.core.tracker.RequestLogger;
import com.datastax.oss.driver.internal.core.util.Loggers;
import com.datastax.oss.driver.internal.core.util.collection.DebugQueryPlan;
import com.datastax.oss.driver.internal.core.util.collection.SimpleQueryPlan;
import com.datastax.oss.protocol.internal.Frame;
import com.datastax.oss.protocol.internal.Message;
Expand Down Expand Up @@ -385,7 +386,12 @@ private void sendRequest(
// We've reached the end of the query plan without finding any node to write to
if (!result.isDone() && activeExecutionsCount.decrementAndGet() == 0) {
// We're the last execution so fail the result
setFinalError(statement, AllNodesFailedException.fromErrors(this.errors), null, -1);
if (queryPlan instanceof DebugQueryPlan) {
setFinalError(
statement, AllNodesFailedException.fromErrors(this.errors, queryPlan), null, -1);
} else {
setFinalError(statement, AllNodesFailedException.fromErrors(this.errors), null, -1);
}
}
} else {
NodeResponseCallback nodeResponseCallback =
Expand Down
Loading

0 comments on commit 18670c9

Please sign in to comment.