Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cndb 10313 - draft PR! WIP #1473

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions src/java/org/apache/cassandra/index/sai/IndexContext.java
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@
import org.apache.cassandra.index.sai.metrics.IndexMetrics;
import org.apache.cassandra.index.sai.plan.Expression;
import org.apache.cassandra.index.sai.plan.Orderer;
import org.apache.cassandra.index.sai.utils.IPv6v4ComparisonSupport;
import org.apache.cassandra.index.sai.utils.PrimaryKey;
import org.apache.cassandra.index.sai.utils.PrimaryKeyWithSortKey;
import org.apache.cassandra.index.sai.utils.TypeUtil;
Expand All @@ -95,6 +96,7 @@
import org.apache.cassandra.utils.concurrent.OpOrder;

import static org.apache.cassandra.config.CassandraRelevantProperties.VALIDATE_MAX_TERM_SIZE_AT_COORDINATOR;
import static org.apache.cassandra.index.sai.utils.IPv6v4ComparisonSupport.IP_COMPARISON_OPTION;

/**
* Manage metadata for each column index.
Expand Down Expand Up @@ -152,6 +154,8 @@ public class IndexContext

private final int maxTermSize;

private final boolean equalV4V6IPs;

public IndexContext(@Nonnull String keyspace,
@Nonnull String table,
@Nonnull TableId tableId,
Expand Down Expand Up @@ -191,6 +195,7 @@ public IndexContext(@Nonnull String keyspace,
: this.analyzerFactory;
this.vectorSimilarityFunction = indexWriterConfig.getSimilarityFunction();
this.hasEuclideanSimilarityFunc = vectorSimilarityFunction == VectorSimilarityFunction.EUCLIDEAN;
this.equalV4V6IPs = Boolean.parseBoolean(config.options.getOrDefault(IP_COMPARISON_OPTION, String.valueOf(IPv6v4ComparisonSupport.IP_COMPARISON_OPTION_DEFAULT)));
}
else
{
Expand All @@ -200,6 +205,7 @@ public IndexContext(@Nonnull String keyspace,
this.queryAnalyzerFactory = this.analyzerFactory;
this.vectorSimilarityFunction = null;
this.hasEuclideanSimilarityFunc = false;
this.equalV4V6IPs = IPv6v4ComparisonSupport.IP_COMPARISON_OPTION_DEFAULT;
}

this.maxTermSize = isVector() ? MAX_VECTOR_TERM_SIZE
Expand Down Expand Up @@ -614,6 +620,11 @@ public int getIntOption(String name, int defaultValue)
}
}

public boolean getIPComparisonOption()
{
return equalV4V6IPs;
}

public AbstractAnalyzer.AnalyzerFactory getAnalyzerFactory()
{
return analyzerFactory;
Expand Down
2 changes: 1 addition & 1 deletion src/java/org/apache/cassandra/index/sai/QueryContext.java
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
@NotThreadSafe
public class QueryContext
{
private static final boolean DISABLE_TIMEOUT = Boolean.getBoolean("cassandra.sai.test.disable.timeout");
private static final boolean DISABLE_TIMEOUT = true;

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

just helping myself with debugging, to be reverted


protected final long queryStartTimeNanos;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
import com.google.common.collect.ImmutableSet;
import com.google.common.util.concurrent.Futures;
import com.google.common.util.concurrent.ListenableFuture;
import org.apache.cassandra.db.marshal.InetAddressType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

Expand Down Expand Up @@ -97,6 +98,7 @@
import org.apache.cassandra.index.sai.disk.StorageAttachedIndexWriter;
import org.apache.cassandra.index.sai.disk.format.IndexDescriptor;
import org.apache.cassandra.index.sai.disk.v1.IndexWriterConfig;
import org.apache.cassandra.index.sai.utils.IPv6v4ComparisonSupport;
import org.apache.cassandra.index.sai.utils.TypeUtil;
import org.apache.cassandra.index.sai.view.View;
import org.apache.cassandra.index.transactions.IndexTransaction;
Expand All @@ -113,6 +115,7 @@

import static org.apache.cassandra.config.CassandraRelevantProperties.SAI_VALIDATE_TERMS_AT_COORDINATOR;
import static org.apache.cassandra.index.sai.disk.v1.IndexWriterConfig.MAX_TOP_K;
import static org.apache.cassandra.index.sai.utils.IPv6v4ComparisonSupport.IP_COMPARISON_OPTION;

public class StorageAttachedIndex implements Index
{
Expand Down Expand Up @@ -217,7 +220,8 @@ public List<SecondaryIndexBuilder> getParallelIndexBuildTasks(ColumnFamilyStore
IndexWriterConfig.OPTIMIZE_FOR,
LuceneAnalyzer.INDEX_ANALYZER,
LuceneAnalyzer.QUERY_ANALYZER,
AnalyzerEqOperatorSupport.OPTION);
AnalyzerEqOperatorSupport.OPTION,
IP_COMPARISON_OPTION);

// this does not include vectors because each Vector declaration is a separate type instance
public static final Set<CQL3Type> SUPPORTED_TYPES = ImmutableSet.of(CQL3Type.Native.ASCII, CQL3Type.Native.BIGINT, CQL3Type.Native.DATE,
Expand Down Expand Up @@ -305,6 +309,8 @@ public static Map<String, String> validateOptions(Map<String, String> options, T
throw new InvalidRequestException("Failed to retrieve target column for: " + targetColumn);
}

IPv6v4ComparisonSupport.fromMap(options, target.left.type);

// In order to support different index target on non-frozen map, ie. KEYS, VALUE, ENTRIES, we need to put index
// name as part of index file name instead of column name. We only need to check that the target is different
// between indexes. This will only allow indexes in the same column with a different IndexTarget.Type.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

package org.apache.cassandra.index.sai.memory;

import java.net.UnknownHostException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
Expand Down Expand Up @@ -277,7 +278,8 @@ public CloseableIterator<PrimaryKeyWithSortKey> orderResultsBy(QueryContext cont

// We do two kinds of encoding... it'd be great to make this more straight forward, but this is what
// we have for now. I leave it to the reader to inspect the two methods to see the nuanced differences.
var encoding = encode(TypeUtil.encode(cell.buffer(), validator));
ByteComparable encoding = null;
encoding = encode(TypeUtil.encode(cell.buffer(), validator));
return new PrimaryKeyWithByteComparable(indexContext, memtable, key, encoding);
},
Runnables.doNothing()
Expand Down
7 changes: 4 additions & 3 deletions src/java/org/apache/cassandra/index/sai/plan/Expression.java
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,7 @@ public Expression add(Operator op, ByteBuffer value)
// VSTODO seems like we could optimize for CompositeType here since we know we have a key match
public boolean isSatisfiedBy(ByteBuffer columnValue)
{
boolean equalV4V6IPs = context.getIPComparisonOption();
if (columnValue == null)
return false;

Expand Down Expand Up @@ -301,7 +302,7 @@ public boolean isSatisfiedBy(ByteBuffer columnValue)
else
{
// range or (not-)equals - (mainly) for numeric values
int cmp = TypeUtil.comparePostFilter(lower.value, value, validator);
int cmp = TypeUtil.comparePostFilter(lower.value, value, validator, equalV4V6IPs);

// in case of (NOT_)EQ lower == upper
if (operation == Op.EQ || operation == Op.CONTAINS_KEY || operation == Op.CONTAINS_VALUE)
Expand All @@ -326,7 +327,7 @@ public boolean isSatisfiedBy(ByteBuffer columnValue)
else
{
// range - mainly for numeric values
int cmp = TypeUtil.comparePostFilter(upper.value, value, validator);
int cmp = TypeUtil.comparePostFilter(upper.value, value, validator, equalV4V6IPs);
if (cmp < 0 || (cmp == 0 && !upperInclusive))
return false;
}
Expand All @@ -337,7 +338,7 @@ public boolean isSatisfiedBy(ByteBuffer columnValue)
for (ByteBuffer term : exclusions)
{
if (TypeUtil.isLiteral(validator) && validateStringValue(value.raw, term) ||
TypeUtil.comparePostFilter(new Value(term, validator), value, validator) == 0)
TypeUtil.comparePostFilter(new Value(term, validator), value, validator, equalV4V6IPs) == 0)
return false;
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
/*
* Copyright DataStax, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.cassandra.index.sai.utils;

import java.util.Map;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Strings;

import org.apache.cassandra.db.marshal.AbstractType;
import org.apache.cassandra.db.marshal.InetAddressType;
import org.apache.cassandra.exceptions.InvalidRequestException;

/**
* Index config property for defining the behaviour of the comparison between IPv4 and IPv6 addresses when the index is used.
* </p>
* The inet type in CQL allows ipv4 and ipv6 address formats to mix in the same column, and while there is a standard
* conversion between v4 and v6, equivalent addresses are not considered equal by Cassandra, when it comes to their usage
* in keys or in filtering queries. However, in SAI queries, equivalent v4 and v6 addresses ARE considered equal. While
* it can be considered a bug that SAI does not respect the inet type's semantics, it is also a feature that can be useful.
* However, for backwards compatibility reasons, we should allow users to let SAI queries behave same as filtering queries
* through this index config property.
*/
public class IPv6v4ComparisonSupport
{
public static final String IP_COMPARISON_OPTION = "compare_v4_to_v6_as_equal";
public static final boolean IP_COMPARISON_OPTION_DEFAULT = false;

@VisibleForTesting
public static final String NOT_IP_ERROR = "The '" + IP_COMPARISON_OPTION + "' index option can only be used with the ip type.";

@VisibleForTesting
static final String WRONG_OPTION_ERROR = "Illegal value for boolean option '" +
IPv6v4ComparisonSupport.IP_COMPARISON_OPTION + "': ";

public static void fromMap(Map<String, String> options, AbstractType<?> type)
{
if (!options.containsKey(IP_COMPARISON_OPTION))
return;

if (!(type instanceof InetAddressType))
throw new InvalidRequestException(NOT_IP_ERROR);

String value = options.get(IP_COMPARISON_OPTION).toUpperCase();

if (Strings.isNullOrEmpty(value))
throw new InvalidRequestException(WRONG_OPTION_ERROR + value);

validateBoolean(value);
}

private static void validateBoolean(String value)
{
if (!value.equalsIgnoreCase(Boolean.TRUE.toString()) && !value.equalsIgnoreCase(Boolean.FALSE.toString()))
throw new InvalidRequestException(WRONG_OPTION_ERROR + value);
}
}
12 changes: 7 additions & 5 deletions src/java/org/apache/cassandra/index/sai/utils/TypeUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -319,15 +319,17 @@ else if (useFastByteOperations(type, version))

/**
* This is used for value comparison in post-filtering - {@link Expression#isSatisfiedBy(ByteBuffer)}.
*
* <p>
* This allows types to decide whether they should be compared based on their encoded value or their
* raw value. At present only {@link InetAddressType} values are compared by their encoded values to
* allow for ipv4 -> ipv6 equivalency in searches.
* raw value.
*/
public static int comparePostFilter(Expression.Value requestedValue, Expression.Value columnValue, AbstractType<?> type)
public static int comparePostFilter(Expression.Value requestedValue, Expression.Value columnValue, AbstractType<?> type, boolean equalV4V6IPs)
{
if (isInetAddress(type))
return compareInet(requestedValue.encoded, columnValue.encoded);
if (equalV4V6IPs)
return compareInet(requestedValue.encoded, columnValue.encoded);
else
return InetAddressType.instance.compareForCQL(requestedValue.raw, columnValue.raw);
// Override comparisons for frozen collections
else if (isFrozen(type))
return FastByteOperations.compareUnsigned(requestedValue.raw, columnValue.raw);
Expand Down
Loading