SAI: Reduce DirectReader object creation (#1011)
This performance regression was introduced by
9cabec5. I haven't been able to pin down
the exact commit responsible, but essentially we
switched from `DirectReaders` to the Lucene `DirectReader` around
0774345 and
6bd00d1, and that switch led to a lot of
unnecessary object creation.

In my initial testing, this change appeared to improve numeric query
throughput over 100k queries from `282.03it/s` to `333.15it/s`. The
memory profile also showed far fewer objects created.

Note: the above numbers might reflect variability in my testing, so it
would be helpful to retest in a more controlled environment. Either
way, based on my understanding of the objects involved, this should
generally produce better results because we'll allocate fewer objects.

Finally, this change is especially helpful because the `sort then
filter` logic has to initialize the iterators for search, so it
helps in both execution paths.
michaeljmarshall authored Dec 12, 2024
1 parent 6934b4d commit 80e0d45
Showing 4 changed files with 40 additions and 10 deletions.
AbstractBlockPackedReader.java
@@ -17,6 +17,7 @@
  */
 package org.apache.cassandra.index.sai.disk.v1.bitpack;
 
+import com.carrotsearch.hppc.IntObjectHashMap;
 import org.apache.cassandra.index.sai.disk.io.IndexInput;
 import org.apache.cassandra.index.sai.disk.oldlucene.LuceneCompat;
 import org.apache.cassandra.index.sai.disk.v1.LongArray;
@@ -31,6 +32,7 @@ public abstract class AbstractBlockPackedReader implements LongArray
     private final long valueCount;
     final byte[] blockBitsPerValue; // package protected for test access
     private final SeekingRandomAccessInput input;
+    private final IntObjectHashMap<LongValues> readers;
 
     private long prevTokenValue = Long.MIN_VALUE;
     private long lastIndex; // the last index visited by token -> row ID searches
@@ -43,6 +45,7 @@ public abstract class AbstractBlockPackedReader implements LongArray
         this.valueCount = valueCount;
         this.input = new SeekingRandomAccessInput(indexInput);
         this.blockBitsPerValue = blockBitsPerValue;
+        this.readers = new IntObjectHashMap<>();
         // start searching tokens from current index segment
         this.lastIndex = sstableRowId;
     }
@@ -59,9 +62,19 @@ public long get(final long index)
 
         final int block = (int) (index >>> blockShift);
         final int idx = (int) (index & blockMask);
-        final LongValues subReader = blockBitsPerValue[block] == 0 ? LongValues.ZEROES
-                                                                   : LuceneCompat.directReaderGetInstance(input, blockBitsPerValue[block], blockOffsetAt(block));
-        return delta(block, idx) + subReader.get(idx);
+        return delta(block, idx) + getReader(block).get(idx);
+    }
+
+    private LongValues getReader(int block)
+    {
+        LongValues reader = readers.get(block);
+        if (reader == null)
+        {
+            reader = blockBitsPerValue[block] == 0 ? LongValues.ZEROES
+                                                   : LuceneCompat.directReaderGetInstance(input, blockBitsPerValue[block], blockOffsetAt(block));
+            readers.put(block, reader);
+        }
+        return reader;
+    }
 
     @Override
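The heart of this file's change is the new `readers` map: each block's `LongValues` decoder is created at most once and reused across `get` calls, instead of being rebuilt on every lookup. A minimal sketch of that memoization pattern, with a hypothetical `BlockDecoderCache` type and a `factory` standing in for `LuceneCompat.directReaderGetInstance`:

```java
import java.util.HashMap;
import java.util.Map;
import java.util.function.IntFunction;

// Sketch only: caches one decoder per block, mirroring getReader() above.
// The real patch uses hppc's IntObjectHashMap to avoid boxing the block id.
final class BlockDecoderCache<D>
{
    private final Map<Integer, D> decoders = new HashMap<>();
    private final IntFunction<D> factory; // e.g. block -> directReaderGetInstance(input, bits[block], offset(block))

    BlockDecoderCache(IntFunction<D> factory)
    {
        this.factory = factory;
    }

    D get(int block)
    {
        D decoder = decoders.get(block);
        if (decoder == null)
        {
            decoder = factory.apply(block);
            decoders.put(block, decoder);
        }
        return decoder;
    }
}
```

This trades a small, bounded amount of memory (one decoder per block actually touched) for the elimination of per-lookup allocation on a very hot path.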
BKDReader.java
@@ -221,10 +221,10 @@ public int readLeaf(long filePointer,
         final byte[] scratchPackedValue1 = new byte[packedBytesLength];
 
         final SeekingRandomAccessInput randoInput = new SeekingRandomAccessInput(bkdInput);
+        LongValues orderMapReader = LuceneCompat.directReaderGetInstance(randoInput, bitsPerValue, orderMapPointer);
         for (int x = 0; x < count; x++)
         {
-            LongValues orderMapReader = LuceneCompat.directReaderGetInstance(randoInput, LuceneCompat.directWriterUnsignedBitsRequired(randoInput.order(), maxPointsInLeafNode - 1), orderMapPointer);
-            final short idx = (short) LeafOrderMap.getValue(x, orderMapReader);
+            final short idx = LeafOrderMap.getValue(x, orderMapReader);
             origIndex[x] = idx;
         }
 
@@ -343,6 +343,7 @@ class Intersection
         final QueryContext context;
 
         final IndexInput bkdInput;
+        final SeekingRandomAccessInput bkdRandomInput;
         final IndexInput postingsInput;
         final IndexInput postingsSummaryInput;
         final IndexTree index;
@@ -352,6 +353,7 @@ class Intersection
                      IndexTree index, QueryEventListener.BKDIndexEventListener listener, QueryContext context)
         {
             this.bkdInput = bkdInput;
+            this.bkdRandomInput = new SeekingRandomAccessInput(bkdInput);
             this.postingsInput = postingsInput;
             this.postingsSummaryInput = postingsSummaryInput;
             this.index = index;
@@ -670,11 +672,10 @@ void filterLeaf(Collection<PostingList> postingLists) throws IOException
 
             final long orderMapPointer = bkdInput.getFilePointer();
 
-            final SeekingRandomAccessInput randoInput = new SeekingRandomAccessInput(bkdInput);
+            LongValues orderMapReader = LuceneCompat.directReaderGetInstance(bkdRandomInput, bitsPerValue, orderMapPointer);
             for (int x = 0; x < count; x++)
             {
-                LongValues orderMapReader = LuceneCompat.directReaderGetInstance(randoInput, LuceneCompat.directWriterUnsignedBitsRequired(randoInput.order(), maxPointsInLeafNode - 1), orderMapPointer);
-                origIndex[x] = (short) orderMapReader.get(x);
+                origIndex[x] = LeafOrderMap.getValue(x, orderMapReader);
             }
 
             // seek beyond the ordermap
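Both order-map hunks follow the same pattern: the `DirectReader` is loop-invariant, so it is created once before the loop rather than once per element, and `filterLeaf` additionally reuses a single `SeekingRandomAccessInput` owned by the `Intersection` instead of wrapping `bkdInput` on every call. A standalone sketch of the hoisting, with a hypothetical `newDecoder()` standing in for `LuceneCompat.directReaderGetInstance`:

```java
import java.util.function.LongUnaryOperator;

// Sketch only: shows why moving decoder creation out of the loop matters.
public class HoistDecoderExample
{
    // Stand-in for directReaderGetInstance: allocates a fresh decoder per call.
    static LongUnaryOperator newDecoder()
    {
        return i -> i; // the real decoder reads bit-packed values
    }

    public static void main(String[] args)
    {
        int count = 1024;
        short[] origIndex = new short[count];

        // Before: one decoder allocation per element -> count allocations.
        for (int x = 0; x < count; x++)
            origIndex[x] = (short) newDecoder().applyAsLong(x);

        // After: the decoder does not depend on x, so allocate it once.
        LongUnaryOperator decoder = newDecoder();
        for (int x = 0; x < count; x++)
            origIndex[x] = (short) decoder.applyAsLong(x);
    }
}
```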
LeafOrderMap.java
@@ -27,9 +27,21 @@
 
 public class LeafOrderMap
 {
-    public static int getValue(int index, LongValues reader)
+    /**
+     * Get the value at the given index from the reader, and cast it to a short. If the value is too large to fit in a
+     * short, an ArithmeticException is thrown.
+     * @param index the index to read from
+     * @param reader the reader to read from
+     * @return the value at the given index, cast to a short
+     */
+    public static short getValue(int index, LongValues reader)
     {
-        return Math.toIntExact(reader.get(index));
+        var value = reader.get(index);
+        var result = (short) value;
+        if (result != value) {
+            throw new ArithmeticException("short overflow");
+        }
+        return result;
     }
 
     public static void write(ByteOrder order, final int[] array, int length, int maxValue, final DataOutput out) throws IOException
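The widened contract is worth noting: `getValue` now returns `short` directly and fails loudly on overflow, much like `Math.toIntExact` does for `int`. A standalone version of the same checked narrowing:

```java
// Sketch only: checked long -> short narrowing, as used by getValue above.
// The cast keeps the low 16 bits; if casting back changes the value, the
// original did not fit in a short.
public class CheckedShortCast
{
    static short toShortExact(long value)
    {
        short result = (short) value;
        if (result != value)
            throw new ArithmeticException("short overflow: " + value);
        return result;
    }

    public static void main(String[] args)
    {
        System.out.println(toShortExact(1023));   // prints 1023
        System.out.println(toShortExact(40_000)); // throws ArithmeticException
    }
}
```

Order-map values are leaf-local positions below `maxPointsInLeafNode`, so the check should never fire in practice; it simply preserves the safety `Math.toIntExact` provided.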
TraversingBKDReader.java
@@ -22,6 +22,7 @@
 
 import org.agrona.collections.IntArrayList;
 import org.apache.cassandra.index.sai.disk.io.IndexInputReader;
+import org.apache.cassandra.index.sai.disk.oldlucene.LuceneCompat;
 import org.apache.cassandra.index.sai.utils.SAICodecUtils;
 import org.apache.cassandra.io.util.FileHandle;
 import org.apache.cassandra.io.util.FileUtils;
@@ -31,6 +32,7 @@
 import org.apache.lucene.store.ByteArrayDataInput;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.MathUtil;
+import org.apache.lucene.util.packed.DirectWriter;
 
 /**
  * Base reader for a block KD-tree previously written with {@link BKDWriter}.
@@ -50,6 +52,7 @@ public class TraversingBKDReader implements Closeable
     final int leafNodeOffset;
     final int numDims;
     final int maxPointsInLeafNode;
+    final int bitsPerValue;
     final int packedBytesLength;
 
     @SuppressWarnings("resource")
@@ -64,6 +67,7 @@ public class TraversingBKDReader implements Closeable
 
             numDims = in.readVInt();
             maxPointsInLeafNode = in.readVInt();
+            bitsPerValue = LuceneCompat.directWriterUnsignedBitsRequired(in.order(), maxPointsInLeafNode - 1);
             bytesPerDim = in.readVInt();
             packedBytesLength = numDims * bytesPerDim;
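`bitsPerValue` depends only on `maxPointsInLeafNode`, so it is now computed once at construction instead of on every leaf visit. One subtlety, assuming `LuceneCompat.directWriterUnsignedBitsRequired` delegates to Lucene's `DirectWriter` on the modern-endianness path: `DirectWriter` rounds the raw bit count up to its next supported width.

```java
import org.apache.lucene.util.packed.DirectWriter;

// Sketch only: the order map encodes positions in [0, maxPointsInLeafNode - 1],
// so the width is fixed per index and can be precomputed.
public class BitsPerValueExample
{
    public static void main(String[] args)
    {
        int maxPointsInLeafNode = 1024;
        int bitsPerValue = DirectWriter.unsignedBitsRequired(maxPointsInLeafNode - 1);
        // 1023 needs 10 bits, but DirectWriter only supports certain widths
        // (1, 2, 4, 8, 12, 16, ...), so this prints 12.
        System.out.println(bitsPerValue);
    }
}
```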
