From 3d17e2f63f64f434cb819d1db214ae595bd6c721 Mon Sep 17 00:00:00 2001 From: Jonathan Ellis Date: Wed, 11 Dec 2024 15:25:30 -0600 Subject: [PATCH] add testMatchingAllowed and make it work via shouldMerge --- .../cql3/restrictions/RestrictionSet.java | 25 ++++++++++--------- .../restrictions/SingleColumnRestriction.java | 11 ++++++++ .../cql3/restrictions/SingleRestriction.java | 12 +++++++++ .../cassandra/index/sai/cql/BM25Test.java | 16 ++++++++++++ 4 files changed, 52 insertions(+), 12 deletions(-) diff --git a/src/java/org/apache/cassandra/cql3/restrictions/RestrictionSet.java b/src/java/org/apache/cassandra/cql3/restrictions/RestrictionSet.java index f9dd22756fe0..9d459e1da247 100644 --- a/src/java/org/apache/cassandra/cql3/restrictions/RestrictionSet.java +++ b/src/java/org/apache/cassandra/cql3/restrictions/RestrictionSet.java @@ -427,21 +427,22 @@ public void addRestriction(SingleRestriction restriction, boolean isDisjunction) { // ANDed together restrictions against the same columns should be merged. Set existingRestrictions = getRestrictions(newRestrictions, columnDefs); - // Trivial case of no existing restrictions - if (existingRestrictions.isEmpty()) + + // merge the new restriction into an existing one. note that there is only ever a single + // restriction (per column), UNLESS one is ORDER BY BM25 and the other is MATCH. + for (var existing : existingRestrictions) { - addRestrictionForColumns(columnDefs, restriction, null); - return; + // shouldMerge exists for the BM25/MATCH case + if (existing.shouldMerge(restriction)) + { + var merged = existing.mergeWith(restriction); + addRestrictionForColumns(merged.getColumnDefs(), merged, Set.of(existing)); + return; + } } - // Since we merge new restrictions into the existing ones at each pass, there should only be - // at most one existing restriction across the same columnDefs - assert existingRestrictions.size() == 1 : existingRestrictions; - // Perform the merge - SingleRestriction existing = existingRestrictions.iterator().next(); - var merged = existing.mergeWith(restriction); - - addRestrictionForColumns(merged.getColumnDefs(), merged, Set.of(existing)); + // no existing restrictions that we should merge the new one with, add a new one + addRestrictionForColumns(columnDefs, restriction, null); } } diff --git a/src/java/org/apache/cassandra/cql3/restrictions/SingleColumnRestriction.java b/src/java/org/apache/cassandra/cql3/restrictions/SingleColumnRestriction.java index ced7f9d31cdb..5d055fd7fb89 100644 --- a/src/java/org/apache/cassandra/cql3/restrictions/SingleColumnRestriction.java +++ b/src/java/org/apache/cassandra/cql3/restrictions/SingleColumnRestriction.java @@ -1237,6 +1237,17 @@ public boolean isIndexBasedOrdering() { return true; } + + @Override + public boolean shouldMerge(SingleRestriction other) + { + // we don't want to merge MATCH restrictions with ORDER BY BM25 + // so shouldMerge = false for that scenario, and true for others + // (because even though we can't meaningfully merge with others, we want doMergeWith to be called to throw) + // + // (Note that because ORDER BY is processed before WHERE, we only need this check in the BM25 class) + return !other.isAnalyzerMatches(); + } } /** diff --git a/src/java/org/apache/cassandra/cql3/restrictions/SingleRestriction.java b/src/java/org/apache/cassandra/cql3/restrictions/SingleRestriction.java index 207e1786a114..bdd80badc0ae 100644 --- a/src/java/org/apache/cassandra/cql3/restrictions/SingleRestriction.java +++ b/src/java/org/apache/cassandra/cql3/restrictions/SingleRestriction.java @@ -129,4 +129,16 @@ public default MultiClusteringBuilder appendBoundTo(MultiClusteringBuilder build { return appendTo(builder, options); } + + /** + * @return true if the other restriction should be merged with this one. + * This is NOT for preventing illegal combinations of restrictions, e.g. + * a=1 AND a=2; that is handled by mergeWith. Instead, this is for the case + * where we want two completely different semantics against the same column. + * Currently the only such case is BM25 with MATCH. + */ + default boolean shouldMerge(SingleRestriction other) + { + return true; + } } diff --git a/test/unit/org/apache/cassandra/index/sai/cql/BM25Test.java b/test/unit/org/apache/cassandra/index/sai/cql/BM25Test.java index f38613150a64..3f220527d648 100644 --- a/test/unit/org/apache/cassandra/index/sai/cql/BM25Test.java +++ b/test/unit/org/apache/cassandra/index/sai/cql/BM25Test.java @@ -27,6 +27,22 @@ public class BM25Test extends SAITester { + @Test + public void testMatchingAllowed() throws Throwable + { + // match operator should be allowed with BM25 on the same column + // (seems obvious but exercises a corner case in the internal RestrictionSet processing) + createSimpleTable(); + + execute("INSERT INTO %s (k, v) VALUES (1, 'apple')"); + + beforeAndAfterFlush(() -> + { + var result = execute("SELECT k FROM %s WHERE v : 'apple' ORDER BY v BM25 OF 'apple' LIMIT 3"); + assertRows(result, row(1)); + }); + } + @Test public void testTermFrequencyOrdering() throws Throwable {