From e92acf92fe2577c13c57aa7d29079f9b7b68e840 Mon Sep 17 00:00:00 2001 From: Eduardo Alonso Date: Tue, 4 Jul 2017 09:02:34 +0200 Subject: [PATCH] Added 'maxClauses' to boolean search (#338) --- CHANGELOG.md | 9 +-- .../search/condition/BooleanCondition.java | 16 +++++ .../cassandra/lucene/builder/BuilderTest.java | 3 +- doc/documentation.rst | 2 + .../search/condition/BooleanCondition.java | 24 +++++--- .../builder/BooleanConditionBuilder.java | 18 +++++- .../condition/BooleanConditionTest.java | 44 +++++++++++++ .../varia/TestBooleanExtremeClauses.java | 61 +++++++++++++++++++ 8 files changed, 164 insertions(+), 13 deletions(-) create mode 100644 testsAT/src/test/java/com/stratio/cassandra/lucene/testsAT/varia/TestBooleanExtremeClauses.java diff --git a/CHANGELOG.md b/CHANGELOG.md index 6730c8268..10b7d1fcb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,12 +1,13 @@ # Changelog -## 3.0.14.0 (July 03, 2017) +## 3.0.14.1 (upcoming) -* Upgrade to Apache Cassandra 3.0.14 +* Add 'max_clauses' to boolean query -## 3.0.13.1 (upcoming) +## 3.0.14.0 (June 27, 2017) -*Upgrade to Apache Lucene 5.5.4 +* Upgrade to Apache Lucene 5.5.4 +* Upgrade to Apache Cassandra 3.0.14 ## 3.0.13.0 (April 17, 2017) diff --git a/builder/src/main/java/com/stratio/cassandra/lucene/builder/search/condition/BooleanCondition.java b/builder/src/main/java/com/stratio/cassandra/lucene/builder/search/condition/BooleanCondition.java index 29e89a13b..a231de9d8 100644 --- a/builder/src/main/java/com/stratio/cassandra/lucene/builder/search/condition/BooleanCondition.java +++ b/builder/src/main/java/com/stratio/cassandra/lucene/builder/search/condition/BooleanCondition.java @@ -39,6 +39,10 @@ public class BooleanCondition extends Condition { @JsonProperty("not") private List not; + /** The max boolean query clauses. */ + @JsonProperty("max_clauses") + private Integer maxClauses = null; + /** * Returns this with the specified mandatory conditions. 
* @@ -71,4 +75,16 @@ public BooleanCondition not(Condition... conditions) { not = add(not, conditions); return this; } + + + /** + * Returns this builder with the specified max booleqna query clauses + * + * @param maxClauses teh booleanQuery allowed max clauses + * @return this builder with the specified conditions + */ + public BooleanCondition maxClauses(Integer maxClauses) { + this.maxClauses = maxClauses; + return this; + } } diff --git a/builder/src/test/java/com/stratio/cassandra/lucene/builder/BuilderTest.java b/builder/src/test/java/com/stratio/cassandra/lucene/builder/BuilderTest.java index 23d46289a..4e4d9851d 100644 --- a/builder/src/test/java/com/stratio/cassandra/lucene/builder/BuilderTest.java +++ b/builder/src/test/java/com/stratio/cassandra/lucene/builder/BuilderTest.java @@ -484,6 +484,7 @@ public void testBooleanConditionFull() { .should(match("f3", 3), match("f4", 4)) .not(match("f5", 5), match("f6", 6)) .boost(2) + .maxClauses(10) .build(); String expected = "{\"type\":\"boolean\",\"boost\":2.0," + "\"must\":[" + @@ -495,7 +496,7 @@ public void testBooleanConditionFull() { "],\"not\":[" + "{\"type\":\"match\",\"field\":\"f5\",\"value\":5}," + "{\"type\":\"match\",\"field\":\"f6\",\"value\":6}" + - "]}"; + "],\"max_clauses\":10}"; assertEquals("boolean is wrong", expected, actual); } diff --git a/doc/documentation.rst b/doc/documentation.rst index cbec59526..baa42f1e5 100644 --- a/doc/documentation.rst +++ b/doc/documentation.rst @@ -2446,6 +2446,7 @@ Searches for rows matching boolean combinations of other searches. (, must: [(search,)?] )? (, should: [(search,)?] )? (, not: [(search,)?] )? + (, max_clauses: )? 
} }'); @@ -2457,6 +2458,7 @@ where: OR … OR search_n - **not**: represents the negation of the disjunction of searches: NOT(search_1 OR search_2 OR … OR search_n) +- **max_clauses**: the maximum number of clauses allowed in this boolean query (defaults to 1024 when omitted) **Example 1:** search for rows where name ends with “a” AND food starts diff --git a/plugin/src/main/java/com/stratio/cassandra/lucene/search/condition/BooleanCondition.java b/plugin/src/main/java/com/stratio/cassandra/lucene/search/condition/BooleanCondition.java index 5b2e342e9..ed5662dbd 100644 --- a/plugin/src/main/java/com/stratio/cassandra/lucene/search/condition/BooleanCondition.java +++ b/plugin/src/main/java/com/stratio/cassandra/lucene/search/condition/BooleanCondition.java @@ -30,14 +30,14 @@ import static org.apache.lucene.search.BooleanClause.Occur.*; /** - * A {@link Condition} that matches documents matching boolean combinations of other queries, e.g. {@link - * MatchCondition}s, {@link RangeCondition}s or other {@link BooleanCondition}s. + * A {@link Condition} that matches documents matching boolean combinations of other queries, e.g. {@link MatchCondition}s, {@link RangeCondition}s or other {@link BooleanCondition}s. * * @author Andres de la Pena {@literal } */ public class BooleanCondition extends Condition { protected static final Logger logger = LoggerFactory.getLogger(BooleanCondition.class); + static final Integer DEFAULT_MAX_CLAUSES = 1024; /** The mandatory conditions. */ public final List must; @@ -48,28 +48,36 @@ public class BooleanCondition extends Condition { /** The mandatory not conditions. */ public final List not; + /** The max boolean query clauses. */ + final Integer maxClauses; + /** * Returns a new {@link BooleanCondition} compound by the specified {@link Condition}s. * - * @param boost The boost for this query clause. Documents matching this clause will (in addition to the normal - * weightings) have their score multiplied by {@code boost}. + * @param boost The boost for this query clause.
Documents matching this clause will (in addition to the normal weightings) have their score multiplied by {@code boost}. * @param must the mandatory conditions * @param should the optional conditions * @param not the mandatory not conditions + * @param maxClauses the max clauses allowed in the boolean query */ public BooleanCondition(Float boost, List must, List should, - List not) { + List not, + Integer maxClauses) { super(boost); this.must = must == null ? Collections.EMPTY_LIST : must; this.should = should == null ? Collections.EMPTY_LIST : should; this.not = not == null ? Collections.EMPTY_LIST : not; + this.maxClauses = maxClauses == null ? DEFAULT_MAX_CLAUSES : maxClauses; } /** {@inheritDoc} */ @Override - public BooleanQuery doQuery(Schema schema) { + public synchronized BooleanQuery doQuery(Schema schema) { + int oldMaxClauses= BooleanQuery.getMaxClauseCount(); + BooleanQuery.setMaxClauseCount(maxClauses); + BooleanQuery.Builder builder = new BooleanQuery.Builder(); must.forEach(condition -> builder.add(condition.query(schema), MUST)); should.forEach(condition -> builder.add(condition.query(schema), SHOULD)); @@ -78,7 +86,9 @@ public BooleanQuery doQuery(Schema schema) { logger.warn("Performing resource-intensive pure negation query {}", this); builder.add(new MatchAllDocsQuery(), FILTER); } - return builder.build(); + BooleanQuery out=builder.build(); + BooleanQuery.setMaxClauseCount(oldMaxClauses); + return out; } /** {@inheritDoc} */ diff --git a/plugin/src/main/java/com/stratio/cassandra/lucene/search/condition/builder/BooleanConditionBuilder.java b/plugin/src/main/java/com/stratio/cassandra/lucene/search/condition/builder/BooleanConditionBuilder.java index 740732d37..b072f2f69 100644 --- a/plugin/src/main/java/com/stratio/cassandra/lucene/search/condition/builder/BooleanConditionBuilder.java +++ b/plugin/src/main/java/com/stratio/cassandra/lucene/search/condition/builder/BooleanConditionBuilder.java @@ -43,6 +43,10 @@ public class BooleanConditionBuilder
extends ConditionBuilder> not = new LinkedList<>(); + /** The max boolean query clauses. */ + @JsonProperty("max_clauses") + protected Integer maxClauses = null; + /** * Returns this builder with the specified mandatory conditions. * @@ -76,6 +80,17 @@ public BooleanConditionBuilder not(ConditionBuilder... builders) { return this; } + /** + * Returns this builder with the specified max boolean query clauses + * + * @param maxClauses the max clauses allowed in the boolean query + * @return this builder with the specified max clauses + */ + public BooleanConditionBuilder maxClauses(Integer maxClauses) { + this.maxClauses = maxClauses; + return this; + } + /** * Returns the {@link BooleanCondition} represented by this builder. * @@ -86,6 +101,7 @@ public BooleanCondition build() { return new BooleanCondition(boost, must.stream().map(ConditionBuilder::build).collect(toList()), should.stream().map(ConditionBuilder::build).collect(toList()), - not.stream().map(ConditionBuilder::build).collect(toList())); + not.stream().map(ConditionBuilder::build).collect(toList()), + maxClauses); } } diff --git a/plugin/src/test/java/com/stratio/cassandra/lucene/search/condition/BooleanConditionTest.java b/plugin/src/test/java/com/stratio/cassandra/lucene/search/condition/BooleanConditionTest.java index dcceead7d..6efc0c53e 100644 --- a/plugin/src/test/java/com/stratio/cassandra/lucene/search/condition/BooleanConditionTest.java +++ b/plugin/src/test/java/com/stratio/cassandra/lucene/search/condition/BooleanConditionTest.java @@ -22,6 +22,7 @@ import static com.stratio.cassandra.lucene.schema.SchemaBuilders.*; import static com.stratio.cassandra.lucene.search.SearchBuilders.*; +import static com.stratio.cassandra.lucene.search.condition.BooleanCondition.DEFAULT_MAX_CLAUSES; import static org.junit.Assert.*; /** @@ -55,6 +56,7 @@ public void testBuildDefaults() { assertEquals("Must is not set", 0, condition.must.size()); assertEquals("Should is not set", 0, condition.should.size());
assertEquals("Not is not set", 0, condition.not.size()); + assertEquals("Max Clauses is not set", DEFAULT_MAX_CLAUSES, condition.maxClauses); } @Test @@ -118,4 +120,46 @@ public void testToString() { condition.toString()); } + @Test + public void testOverMaxClauses() { + Schema schema = schema().mapper("name1", stringMapper()) + .mapper("name2", stringMapper()) + .mapper("name3", stringMapper()) + .mapper("name4", stringMapper()) + .mapper("name5", stringMapper()) + .mapper("name6", stringMapper()).build(); + BooleanCondition condition = bool().maxClauses(5) + .must(match("name1","value1")) + .must(match("name2","value2")) + .must(match("name3","value3")) + .must(match("name4","value4")) + .must(match("name5","value5")) + .must(match("name6","value6")).build(); + + try { + condition.doQuery(schema); + } catch (org.apache.lucene.search.BooleanQuery.TooManyClauses e) { + assertTrue(true); + return; + } + fail("Creating a booleanQuery with more clauses than limited should throw an Exception"); + } + + @Test + public void testInMaxClauses() { + Schema schema = schema().mapper("name1", stringMapper()) + .mapper("name2", stringMapper()) + .mapper("name3", stringMapper()) + .mapper("name4", stringMapper()) + .mapper("name5", stringMapper()) + .mapper("name6", stringMapper()).build(); + BooleanCondition condition = bool().maxClauses(10) + .must(match("name1","value1")) + .must(match("name2","value2")) + .must(match("name3","value3")) + .must(match("name4","value4")) + .must(match("name5","value5")) + .must(match("name6","value6")).build(); + assertEquals("Query count clauses is wrong", 6, condition.doQuery(schema).clauses().size()); + } } diff --git a/testsAT/src/test/java/com/stratio/cassandra/lucene/testsAT/varia/TestBooleanExtremeClauses.java b/testsAT/src/test/java/com/stratio/cassandra/lucene/testsAT/varia/TestBooleanExtremeClauses.java new file mode 100644 index 000000000..a8e15b3de --- /dev/null +++ 
b/testsAT/src/test/java/com/stratio/cassandra/lucene/testsAT/varia/TestBooleanExtremeClauses.java @@ -0,0 +1,61 @@ +package com.stratio.cassandra.lucene.testsAT.varia; + +import com.stratio.cassandra.lucene.builder.search.condition.BooleanCondition; +import com.stratio.cassandra.lucene.testsAT.BaseIT; +import com.stratio.cassandra.lucene.testsAT.util.CassandraUtils; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.LinkedHashMap; +import java.util.Map; + +import static com.stratio.cassandra.lucene.builder.Builder.all; +import static com.stratio.cassandra.lucene.builder.Builder.bool; +import static com.stratio.cassandra.lucene.builder.Builder.range; + +/** + * @author Eduardo Alonso {@literal } + */ +@RunWith(JUnit4.class) +public class TestBooleanExtremeClauses extends BaseIT { + + private static final int NUM_PARTITIONS = 100; + private static final int NUM_MAX_CLAUSES = 1000; + private static CassandraUtils utils; + + @BeforeClass + public static void before() { + utils = CassandraUtils.builder("stateless_search_skinny") + .withPartitionKey("pk") + .withColumn("pk", "int") + .withColumn("rc", "int") + .build() + .createKeyspace() + .createTable() + .createIndex(); + for (Integer i = 0; i < NUM_PARTITIONS; i++) { + Map data = new LinkedHashMap<>(); + data.put("pk", i.toString()); + data.put("rc", i.toString()); + utils.insert(data); + } + utils.refresh(); + } + + @AfterClass + public static void after() { + CassandraUtils.dropKeyspaceIfNotNull(utils); + } + + @Test + public void testQuery() throws Exception { + BooleanCondition bool= bool().maxClauses(NUM_MAX_CLAUSES + 1); + for (int i=0;i< NUM_MAX_CLAUSES ; i++ ) { + bool.must(range("rc").lower(0).upper(i).includeLower(true).includeUpper(true)); + } + utils.query(bool).check(1); + } +}