Skip to content
This repository has been archived by the owner on May 27, 2020. It is now read-only.

Commit

Permalink
Added 'maxClauses' to boolean search (#338)
Browse files Browse the repository at this point in the history
  • Loading branch information
Eduardo Alonso authored Jul 4, 2017
1 parent fd5ca1c commit e92acf9
Show file tree
Hide file tree
Showing 8 changed files with 164 additions and 13 deletions.
9 changes: 5 additions & 4 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
# Changelog

## 3.0.14.0 (July 03, 2017)
## 3.0.14.1 (upcoming)

* Upgrade to Apache Cassandra 3.0.14
* Add 'max_clauses' to boolean query

## 3.0.13.1 (upcoming)
## 3.0.14.0 (June 27, 2017)

*Upgrade to Apache Lucene 5.5.4
* Upgrade to Apache Lucene 5.5.4
* Upgrade to Apache Cassandra 3.0.14

## 3.0.13.0 (April 17, 2017)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@ public class BooleanCondition extends Condition<BooleanCondition> {
@JsonProperty("not")
private List<Condition> not;

/** The max boolean query clauses. */
@JsonProperty("max_clauses")
private Integer maxClauses = null;

/**
* Returns this with the specified mandatory conditions.
*
Expand Down Expand Up @@ -71,4 +75,16 @@ public BooleanCondition not(Condition... conditions) {
not = add(not, conditions);
return this;
}


/**
* Returns this condition with the specified maximum number of boolean query clauses.
*
* @param maxClauses the maximum number of clauses allowed in the boolean query
* @return this condition with the specified max clauses
*/
public BooleanCondition maxClauses(Integer maxClauses) {
this.maxClauses = maxClauses;
return this;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -484,6 +484,7 @@ public void testBooleanConditionFull() {
.should(match("f3", 3), match("f4", 4))
.not(match("f5", 5), match("f6", 6))
.boost(2)
.maxClauses(10)
.build();
String expected = "{\"type\":\"boolean\",\"boost\":2.0," +
"\"must\":[" +
Expand All @@ -495,7 +496,7 @@ public void testBooleanConditionFull() {
"],\"not\":[" +
"{\"type\":\"match\",\"field\":\"f5\",\"value\":5}," +
"{\"type\":\"match\",\"field\":\"f6\",\"value\":6}" +
"]}";
"],\"max_clauses\":10}";
assertEquals("boolean is wrong", expected, actual);
}

Expand Down
2 changes: 2 additions & 0 deletions doc/documentation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2446,6 +2446,7 @@ Searches for rows matching boolean combinations of other searches.
(, must: [(search,)?] )?
(, should: [(search,)?] )?
(, not: [(search,)?] )?
(, max_clauses: <max_clauses>)?
}
}');
Expand All @@ -2457,6 +2458,7 @@ where:
OR ... OR search_n
- **not**: represents the negation of the disjunction of searches:
NOT(search_1 OR search_2 OR ... OR search_n)
- **max_clauses**: the maximum number of clauses allowed in this boolean query (defaults to 1024)
**Example 1:** search for rows where name ends with “a” AND food starts
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,14 @@
import static org.apache.lucene.search.BooleanClause.Occur.*;

/**
* A {@link Condition} that matches documents matching boolean combinations of other queries, e.g. {@link
* MatchCondition}s, {@link RangeCondition}s or other {@link BooleanCondition}s.
* A {@link Condition} that matches documents matching boolean combinations of other queries, e.g. {@link MatchCondition}s, {@link RangeCondition}s or other {@link BooleanCondition}s.
*
* @author Andres de la Pena {@literal <[email protected]>}
*/
public class BooleanCondition extends Condition {

protected static final Logger logger = LoggerFactory.getLogger(BooleanCondition.class);
static final Integer DEFAULT_MAX_CLAUSES = 1024;

/** The mandatory conditions. */
public final List<Condition> must;
Expand All @@ -48,28 +48,36 @@ public class BooleanCondition extends Condition {
/** The mandatory not conditions. */
public final List<Condition> not;

/** The max boolean query clauses. */
final Integer maxClauses;

/**
* Returns a new {@link BooleanCondition} compound by the specified {@link Condition}s.
*
* @param boost The boost for this query clause. Documents matching this clause will (in addition to the normal
* weightings) have their score multiplied by {@code boost}.
* @param boost The boost for this query clause. Documents matching this clause will (in addition to the normal weightings) have their score multiplied by {@code boost}.
* @param must the mandatory conditions
* @param should the optional conditions
* @param not the mandatory not conditions
* @param maxClauses the maximum number of clauses allowed in the boolean query
*/
public BooleanCondition(Float boost,
List<Condition> must,
List<Condition> should,
List<Condition> not) {
List<Condition> not,
Integer maxClauses) {
super(boost);
this.must = must == null ? Collections.EMPTY_LIST : must;
this.should = should == null ? Collections.EMPTY_LIST : should;
this.not = not == null ? Collections.EMPTY_LIST : not;
this.maxClauses = maxClauses == null ? DEFAULT_MAX_CLAUSES : maxClauses;
}

/** {@inheritDoc} */
@Override
public BooleanQuery doQuery(Schema schema) {
public synchronized BooleanQuery doQuery(Schema schema) {
int oldMaxClauses= BooleanQuery.getMaxClauseCount();
BooleanQuery.setMaxClauseCount(maxClauses);

BooleanQuery.Builder builder = new BooleanQuery.Builder();
must.forEach(condition -> builder.add(condition.query(schema), MUST));
should.forEach(condition -> builder.add(condition.query(schema), SHOULD));
Expand All @@ -78,7 +86,9 @@ public BooleanQuery doQuery(Schema schema) {
logger.warn("Performing resource-intensive pure negation query {}", this);
builder.add(new MatchAllDocsQuery(), FILTER);
}
return builder.build();
BooleanQuery out=builder.build();
BooleanQuery.setMaxClauseCount(oldMaxClauses);
return out;
}

/** {@inheritDoc} */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@ public class BooleanConditionBuilder extends ConditionBuilder<BooleanCondition,
@JsonProperty("not")
protected final List<ConditionBuilder<?, ?>> not = new LinkedList<>();

/** The max boolean query clauses. */
@JsonProperty("max_clauses")
protected Integer maxClauses = null;

/**
* Returns this builder with the specified mandatory conditions.
*
Expand Down Expand Up @@ -76,6 +80,17 @@ public BooleanConditionBuilder not(ConditionBuilder<?, ?>... builders) {
return this;
}

/**
* Returns this builder with the specified maximum number of boolean query clauses.
*
* @param maxClauses the maximum number of clauses allowed in the boolean query
* @return this builder with the specified max clauses
*/
public BooleanConditionBuilder maxClauses(Integer maxClauses) {
this.maxClauses = maxClauses;
return this;
}

/**
* Returns the {@link BooleanCondition} represented by this builder.
*
Expand All @@ -86,6 +101,7 @@ public BooleanCondition build() {
return new BooleanCondition(boost,
must.stream().map(ConditionBuilder::build).collect(toList()),
should.stream().map(ConditionBuilder::build).collect(toList()),
not.stream().map(ConditionBuilder::build).collect(toList()));
not.stream().map(ConditionBuilder::build).collect(toList()),
maxClauses);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

import static com.stratio.cassandra.lucene.schema.SchemaBuilders.*;
import static com.stratio.cassandra.lucene.search.SearchBuilders.*;
import static com.stratio.cassandra.lucene.search.condition.BooleanCondition.DEFAULT_MAX_CLAUSES;
import static org.junit.Assert.*;

/**
Expand Down Expand Up @@ -55,6 +56,7 @@ public void testBuildDefaults() {
assertEquals("Must is not set", 0, condition.must.size());
assertEquals("Should is not set", 0, condition.should.size());
assertEquals("Not is not set", 0, condition.not.size());
assertEquals("Max Clauses is not set", DEFAULT_MAX_CLAUSES, condition.maxClauses);
}

@Test
Expand Down Expand Up @@ -118,4 +120,46 @@ public void testToString() {
condition.toString());
}

@Test
public void testOverMaxClauses() {
    // One string mapper per field referenced by the clauses below.
    Schema schema = schema().mapper("name1", stringMapper())
                            .mapper("name2", stringMapper())
                            .mapper("name3", stringMapper())
                            .mapper("name4", stringMapper())
                            .mapper("name5", stringMapper())
                            .mapper("name6", stringMapper())
                            .build();

    // The limit is 5 clauses, but 6 mandatory clauses are added.
    BooleanCondition condition = bool().maxClauses(5)
                                       .must(match("name1", "value1"))
                                       .must(match("name2", "value2"))
                                       .must(match("name3", "value3"))
                                       .must(match("name4", "value4"))
                                       .must(match("name5", "value5"))
                                       .must(match("name6", "value6"))
                                       .build();

    // Record whether building the query was rejected, then assert once.
    boolean rejected;
    try {
        condition.doQuery(schema);
        rejected = false;
    } catch (org.apache.lucene.search.BooleanQuery.TooManyClauses expected) {
        rejected = true;
    }
    assertTrue("Creating a booleanQuery with more clauses than limited should throw an Exception", rejected);
}

@Test
public void testInMaxClauses() {
    // One string mapper per field referenced by the clauses below.
    Schema schema = schema().mapper("name1", stringMapper())
                            .mapper("name2", stringMapper())
                            .mapper("name3", stringMapper())
                            .mapper("name4", stringMapper())
                            .mapper("name5", stringMapper())
                            .mapper("name6", stringMapper())
                            .build();

    // Six mandatory clauses with a limit of 10: the query must build normally.
    BooleanCondition condition = bool().maxClauses(10)
                                       .must(match("name1", "value1"))
                                       .must(match("name2", "value2"))
                                       .must(match("name3", "value3"))
                                       .must(match("name4", "value4"))
                                       .must(match("name5", "value5"))
                                       .must(match("name6", "value6"))
                                       .build();

    int clauseCount = condition.doQuery(schema).clauses().size();
    assertEquals("Query count clauses is wrong", 6, clauseCount);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
package com.stratio.cassandra.lucene.testsAT.varia;

import com.stratio.cassandra.lucene.builder.search.condition.BooleanCondition;
import com.stratio.cassandra.lucene.testsAT.BaseIT;
import com.stratio.cassandra.lucene.testsAT.util.CassandraUtils;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;

import java.util.LinkedHashMap;
import java.util.Map;

import static com.stratio.cassandra.lucene.builder.Builder.all;
import static com.stratio.cassandra.lucene.builder.Builder.bool;
import static com.stratio.cassandra.lucene.builder.Builder.range;

/**
 * Acceptance test that runs a boolean search carrying {@link #NUM_MAX_CLAUSES} mandatory range clauses, with
 * {@code max_clauses} raised to {@code NUM_MAX_CLAUSES + 1}.
 *
 * @author Eduardo Alonso {@literal <[email protected]>}
 */
@RunWith(JUnit4.class)
public class TestBooleanExtremeClauses extends BaseIT {

    /** Number of rows inserted before the test runs. */
    private static final int NUM_PARTITIONS = 100;

    /** Number of mandatory clauses added to the boolean search. */
    private static final int NUM_MAX_CLAUSES = 1000;

    private static CassandraUtils utils;

    /**
     * Creates the keyspace, table and index, then inserts {@link #NUM_PARTITIONS} rows whose {@code pk} and
     * {@code rc} columns both hold the row number.
     */
    @BeforeClass
    public static void before() {
        utils = CassandraUtils.builder("stateless_search_skinny")
                              .withPartitionKey("pk")
                              .withColumn("pk", "int")
                              .withColumn("rc", "int")
                              .build()
                              .createKeyspace()
                              .createTable()
                              .createIndex();

        for (int partition = 0; partition < NUM_PARTITIONS; partition++) {
            String value = Integer.toString(partition);
            Map<String, String> row = new LinkedHashMap<>();
            row.put("pk", value);
            row.put("rc", value);
            utils.insert(row);
        }
        utils.refresh();
    }

    /** Drops the keyspace created in {@link #before()}, if any. */
    @AfterClass
    public static void after() {
        CassandraUtils.dropKeyspaceIfNotNull(utils);
    }

    /**
     * Builds a boolean search with {@link #NUM_MAX_CLAUSES} mandatory inclusive ranges {@code [0, upper]} over
     * {@code rc}, for {@code upper} from 0 to {@code NUM_MAX_CLAUSES - 1}, and checks the result against 1.
     * Only {@code rc = 0} lies inside every range; {@code check(1)} presumably asserts the number of returned rows.
     */
    @Test
    public void testQuery() throws Exception {
        BooleanCondition search = bool().maxClauses(NUM_MAX_CLAUSES + 1);
        for (int upper = 0; upper < NUM_MAX_CLAUSES; upper++) {
            search.must(range("rc").lower(0)
                                   .upper(upper)
                                   .includeLower(true)
                                   .includeUpper(true));
        }
        utils.query(search).check(1);
    }
}

0 comments on commit e92acf9

Please sign in to comment.