diff --git a/builder/src/main/java/com/stratio/cassandra/lucene/builder/index/Partitioner.java b/builder/src/main/java/com/stratio/cassandra/lucene/builder/index/Partitioner.java index 2778c9b88..324b07759 100644 --- a/builder/src/main/java/com/stratio/cassandra/lucene/builder/index/Partitioner.java +++ b/builder/src/main/java/com/stratio/cassandra/lucene/builder/index/Partitioner.java @@ -25,7 +25,9 @@ * An index partitioner to split the index in multiple partitions. * * Index partitioning is useful to speed up some searches to the detriment of others, depending on the implementation. - * It is also useful to overcome the Lucene's hard limit of 2147483519 documents per index. + * + * It is also useful to overcome Lucene's hard limit of 2147483519 documents per local index. + * However, queries involving partitions with more than 2147483519 total documents will still fail. * * @author Andres de la Pena {@literal <adelapena@stratio.com>} */ @@ -41,10 +43,12 @@ public static class None extends Partitioner { } /** - * {@link Partitioner} based on the Cassandra's partitioning token. + * A {@link Partitioner} based on the partition key token. Partitioning on token guarantees a good load balancing + * between partitions while speeding up partition-directed searches to the detriment of token range search + * performance. It allows efficient partition-directed queries in nodes indexing more than 2147483519 rows. + * However, token range searches in nodes with more than 2147483519 rows will fail. * - * Partitioning on token guarantees a good load balancing between partitions while speeding up partition-directed - * searches to the detriment of token range searches. + * The number of partitions per node should be specified. 
*/ public static class OnToken extends Partitioner { diff --git a/doc/documentation.rst b/doc/documentation.rst index 0dab56321..b0d112340 100644 --- a/doc/documentation.rst +++ b/doc/documentation.rst @@ -601,7 +601,8 @@ Partitioners Lucene indexes can be partitioned on a per-node basis. This means that the local index in each node can be split in multiple smaller fragments. Index partitioning is useful to speed up some searches to the detriment of others, depending on the implementation. It is also useful to overcome the -Lucene's hard limit of 2147483519 documents per local index. +Lucene's hard limit of 2147483519 documents per local index. However, queries involving partitions +with more than 2147483519 total documents will still fail. Partitioning is disabled by default, and it can be activated specifying a partitioner implementation in the index creation statement. @@ -629,8 +630,10 @@ Token partitioner _________________ A partitioner based on the partition key token. Partitioning on token guarantees a good load -balancing between partitions while speeding up partition-directed searches to the detriment of any -other searches. The number of partitions per node should be specified. +balancing between partitions while speeding up partition-directed searches to the detriment of token +range search performance. It allows efficient partition-directed queries in nodes +indexing more than 2147483519 rows. However, token range searches in nodes with more than 2147483519 +rows will fail. The number of partitions per node should be specified. .. 
code-block:: sql diff --git a/plugin/src/main/scala/com/stratio/cassandra/lucene/partitioning/Partitioner.scala b/plugin/src/main/scala/com/stratio/cassandra/lucene/partitioning/Partitioner.scala index cc060b30f..3a9a6aec9 100644 --- a/plugin/src/main/scala/com/stratio/cassandra/lucene/partitioning/Partitioner.scala +++ b/plugin/src/main/scala/com/stratio/cassandra/lucene/partitioning/Partitioner.scala @@ -24,7 +24,8 @@ import org.apache.cassandra.db.{DecoratedKey, ReadCommand} * Index partitioning is useful to speed up some searches to the detriment of others, depending on * the implementation. * - * It is also useful to overcome the Lucene's hard limit of 2147483519 documents per index. + * It is also useful to overcome Lucene's hard limit of 2147483519 documents per local index. + * However, queries involving partitions with more than 2147483519 total documents will still fail. * * @author Andres de la Pena `adelapena@stratio.com` */ diff --git a/plugin/src/main/scala/com/stratio/cassandra/lucene/partitioning/PartitionerOnToken.scala b/plugin/src/main/scala/com/stratio/cassandra/lucene/partitioning/PartitionerOnToken.scala index 755fe4b88..8fa1d390f 100644 --- a/plugin/src/main/scala/com/stratio/cassandra/lucene/partitioning/PartitionerOnToken.scala +++ b/plugin/src/main/scala/com/stratio/cassandra/lucene/partitioning/PartitionerOnToken.scala @@ -20,10 +20,12 @@ import com.stratio.cassandra.lucene.IndexException import org.apache.cassandra.db._ import org.apache.cassandra.dht.Token -/** [[Partitioner]] based on the partition key token. +/** A [[Partitioner]] based on the partition key token. * * Partitioning on token guarantees a good load balancing between partitions while speeding up - * partition-directed searches to the detriment of token range searches. + * partition-directed searches to the detriment of token range search performance. It allows + * efficient partition-directed queries in nodes indexing more than 2147483519 rows. 
However, + * token range searches in nodes with more than 2147483519 rows will fail. * * @param partitions the number of partitions * @author Andres de la Pena `adelapena@stratio.com`