Fixed doc.

Stratio · Sep 1, 2016 · 1899a8d · 1899a8d
2 parents f29429f + 180a88f
commit 1899a8d
Show file tree

Hide file tree

Showing 167 changed files with 5,545 additions and 4,998 deletions.
diff --git a/.jenkins.yml b/.jenkins.yml
@@ -0,0 +1,14 @@
+ATSERVICES:
+  - CASSANDRA:
+      image: stratio/cassandra-lucene-index:%%VERSION
+      sleep: 30
+      volumes:
+        - jts:1.14.0
+      env:
+        - LOCAL_JMX=no
+
+ATPARAMETERS: >
+    -Dit.host=%%CASSANDRA
+    -DJACOCO_SERVER=%%CASSANDRA
+    -Dit-embedded=false
+
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,21 @@
 # Changelog
 
+## 3.0.7.3 (Upcoming)
+
+Merged from 3.0.8.2:
+* Allow associativity in search-time geospatial transformations
+* Simplify naming of builder static methods for creating geospatial transformations
+* Fix paged index-sorted queries matching more than 65535 rows
+* Fix partition directed queries using dummy column syntax in skinny tables
+Merged from 3.0.8.1:
+* Upgrade query builder JSON serializer to Jackson 2.8.0
+* Add geospatial post filtering (ensures accuracy with any tree levels)
+* Set default number of indexing threads to number of processors available to the JVM
+* Fix mapping of timestamps and dates by their underlying numeric value (#177)
+
+Merged from 2.2.7.1:
+* Fix mapper referenced by alias in sortFields
+
 ## 3.0.7.2 (July 05, 2016)
 
 * Modernize search syntax keeping backward compatibility

diff --git a/README.rst b/README.rst
@@ -64,7 +64,7 @@ Stratio’s Cassandra Lucene Index and its integration with Lucene search techno
 
 -  Full text search (language-aware analysis, wildcard, fuzzy, regexp)
 -  Boolean search (and, or, not)
--  Sorting by relevance, column value, and distance)
+-  Sorting by relevance, column value, and distance
 -  Geospatial indexing (points, lines, polygons and their multiparts)
 -  Geospatial transformations (bounding box, buffer, centroid, convex hull, union, difference, intersection)
 -  Geospatial operations (intersects, contains, is within)
@@ -145,15 +145,15 @@ We will create the following table to store tweets:
 .. code-block:: sql
 
     CREATE KEYSPACE demo
-    WITH REPLICATION = {'class' : 'SimpleStrategy', 'replication_factor': 1};
+    WITH REPLICATION = {'class': 'SimpleStrategy', 'replication_factor': 1};
     USE demo;
     CREATE TABLE tweets (
-        id INT PRIMARY KEY,
-        user TEXT,
-        body TEXT,
-        time TIMESTAMP,
-        latitude FLOAT,
-        longitude FLOAT
+       id INT PRIMARY KEY,
+       user TEXT,
+       body TEXT,
+       time TIMESTAMP,
+       latitude FLOAT,
+       longitude FLOAT
     );
 
 Now you can create a custom Lucene index on it with the following statement:
@@ -163,16 +163,16 @@ Now you can create a custom Lucene index on it with the following statement:
     CREATE CUSTOM INDEX tweets_index ON tweets ()
     USING 'com.stratio.cassandra.lucene.Index'
     WITH OPTIONS = {
-        'refresh_seconds' : '1',
-        'schema' : '{
-            fields : {
-                id    : {type : "integer"},
-                user  : {type : "string"},
-                body  : {type : "text", analyzer : "english"},
-                time  : {type : "date", pattern : "yyyy/MM/dd"},
-                place : {type : "geo_point", latitude: "latitude", longitude: "longitude"}
-            }
-        }'
+       'refresh_seconds': '1',
+       'schema': '{
+          fields: {
+             id: {type: "integer"},
+             user: {type: "string"},
+             body: {type: "text", analyzer: "english"},
+             time: {type: "date", pattern: "yyyy/MM/dd"},
+             place: {type: "geo_point", latitude: "latitude", longitude: "longitude"}
+          }
+       }'
     };
 
 This will index all the columns in the table with the specified types, and it will be refreshed once per second.
@@ -189,16 +189,16 @@ Now, to search for tweets within a certain date range:
 .. code-block:: sql
 
     SELECT * FROM tweets WHERE expr(tweets_index, '{
-        filter : {type: "range", field: "time", lower: "2014/04/25", upper: "2014/05/01"}
+       filter: {type: "range", field: "time", lower: "2014/04/25", upper: "2014/05/01"}
     }');
 
 The same search can be performed forcing an explicit refresh of the involved index shards:
 
 .. code-block:: sql
 
     SELECT * FROM tweets WHERE expr(tweets_index, '{
-        filter : {type: "range", field: "time", lower: "2014/04/25", upper: "2014/05/01"},
-        refresh : true
+       filter: {type: "range", field: "time", lower: "2014/04/25", upper: "2014/05/01"},
+       refresh: true
     }') limit 100;
 
 Now, to search for the top 100 most relevant tweets where the *body* field contains the phrase “big data gives organizations”
@@ -207,54 +207,64 @@ within the aforementioned date range:
 .. code-block:: sql
 
     SELECT * FROM tweets WHERE expr(tweets_index, '{
-        filter : {type: "range", field: "time", lower: "2014/04/25", upper: "2014/05/01"},
-        query : {type: "phrase", field: "body", value: "big data gives organizations", slop: 1}
+       filter: {type: "range", field: "time", lower: "2014/04/25", upper: "2014/05/01"},
+       query: {type: "phrase", field: "body", value: "big data gives organizations", slop: 1}
     }') LIMIT 100;
 
 To refine the search to get only the tweets written by users whose names start with "a":
 
 .. code-block:: sql
 
     SELECT * FROM tweets WHERE expr(tweets_index, '{
-        filter : [ {type: "range", field: "time", lower: "2014/04/25", upper: "2014/05/01"},
-                   {type: "prefix", field: "user", value: "a"} ],
-        query : {type: "phrase", field: "body", value: "big data gives organizations", slop: 1}
+       filter: [
+          {type: "range", field: "time", lower: "2014/04/25", upper: "2014/05/01"},
+          {type: "prefix", field: "user", value: "a"}
+       ],
+       query: {type: "phrase", field: "body", value: "big data gives organizations", slop: 1}
     }') LIMIT 100;
 
 To get the 100 most recent filtered results you can use the *sort* option:
 
 .. code-block:: sql
 
     SELECT * FROM tweets WHERE expr(tweets_index, '{
-        filter : [ {type: "range", field: "time", lower: "2014/04/25", upper: "2014/05/01"},
-                   {type: "prefix", field: "user", value: "a"} ],
-        query : {type: "phrase", field: "body", value: "big data gives organizations", slop: 1},
-        sort : {field: "time", reverse: true}
+       filter: [
+          {type: "range", field: "time", lower: "2014/04/25", upper: "2014/05/01"},
+          {type: "prefix", field: "user", value: "a"}
+       ],
+       query: {type: "phrase", field: "body", value: "big data gives organizations", slop: 1},
+       sort: {field: "time", reverse: true}
     }') limit 100;
 
 The previous search can be restricted to tweets created close to a geographical position:
 
 .. code-block:: sql
 
     SELECT * FROM tweets WHERE expr(tweets_index, '{
-        filter : [ {type: "range", field: "time", lower: "2014/04/25", upper: "2014/05/01"},
-                   {type: "prefix", field: "user", value: "a"},
-                   {type: "geo_distance", field: "place", latitude: 40.3930, longitude: -3.7328, max_distance: "10km"} ],
-        query : {type: "phrase", field: "body", value: "big data gives organizations", slop: 1},
-        sort : {field: "time", reverse: true}
+       filter: [
+          {type: "range", field: "time", lower: "2014/04/25", upper: "2014/05/01"},
+          {type: "prefix", field: "user", value: "a"},
+          {type: "geo_distance", field: "place", latitude: 40.3930, longitude: -3.7328, max_distance: "1km"}
+       ],
+       query: {type: "phrase", field: "body", value: "big data gives organizations", slop: 1},
+       sort: {field: "time", reverse: true}
     }') limit 100;
 
 It is also possible to sort the results by distance to a geographical position:
 
 .. code-block:: sql
 
     SELECT * FROM tweets WHERE expr(tweets_index, '{
-        filter : [ {type: "range", field: "time", lower: "2014/04/25", upper: "2014/05/01"},
-                   {type: "prefix", field: "user", value: "a"},
-                   {type: "geo_distance", field: "place", latitude: 40.3930, longitude: -3.7328, max_distance: "10km"} ],
-        query : {type: "phrase", field: "body", value: "big data gives organizations", slop: 1},
-        sort : [ {field: "time", reverse: true},
-                 {field: "place", type: "geo_distance", latitude: 40.3930, longitude: -3.7328}]
+       filter: [
+          {type: "range", field: "time", lower: "2014/04/25", upper: "2014/05/01"},
+          {type: "prefix", field: "user", value: "a"},
+          {type: "geo_distance", field: "place", latitude: 40.3930, longitude: -3.7328, max_distance: "1km"}
+       ],
+       query: {type: "phrase", field: "body", value: "big data gives organizations", slop: 1},
+       sort: [
+          {field: "time", reverse: true},
+          {field: "place", type: "geo_distance", latitude: 40.3930, longitude: -3.7328}
+       ]
     }') limit 100;
 
 Last but not least, you can route any search to a certain token range or partition, in such a way that only a
@@ -263,12 +273,16 @@ subset of the cluster nodes will be hit, saving precious resources:
 .. code-block:: sql
 
     SELECT * FROM tweets WHERE expr(tweets_index, '{
-        filter : [ {type: "range", field: "time", lower: "2014/04/25", upper: "2014/05/01"},
-                   {type: "prefix", field: "user", value: "a"},
-                   {type: "geo_distance", field: "place", latitude: 40.3930, longitude: -3.7328, max_distance: "10km"} ],
-        query : {type: "phrase", field: "body", value: "big data gives organizations", slop: 1},
-        sort : [ {field: "time", reverse: true},
-                 {field: "place", type: "geo_distance", latitude: 40.3930, longitude: -3.7328}]
+       filter: [
+          {type: "range", field: "time", lower: "2014/04/25", upper: "2014/05/01"},
+          {type: "prefix", field: "user", value: "a"},
+          {type: "geo_distance", field: "place", latitude: 40.3930, longitude: -3.7328, max_distance: "1km"}
+       ],
+       query: {type: "phrase", field: "body", value: "big data gives organizations", slop: 1},
+       sort: [
+          {field: "time", reverse: true},
+          {field: "place", type: "geo_distance", latitude: 40.3930, longitude: -3.7328}
+       ]
     }') AND TOKEN(id) >= TOKEN(0) AND TOKEN(id) < TOKEN(10000000) limit 100;
 
 This last feature is the basis for `Hadoop, Spark and other MapReduce frameworks support <doc/documentation.rst#spark-and-hadoop>`__.

diff --git a/builder/pom.xml b/builder/pom.xml
@@ -36,9 +36,14 @@
 
     <dependencies>
         <dependency>
-            <groupId>org.codehaus.jackson</groupId>
-            <artifactId>jackson-mapper-asl</artifactId>
-            <version>1.9.2</version>
+            <groupId>com.fasterxml.jackson.core</groupId>
+            <artifactId>jackson-core</artifactId>
+            <version>2.8.0</version>
+        </dependency>
+        <dependency>
+            <groupId>com.fasterxml.jackson.core</groupId>
+            <artifactId>jackson-databind</artifactId>
+            <version>2.8.0</version>
         </dependency>
         <dependency>
             <groupId>junit</groupId>