From d55ccb8b5f064f2c68d7c2283cce2808226708d8 Mon Sep 17 00:00:00 2001 From: Stein Runar Bergheim Date: Tue, 19 Apr 2016 12:51:17 +0200 Subject: [PATCH] Updated geoindexing, settings, geodoc and readme --- README.md | 14 +--- .../java/eu/sdi4apps/ftgeosearch/GeoDoc.java | 32 +++++---- .../java/eu/sdi4apps/ftgeosearch/Indexer.java | 71 ++++++++++++++++--- .../eu/sdi4apps/ftgeosearch/Searcher.java | 71 +++++++++++++++---- .../eu/sdi4apps/openapi/config/Settings.java | 20 ++++++ .../eu/sdi4apps/openapi/servlets/Index.java | 8 +-- .../eu/sdi4apps/openapi/servlets/Search.java | 15 ++-- .../eu/sdi4apps/openapi/utils/HttpParam.java | 4 +- 8 files changed, 175 insertions(+), 60 deletions(-) diff --git a/README.md b/README.md index 32607d7..2724ba0 100644 --- a/README.md +++ b/README.md @@ -4,16 +4,8 @@ The SDI4Apps OpenAPI is a set of web services exposed by the SDI4Apps Cloud plat The Open API consists partly of the protocols exposed by the components in the platform, i.e. Open Geospatial Consortium compliant web services like WMS, WFS and CS-W. Other parts of the API implements custom functionality that adds specific search/processing capabilities to the system. The code of these custom services are included in this repository. -## System requirements -* Java SDK -* Maven -* Apache Tomcat -* GDAL/OGR Java SWIG bindings -* Apache Lucene -* PostgreSQL -* PostGIS -* pgRouting +The code base of the project has been ported to this repository in the last week based on the content of two independent previous repositories where the actual development has taken place. -## Installation +# Web Service documentation -Installation will be handled by the platform installation script... \ No newline at end of file +Documentation is currently being added to the site based on auto-generated JavaDocs from the source code. Please refer to the GitHub wiki for usage instructions. \ No newline at end of file diff --git a/src/main/java/eu/sdi4apps/ftgeosearch/GeoDoc.java b/src/main/java/eu/sdi4apps/ftgeosearch/GeoDoc.java index dbe727e..76152e5 100644 --- a/src/main/java/eu/sdi4apps/ftgeosearch/GeoDoc.java +++ b/src/main/java/eu/sdi4apps/ftgeosearch/GeoDoc.java @@ -5,6 +5,7 @@ */ package eu.sdi4apps.ftgeosearch; +import eu.sdi4apps.openapi.config.Settings; import java.util.UUID; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; @@ -17,13 +18,12 @@ * @author runarbe */ public class GeoDoc { - + /** - * The relevance score of the search result - * Only populated upon searches + * The relevance score of the search result Only populated upon searches */ public float Score; - + /** * Unique id of document */ @@ -33,7 +33,7 @@ public class GeoDoc { * The layer that the document belongs to */ public String Layer; - + /** * The type of object represented by the document */ @@ -88,6 +88,7 @@ public GeoDoc() { /** * Create a new GeoDoc + * * @param layer * @param fullGeom * @param pointGeom @@ -129,9 +130,9 @@ public static GeoDoc create( } /** - * Add additional values to be indexed but not displayed to the document - * - * @param values + * Add additional values to be indexed but not displayed to the document + * + * @param values */ public void indexValues(String values) { this.IndexAdditional += values; @@ -139,7 +140,8 @@ public void indexValues(String values) { /** * Add custom JSON data object to the document - * @param jsonData + * + * @param jsonData */ public void setJsonData(String jsonData) { this.JsonData = jsonData; @@ -156,17 +158,19 @@ public Document asLuceneDoc() { d.add(new Field("PointGeom", this.PointGeom, Store.YES, Index.NO)); d.add(new Field("DisplayTitle", this.DisplayTitle, Store.YES, Index.NO)); d.add(new Field("DisplayDescription", this.DisplayDescription, Store.YES, Index.NO)); - + Field indexTitle = new Field("IndexTitle", this.IndexTitle, Store.NO, Index.ANALYZED); - indexTitle.setBoost((float)1.2); + indexTitle.setBoost(Settings.TITLEBOOST); d.add(indexTitle); - + Field indexDescription = new Field("IndexDescription", this.IndexDescription, Store.NO, Index.ANALYZED); - indexDescription.setBoost((float)1.1); + indexDescription.setBoost(Settings.DESCRIPTIONBOOST); d.add(indexDescription); d.add(new Field("IndexAdditional", this.IndexAdditional, Store.NO, Index.ANALYZED)); - d.add(new Field("JsonData", Serializer.Serialize(this.JsonData), Store.YES, Index.NO)); + if (this.JsonData != null) { + d.add(new Field("JsonData", Serializer.Serialize(this.JsonData), Store.YES, Index.NO)); + } } catch (Exception e) { System.out.println("Error converting GeoDoc to LuceneDoc: " + e.toString()); } diff --git a/src/main/java/eu/sdi4apps/ftgeosearch/Indexer.java b/src/main/java/eu/sdi4apps/ftgeosearch/Indexer.java index 22449d5..894e020 100644 --- a/src/main/java/eu/sdi4apps/ftgeosearch/Indexer.java +++ b/src/main/java/eu/sdi4apps/ftgeosearch/Indexer.java @@ -1,19 +1,24 @@ package eu.sdi4apps.ftgeosearch; +import com.spatial4j.core.context.SpatialContext; +import com.spatial4j.core.shape.Shape; import eu.sdi4apps.openapi.utils.Logger; -import eu.sdi4apps.ftgeosearch.drivers.ShapefileDriver; import eu.sdi4apps.openapi.config.Settings; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Paths; import java.sql.SQLException; -import static java.util.Arrays.asList; import java.util.LinkedHashMap; -import java.util.List; import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.document.Document; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.Term; +import org.apache.lucene.spatial.SpatialStrategy; +import org.apache.lucene.spatial.prefix.RecursivePrefixTreeStrategy; +import org.apache.lucene.spatial.prefix.tree.GeohashPrefixTree; +import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.gdal.ogr.Feature; @@ -35,6 +40,16 @@ public class Indexer { public static IndexWriterConfig indexWriterConfig = null; + public static SpatialContext spatialCtx = null; + + public static SpatialStrategy spatialStrategy = null; + + public static SpatialPrefixTree spatialPrefixTree = null; + + public static int maxSpatialIndexLevels = 11; + + public static int errorCount = 0; + /** * Get the IndexWriter or null * @@ -43,6 +58,12 @@ public class Indexer { public static IndexWriter getWriter() { try { if (indexWriter == null || indexWriter.isOpen() == false) { + spatialCtx = SpatialContext.GEO; + + spatialPrefixTree = new GeohashPrefixTree(spatialCtx, maxSpatialIndexLevels); + + spatialStrategy = new RecursivePrefixTreeStrategy(spatialPrefixTree, "GeoField"); + analyzer = new StandardAnalyzer(); directory = FSDirectory.open(Paths.get(Settings.INDEXDIR)); indexWriterConfig = new IndexWriterConfig(analyzer); @@ -79,15 +100,17 @@ public static void unlockIndex() { /** * Index a layer - * + * * @param lyr * @param qi - * @param w + * @param w */ public static void indexLayer(Layer lyr, QueueItem qi, IndexWriter w) { try { + Indexer.errorCount = 0; + qi.updateIndexingStatus(IndexingStatus.Indexing); Feature f = null; @@ -107,6 +130,10 @@ public static void indexLayer(Layer lyr, QueueItem qi, IndexWriter w) { indexFeature(w, qi, f, titleFieldMap, descriptionFieldMap, indexAdditionalFieldMap, jsonDataFieldMap); + if (Indexer.errorCount >= 50) { + throw new Exception("More than 50 errors occurred, aborting indexing operation"); + } + if (counter % batch == 0 || counter == totalFeatures) { Logger.Log("Processed: " + counter + " items..."); } @@ -146,12 +173,17 @@ public static void indexFeature(IndexWriter w, String compositeId = qi.layer + "-" + f.GetFID(); + String pointWkt = g.PointOnSurface().ExportToWkt(); + + double[] shapeEnvelope = new double[4]; + g.GetEnvelope(shapeEnvelope); + GeoDoc gd = GeoDoc.create( compositeId, qi.layer, qi.objtype, g.ExportToWkt(), - g.PointOnSurface().ExportToWkt(), + pointWkt, titleData[0], descData[0], titleData[1], @@ -160,12 +192,35 @@ public static void indexFeature(IndexWriter w, jsonData); if (gd != null) { - w.updateDocument(new Term("Id", compositeId), gd.asLuceneDoc()); + + /** + * Retrieve Lucene document + */ + Document luceneGeoDoc = gd.asLuceneDoc(); + + /** + * Add spatial indexing for bounding box of objects + */ + for (IndexableField geoField + : spatialStrategy.createIndexableFields( + spatialCtx.makeRectangle(shapeEnvelope[0], + shapeEnvelope[1], + shapeEnvelope[2], + shapeEnvelope[3]))) { + luceneGeoDoc.add(geoField); + } + + /** + * Write the document to the index + */ + w.updateDocument(new Term("Id", compositeId), luceneGeoDoc); } } catch (Exception e) { - Logger.Log("An error occurred while writing feature to Lucene index: " + e.toString()); + Indexer.errorCount++; + Logger.Log("An error occurred while writing feature to Lucene index: " + e.toString() + " (#" + Indexer.errorCount + ")"); } + } /** diff --git a/src/main/java/eu/sdi4apps/ftgeosearch/Searcher.java b/src/main/java/eu/sdi4apps/ftgeosearch/Searcher.java index bccdfc6..01b73cd 100644 --- a/src/main/java/eu/sdi4apps/ftgeosearch/Searcher.java +++ b/src/main/java/eu/sdi4apps/ftgeosearch/Searcher.java @@ -5,6 +5,12 @@ */ package eu.sdi4apps.ftgeosearch; +import com.spatial4j.core.context.SpatialContext; +import com.spatial4j.core.distance.DistanceUtils; +import static eu.sdi4apps.ftgeosearch.Indexer.maxSpatialIndexLevels; +import static eu.sdi4apps.ftgeosearch.Indexer.spatialCtx; +import static eu.sdi4apps.ftgeosearch.Indexer.spatialPrefixTree; +import static eu.sdi4apps.ftgeosearch.Indexer.spatialStrategy; import eu.sdi4apps.openapi.config.Settings; import eu.sdi4apps.openapi.types.BBox; import eu.sdi4apps.openapi.utils.Logger; @@ -12,20 +18,27 @@ import java.nio.file.Paths; import java.util.ArrayList; import java.util.List; -import java.util.logging.Level; import org.apache.commons.lang.StringUtils; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.index.DirectoryReader; -import org.apache.lucene.queries.TermFilter; import org.apache.lucene.queryparser.classic.MultiFieldQueryParser; import org.apache.lucene.queryparser.classic.ParseException; import org.apache.lucene.queryparser.classic.QueryParser; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.Filter; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.spatial.SpatialStrategy; +import org.apache.lucene.spatial.prefix.RecursivePrefixTreeStrategy; +import org.apache.lucene.spatial.prefix.tree.GeohashPrefixTree; +import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree; +import org.apache.lucene.spatial.query.SpatialArgs; +import org.apache.lucene.spatial.query.SpatialOperation; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; @@ -43,12 +56,24 @@ public class Searcher { public static IndexSearcher isearcher = null; + public static SpatialContext spatialCtx = null; + + public static SpatialStrategy spatialStrategy = null; + + public static SpatialPrefixTree spatialPrefixTree = null; + + public static int maxSpatialIndexLevels = 11; + static { createSearcher(); } public static IndexSearcher createSearcher() { try { + spatialCtx = SpatialContext.GEO; + spatialPrefixTree = new GeohashPrefixTree(spatialCtx, maxSpatialIndexLevels); + spatialStrategy = new RecursivePrefixTreeStrategy(spatialPrefixTree, "GeoField"); + analyzer = new StandardAnalyzer(); directory = FSDirectory.open(Paths.get(Settings.INDEXDIR)); ireader = DirectoryReader.open(directory); @@ -84,15 +109,27 @@ public static List Search(String q, String filter, String extent) throws IOException, ParseException { - List sr = new ArrayList<>(); + List searchResults = new ArrayList<>(); try { + /** + * Create searcher if it does not exist + */ + if (isearcher == null) { + createSearcher(); + } + + /** + * Create a boolean query + */ + BooleanQuery combinedQuery = new BooleanQuery(); + /** * Set default number of results to 100 */ if (maxresults == null) { - maxresults = 100; + maxresults = Settings.NUMRESULTS; } /** @@ -105,24 +142,28 @@ public static List Search(String q, filterLayers = null; } + /** + * Create query clause for user specified term(s) + */ + QueryParser parser = new MultiFieldQueryParser(new String[]{"IndexTitle", "IndexDescription", "IndexAdditional"}, analyzer); + Query termQuery = parser.parse(q); + combinedQuery.add(termQuery, Occur.MUST); + /** * Convert string extent to BBox object */ if (extent != null) { BBox bbox = BBox.createFromString(extent); if (bbox != null) { - Logger.Log(bbox.jsArray()); + SpatialArgs args = new SpatialArgs(SpatialOperation.Intersects, + spatialCtx.makeRectangle(bbox.minX, bbox.maxX, bbox.minY, bbox.maxY)); + Query spatialQuery = spatialStrategy.makeQuery(args); + spatialQuery.setBoost(Settings.SPATIALBOOST); + combinedQuery.add(spatialQuery, Occur.SHOULD); } } - if (isearcher == null) { - createSearcher(); - } - - QueryParser parser = new MultiFieldQueryParser(new String[]{"IndexTitle", "IndexDescription", "IndexAdditional"}, analyzer); - Query query = parser.parse(q); - - ScoreDoc[] hits = isearcher.search(query, null, maxresults).scoreDocs; + ScoreDoc[] hits = isearcher.search(combinedQuery, maxresults).scoreDocs; // Iterate through the results: for (int i = 0; i < hits.length; i++) { @@ -137,14 +178,14 @@ public static List Search(String q, g.DisplayDescription = hitDoc.get("DisplayDescription"); g.PointGeom = hitDoc.get("PointGeom"); g.JsonData = Serializer.Deserialize(hitDoc.get("JsonData")); - sr.add(g); + searchResults.add(g); } } catch (Exception e) { Logger.Log("An exception occurred during search: " + e.toString()); } finally { destroySearcher(); - return sr; + return searchResults; } } diff --git a/src/main/java/eu/sdi4apps/openapi/config/Settings.java b/src/main/java/eu/sdi4apps/openapi/config/Settings.java index d48af73..32b9657 100644 --- a/src/main/java/eu/sdi4apps/openapi/config/Settings.java +++ b/src/main/java/eu/sdi4apps/openapi/config/Settings.java @@ -100,6 +100,26 @@ public class Settings { } } } + + /** + * Default number of results to return from a search + */ + public static int NUMRESULTS = 25; + + /** + * Default boost to apply to terms in title (at index time) + */ + public static final float TITLEBOOST = (float) 1.3; + + /** + * Default boost to apply to terms in description (at index time) + */ + public static final float DESCRIPTIONBOOST = (float) 1.2; + + /** + * Default boost to apply to matches within bounding box (at query time) + */ + public static final float SPATIALBOOST = (float) 3; /** * A flag to indicate whether the configuration file has been read properly diff --git a/src/main/java/eu/sdi4apps/openapi/servlets/Index.java b/src/main/java/eu/sdi4apps/openapi/servlets/Index.java index 9d96507..8583461 100644 --- a/src/main/java/eu/sdi4apps/openapi/servlets/Index.java +++ b/src/main/java/eu/sdi4apps/openapi/servlets/Index.java @@ -17,7 +17,6 @@ import eu.sdi4apps.openapi.utils.HttpParam; import java.io.IOException; import java.io.PrintWriter; -import java.nio.file.Paths; import static java.util.Arrays.asList; import java.util.HashMap; import java.util.List; @@ -81,7 +80,7 @@ protected void processRequest(HttpServletRequest request, HttpServletResponse re List descFields3 = asList("field_4", "field_8", "field_18"); String descFieldFormat3 = "Alternative forms: %s. Type of name %s in %s"; QueueItem entry3 = QueueItem.create("Geonames 1000", "names", DatasetType.Shapefile, drv3, titleFields3, titleFieldFormat3, descFields3, descFieldFormat3, null, null, 4326); - IndexerQueue.enqueue(entry3); + IndexerQueue.enqueue(entry3); r.setData("Added two layers to index", true); break; case "EnqueueShapefile": @@ -93,10 +92,7 @@ protected void processRequest(HttpServletRequest request, HttpServletResponse re reqFields.put("descriptionFormat", true); reqFields.put("additionalFields", true); reqFields.put("jsonDataFields", true); - - Map avFields = HttpParam.GetParameters(request, reqFields, r); - - Logger.Log(avFields.toString()); + Map avFields = HttpParam.GetParameters(request, reqFields, r); break; default: r.setError("Unsupported action: " + action); diff --git a/src/main/java/eu/sdi4apps/openapi/servlets/Search.java b/src/main/java/eu/sdi4apps/openapi/servlets/Search.java index 5168ae0..eefd987 100644 --- a/src/main/java/eu/sdi4apps/openapi/servlets/Search.java +++ b/src/main/java/eu/sdi4apps/openapi/servlets/Search.java @@ -55,16 +55,23 @@ protected void processRequest(HttpServletRequest request, HttpServletResponse re m.put("maxresults", false); m.put("extent", false); - Map params = HttpParam.GetParameters(request, m, r); + Map params = HttpParam.GetParameters(request, m, r); if (r.status == "error") { out.println(r.asJson()); return; } + Integer maxResults; + if (params.get("maxresults") != null) { + maxResults = NumberUtils.toInt(params.get("maxresults")); + } else { + maxResults = null; + } + List sres = Searcher.Search((String) params.get("q"), - NumberUtils.toInt((String) params.get("maxresults"), 100), - (String) params.get("filter"), - (String) params.get("extent")); + maxResults, + params.get("filter"), + params.get("extent")); r.setData(sres, true); r.count = sres.size(); diff --git a/src/main/java/eu/sdi4apps/openapi/utils/HttpParam.java b/src/main/java/eu/sdi4apps/openapi/utils/HttpParam.java index d421941..f25df89 100644 --- a/src/main/java/eu/sdi4apps/openapi/utils/HttpParam.java +++ b/src/main/java/eu/sdi4apps/openapi/utils/HttpParam.java @@ -17,9 +17,9 @@ */ public class HttpParam { - public static Map GetParameters(HttpServletRequest request, Map fields, Response r) { + public static Map GetParameters(HttpServletRequest request, Map fields, Response r) { - Map m = new LinkedHashMap(); + Map m = new LinkedHashMap(); for (Map.Entry e : fields.entrySet()) { String key = e.getKey();