From e657521c9d12fd958f9b9b6e3cc77aff490642c7 Mon Sep 17 00:00:00 2001 From: jlinn Date: Sun, 8 Nov 2015 17:04:10 -0800 Subject: [PATCH] Update to ES v2.0.0 --- .travis.yml | 4 ++- README.md | 3 +- pom.xml | 8 ++--- .../elasticsearch/index/analysis/URLPart.java | 4 +-- .../index/analysis/URLTokenizerFactory.java | 9 +++-- .../index/analysis/url/URLTokenFilter.java | 2 +- .../index/analysis/url/URLTokenizer.java | 36 ++++++++++++------- .../plugin/analysis/AnalysisURLPlugin.java | 4 +-- .../analysis/url/URLAnalysisTestCase.java | 25 ++++++++----- .../url/URLTokenFilterIntegrationTest.java | 3 +- .../index/analysis/url/URLTokenizerTest.java | 13 ++++--- 11 files changed, 66 insertions(+), 45 deletions(-) diff --git a/.travis.yml b/.travis.yml index e923ff1..781728c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,4 +2,6 @@ language: java jdk: - oraclejdk7 - - oraclejdk8 \ No newline at end of file + - oraclejdk8 + +sudo: false \ No newline at end of file diff --git a/README.md b/README.md index 911ba40..479f445 100644 --- a/README.md +++ b/README.md @@ -9,6 +9,7 @@ This plugin enables URL tokenization and token filtering by URL part. | Elasticsearch Version | Plugin Version | |-----------------------|----------------| +| 2.0.0 | 2.1.0 | | 1.6.x, 1.7.x | 2.0.0 | | 1.6.0 | 1.2.1 | | 1.5.2 | 1.1.0 | @@ -16,7 +17,7 @@ This plugin enables URL tokenization and token filtering by URL part. ## Installation ```bash -bin/plugin --install analysis-url --url https://github.com/jlinn/elasticsearch-analysis-url/releases/download/v2.0.0/elasticsearch-analysis-url-2.0.0.zip +bin/plugin install analysis-url --url https://github.com/jlinn/elasticsearch-analysis-url/releases/download/v2.1.0/elasticsearch-analysis-url-2.1.0.zip ``` ## Usage diff --git a/pom.xml b/pom.xml index 1e33d5f..14eb8bd 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ org.elasticsearch elasticsearch-analysis-url - 2.0.0 + 2.1.0 jar Elasticsearch URL token filter plugin @@ -18,8 +18,8 @@ UTF-8 - 1.7.1 - 4.10.4 + 2.0.0 + 5.2.1 1.3 onerror true @@ -69,7 +69,7 @@ org.hamcrest - hamcrest-all + hamcrest-library ${hamcrest.version} test diff --git a/src/main/java/org/elasticsearch/index/analysis/URLPart.java b/src/main/java/org/elasticsearch/index/analysis/URLPart.java index 07a1830..342565a 100644 --- a/src/main/java/org/elasticsearch/index/analysis/URLPart.java +++ b/src/main/java/org/elasticsearch/index/analysis/URLPart.java @@ -1,7 +1,5 @@ package org.elasticsearch.index.analysis; -import org.elasticsearch.ElasticsearchIllegalArgumentException; - /** * Joe Linn * 1/17/2015 @@ -21,6 +19,6 @@ public static URLPart fromString(String part) { return urlPart; } } - throw new ElasticsearchIllegalArgumentException(String.format("Unrecognized URL part: %s", part)); + throw new IllegalArgumentException(String.format("Unrecognized URL part: %s", part)); } } diff --git a/src/main/java/org/elasticsearch/index/analysis/URLTokenizerFactory.java b/src/main/java/org/elasticsearch/index/analysis/URLTokenizerFactory.java index e51e1aa..6ac0dd7 100644 --- a/src/main/java/org/elasticsearch/index/analysis/URLTokenizerFactory.java +++ b/src/main/java/org/elasticsearch/index/analysis/URLTokenizerFactory.java @@ -1,7 +1,7 @@ package org.elasticsearch.index.analysis; +import com.google.common.base.Strings; import org.apache.lucene.analysis.Tokenizer; -import org.elasticsearch.common.base.Strings; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.settings.Settings; @@ -9,8 +9,6 @@ import org.elasticsearch.index.analysis.url.URLTokenizer; import org.elasticsearch.index.settings.IndexSettings; -import java.io.Reader; - /** * Joe Linn * 8/1/2015 @@ -40,9 +38,10 @@ public URLTokenizerFactory(Index index, @IndexSettings Settings indexSettings, @ this.allowMalformed = settings.getAsBoolean("allow_malformed", false); } + @Override - public Tokenizer create(Reader reader) { - URLTokenizer tokenizer = new URLTokenizer(reader); + public Tokenizer create() { + URLTokenizer tokenizer = new URLTokenizer(); tokenizer.setPart(part); tokenizer.setUrlDecode(urlDecode); tokenizer.setTokenizeHost(tokenizeHost); diff --git a/src/main/java/org/elasticsearch/index/analysis/url/URLTokenFilter.java b/src/main/java/org/elasticsearch/index/analysis/url/URLTokenFilter.java index 11ab8cd..98f41f7 100644 --- a/src/main/java/org/elasticsearch/index/analysis/url/URLTokenFilter.java +++ b/src/main/java/org/elasticsearch/index/analysis/url/URLTokenFilter.java @@ -1,9 +1,9 @@ package org.elasticsearch.index.analysis.url; +import com.google.common.base.Strings; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; -import org.elasticsearch.common.base.Strings; import org.elasticsearch.index.analysis.URLPart; import java.io.IOException; diff --git a/src/main/java/org/elasticsearch/index/analysis/url/URLTokenizer.java b/src/main/java/org/elasticsearch/index/analysis/url/URLTokenizer.java index 0f170c7..7dda465 100644 --- a/src/main/java/org/elasticsearch/index/analysis/url/URLTokenizer.java +++ b/src/main/java/org/elasticsearch/index/analysis/url/URLTokenizer.java @@ -1,5 +1,8 @@ package org.elasticsearch.index.analysis.url; +import com.google.common.base.Strings; +import com.google.common.collect.ImmutableList; +import com.google.common.net.InetAddresses; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.path.PathHierarchyTokenizer; import org.apache.lucene.analysis.path.ReversePathHierarchyTokenizer; @@ -8,9 +11,6 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import org.apache.lucene.util.AttributeFactory; -import org.elasticsearch.common.base.Strings; -import org.elasticsearch.common.collect.ImmutableList; -import org.elasticsearch.common.net.InetAddresses; import org.elasticsearch.index.analysis.URLPart; import java.io.IOException; @@ -73,18 +73,17 @@ public final class URLTokenizer extends Tokenizer { private Iterator iterator; - public URLTokenizer(Reader input) { - super(input); + public URLTokenizer() { + } - public URLTokenizer(Reader input, URLPart part) { - this(input); + public URLTokenizer(URLPart part) { this.part = part; } - public URLTokenizer(AttributeFactory factory, Reader input) { - super(factory, input); + public URLTokenizer(AttributeFactory factory) { + super(factory); } @@ -206,7 +205,7 @@ private List tokenize(URL url, URLPart part) throws IOException { end = getEndIndex(start, partStringRaw); return ImmutableList.of(new Token(partString, part, start, end)); } - return tokenize(part, new ReversePathHierarchyTokenizer(new StringReader(partString), '.', '.'), start); + return tokenize(part, addReader(new ReversePathHierarchyTokenizer('.', '.'), new StringReader(partString)), start); case PORT: String port = getPort(url); start = url.toString().indexOf(":" + port); @@ -225,14 +224,14 @@ private List tokenize(URL url, URLPart part) throws IOException { end = getEndIndex(start, partStringRaw); return ImmutableList.of(new Token(partString, part, start, end)); } - return tokenize(part, new PathHierarchyTokenizer(new StringReader(partString), '/', '/'), start); + return tokenize(part, addReader(new PathHierarchyTokenizer('/', '/'), new StringReader(partString)), start); case QUERY: start = getStartIndex(url, partStringRaw); if (!tokenizeQuery) { end = getEndIndex(start, partStringRaw); return ImmutableList.of(new Token(partString, part, start, end)); } - return tokenize(part, new PatternTokenizer(new StringReader(partString), QUERY_SEPARATOR, -1), start); + return tokenize(part, addReader(new PatternTokenizer(QUERY_SEPARATOR, -1), new StringReader(partString)), start); case PROTOCOL: case WHOLE: end = partString.length(); @@ -246,6 +245,19 @@ private List tokenize(URL url, URLPart part) throws IOException { } + /** + * Set the given reader on the given tokenizer + * @param tokenizer tokenizer on which the reader is to be set + * @param input the reader to set + * @return the given tokenizer with the given reader set + * @throws IOException + */ + private Tokenizer addReader(Tokenizer tokenizer, Reader input) throws IOException { + tokenizer.setReader(input); + return tokenizer; + } + + /** * Get the start index of the given string in the given url * @param url the url diff --git a/src/main/java/org/elasticsearch/plugin/analysis/AnalysisURLPlugin.java b/src/main/java/org/elasticsearch/plugin/analysis/AnalysisURLPlugin.java index a7b3b4c..46d64a2 100644 --- a/src/main/java/org/elasticsearch/plugin/analysis/AnalysisURLPlugin.java +++ b/src/main/java/org/elasticsearch/plugin/analysis/AnalysisURLPlugin.java @@ -2,13 +2,13 @@ import org.elasticsearch.index.analysis.AnalysisModule; import org.elasticsearch.index.analysis.URLTokenAnalysisBinderProcessor; -import org.elasticsearch.plugins.AbstractPlugin; +import org.elasticsearch.plugins.Plugin; /** * Joe Linn * 1/17/2015 */ -public class AnalysisURLPlugin extends AbstractPlugin { +public class AnalysisURLPlugin extends Plugin { /** * The name of the plugin. */ diff --git a/src/test/java/org/elasticsearch/index/analysis/url/URLAnalysisTestCase.java b/src/test/java/org/elasticsearch/index/analysis/url/URLAnalysisTestCase.java index 6bce73c..b93dba9 100644 --- a/src/test/java/org/elasticsearch/index/analysis/url/URLAnalysisTestCase.java +++ b/src/test/java/org/elasticsearch/index/analysis/url/URLAnalysisTestCase.java @@ -1,8 +1,10 @@ package org.elasticsearch.index.analysis.url; import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse; -import org.elasticsearch.common.io.Streams; -import org.elasticsearch.test.ElasticsearchSingleNodeTest; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.plugin.analysis.AnalysisURLPlugin; +import org.elasticsearch.test.ESIntegTestCase; +import org.elasticsearch.test.StreamsUtils; import org.junit.Before; import java.util.List; @@ -11,9 +13,18 @@ * Joe Linn * 8/1/2015 */ -public abstract class URLAnalysisTestCase extends ElasticsearchSingleNodeTest { +public abstract class URLAnalysisTestCase extends ESIntegTestCase { protected static final String INDEX = "url_token_filter"; + + @Override + protected Settings nodeSettings(int nodeOrdinal) { + return Settings.builder() + .put(super.nodeSettings(nodeOrdinal)) + .put("plugin.types", AnalysisURLPlugin.class.getName()) + .build(); + } + /** * For subclasses to override. Overrides must call {@code super.setUp()}. */ @@ -21,17 +32,13 @@ public abstract class URLAnalysisTestCase extends ElasticsearchSingleNodeTest { @Override public void setUp() throws Exception { super.setUp(); - String settings = Streams.copyToStringFromClasspath("/test-settings.json"); - String mapping = Streams.copyToStringFromClasspath("/test-mapping.json"); + String settings = StreamsUtils.copyToStringFromClasspath("/test-settings.json"); + String mapping = StreamsUtils.copyToStringFromClasspath("/test-mapping.json"); client().admin().indices().prepareCreate(INDEX).setSettings(settings).addMapping("test", mapping).get(); refresh(); Thread.sleep(75); // Ensure that the shard is available before we start making analyze requests. } - protected void refresh() { - client().admin().indices().prepareRefresh().get(); - } - protected List analyzeURL(String url, String analyzer) { return client().admin().indices().prepareAnalyze(INDEX, url).setAnalyzer(analyzer).get().getTokens(); } diff --git a/src/test/java/org/elasticsearch/index/analysis/url/URLTokenFilterIntegrationTest.java b/src/test/java/org/elasticsearch/index/analysis/url/URLTokenFilterIntegrationTest.java index 82add73..2ba4f71 100644 --- a/src/test/java/org/elasticsearch/index/analysis/url/URLTokenFilterIntegrationTest.java +++ b/src/test/java/org/elasticsearch/index/analysis/url/URLTokenFilterIntegrationTest.java @@ -2,7 +2,6 @@ import org.elasticsearch.ElasticsearchException; import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse; -import org.elasticsearch.index.query.FilterBuilders; import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.search.SearchHits; import org.junit.Test; @@ -60,7 +59,7 @@ public void testMalformed() { SearchHits hits = client() .prepareSearch(INDEX) - .setQuery(QueryBuilders.filteredQuery(QueryBuilders.matchAllQuery(), FilterBuilders.missingFilter("http_malformed.port"))) + .setQuery(QueryBuilders.missingQuery("http_malformed.port")) .get() .getHits(); assertEquals("found a doc missing http_malformed.port", 1, hits.getTotalHits()); diff --git a/src/test/java/org/elasticsearch/index/analysis/url/URLTokenizerTest.java b/src/test/java/org/elasticsearch/index/analysis/url/URLTokenizerTest.java index 204878b..bc6d526 100644 --- a/src/test/java/org/elasticsearch/index/analysis/url/URLTokenizerTest.java +++ b/src/test/java/org/elasticsearch/index/analysis/url/URLTokenizerTest.java @@ -54,7 +54,7 @@ public void testTokenizeHost() throws IOException { @Test - public void testTokenizePort() { + public void testTokenizePort() throws IOException { URLTokenizer tokenizer = createTokenizer(TEST_HTTP_URL, URLPart.PORT); assertThat(tokenizer, hasTokenAtOffset("9200", 23, 27)); @@ -96,7 +96,7 @@ public void testTokenizeQuery() throws IOException { @Test - public void testTokenizeRef() { + public void testTokenizeRef() throws IOException { URLTokenizer tokenizer = createTokenizer("http://foo.com#baz", URLPart.REF); assertThat(tokenizer, hasTokenAtOffset("baz", 15, 18)); } @@ -104,7 +104,8 @@ public void testTokenizeRef() { @Test public void testAll() throws IOException { - URLTokenizer tokenizer = new URLTokenizer(new StringReader(TEST_HTTPS_URL)); + URLTokenizer tokenizer = new URLTokenizer(); + tokenizer.setReader(new StringReader(TEST_HTTPS_URL)); CharTermAttribute termAttribute = tokenizer.getAttribute(CharTermAttribute.class); tokenizer.reset(); tokenizer.clearAttributes(); @@ -138,8 +139,10 @@ public void testAllowMalformed() throws IOException { } - private URLTokenizer createTokenizer(String input, URLPart part) { - return new URLTokenizer(new StringReader(input), part); + private URLTokenizer createTokenizer(String input, URLPart part) throws IOException { + URLTokenizer tokenizer = new URLTokenizer(part); + tokenizer.setReader(new StringReader(input)); + return tokenizer; }