Skip to content

Commit

Permalink
Stop attempting to parse tokens if parsing has already succeeded
Browse files Browse the repository at this point in the history
  • Loading branch information
jlinn committed Jul 28, 2015
1 parent d6fe50e commit 03d775f
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 4 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,13 @@ This plugin enables URL token filtering by URL part.

| Elasticsearch Version | Plugin Version |
|-----------------------|----------------|
| 1.6.0 | 1.2.0 |
| 1.6.0 | 1.2.1 |
| 1.5.2 | 1.1.0 |
| 1.4.2 | 1.0.0 |

## Installation
```bash
bin/plugin --install analysis-url --url https://github.com/jlinn/elasticsearch-analysis-url/releases/download/v1.1.0/elasticsearch-analysis-url-1.1.0.zip
bin/plugin --install analysis-url --url https://github.com/jlinn/elasticsearch-analysis-url/releases/download/v1.2.1/elasticsearch-analysis-url-1.2.1.zip
```

## Usage
Expand Down
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

<groupId>org.elasticsearch</groupId>
<artifactId>elasticsearch-analysis-url</artifactId>
<version>1.2.0</version>
<version>1.2.1</version>
<packaging>jar</packaging>
<description>Elasticsearch URL token filter plugin</description>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ public final class URLTokenFilter extends TokenFilter {

// When true, a MalformedURLException raised while parsing is handled by
// falling back to parseMalformed(...) instead of being rethrown.
private final boolean allowMalformed;

// Set once a non-empty URL part has been extracted; incrementToken() checks
// it to stop parsing further tokens, and reset() clears it again.
private boolean parsed;

/**
 * Convenience constructor that delegates to the three-argument overload,
 * passing {@code false} for its third parameter.
 * NOTE(review): the third parameter is presumably {@code urlDecode} - confirm
 * against the overload's declaration.
 *
 * @param input the upstream token stream to filter
 * @param part  the URL part this filter should emit
 */
public URLTokenFilter(TokenStream input, URLPart part) {
this(input, part, false);
}
Expand All @@ -45,7 +47,7 @@ public URLTokenFilter(TokenStream input, URLPart part, boolean urlDecode, boolea

@Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
if (input.incrementToken() && !parsed) {
final String urlString = termAttribute.toString();
termAttribute.setEmpty();
if (Strings.isNullOrEmpty(urlString) || urlString.equals("null")) {
Expand Down Expand Up @@ -77,12 +79,14 @@ public boolean incrementToken() throws IOException {
default:
partString = url.toString();
}
parsed = !Strings.isNullOrEmpty(partString);
} catch (MalformedURLException e) {
if (allowMalformed) {
partString = parseMalformed(urlString);
if (Strings.isNullOrEmpty(partString)) {
return false;
}
parsed = true;
} else {
throw e;
}
Expand All @@ -96,6 +100,12 @@ public boolean incrementToken() throws IOException {
return false;
}

/**
 * Resets the filter: delegates to {@code super.reset()} and then clears the
 * {@code parsed} flag, so that {@code incrementToken()} will attempt to parse
 * again after the reset.
 *
 * @throws IOException if {@code super.reset()} throws
 */
@Override
public void reset() throws IOException {
super.reset();
// Clear the "already parsed" marker; incrementToken() skips parsing while it is set.
parsed = false;
}

// One or more ASCII letters at the start of the input that are immediately
// followed by "://" (lookahead, not consumed); group 1 is the letters.
private static final Pattern REGEX_PROTOCOL = Pattern.compile("^([a-zA-Z]+)(?=://)");
// A colon followed by one to five digits; group 1 is the digits.
private static final Pattern REGEX_PORT = Pattern.compile(":([0-9]{1,5})");
// A literal '?' followed by at least one character; group 1 is everything
// matched after the '?'.
private static final Pattern REGEX_QUERY = Pattern.compile("\\?(.+)");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ public void testEmptyString() {
/**
 * Checks analysis of URLs whose query strings contain percent-encoded data.
 */
@Test
public void testUrlDecode() {
// The encoded "%40" in the query is expected to come back as "@" from "url_query".
assertURLAnalyzesTo("https://foo.bar.com?email=foo%40bar.com", "url_query", "email=foo@bar.com");
// Long https URL with a heavily percent-encoded query and no explicit port;
// the expected "url_port" output is "443".
assertURLAnalyzesTo("https://ssl.google-analytics.com/r/__utm.gif?utmwv=5.6.4&utms=1&utmn=1031590447&utmhn=www.linkedin.com&utmcs=-&utmsr=1024x768&utmvp=1256x2417&utmsc=24-bit&utmul=en-us&utmje=1&utmfl=-&utmdt=Wells%20Fargo%20Capital%20Finance%20%7C%20LinkedIn&utmhid=735221740&utmr=http%3A%2F%2Fwww.google.com%2Fsearch%3Fq%3Dsite%253Alinkedin.com%2Bwells%2Bfargo%26rls%3Dcom.microsoft%3Aen-us%26ie%3DUTF-8%26oe%3DUTF-8%26startIndex%3D%26startPage%3D1&utmp=biz-overview-public&utmht=1428449620694&utmac=UA-3242811-1&utmcc=__utma%3D23068709.1484257758.1428449621.1428449621.1428449621.1%3B%2B__utmz%3D23068709.1428449621.1.1.utmcsr%3Dgoogle%7Cutmccn%3D(organic)%7Cutmcmd%3Dorganic%7Cutmctr%3Dsite%253Alinkedin.com%2520wells%2520fargo%3B&utmjid=1336170366&utmredir=1&utmu=qBCAAAAAAAAAAAAAAAAAAAAE~", "url_port", "443");
}

@Test
Expand Down

0 comments on commit 03d775f

Please sign in to comment.