Skip to content

Commit

Permalink
Use the existing isHighSurrogate, isLowSurrogate, and toCodePoint methods from the Character class instead of implementing them myself
Browse files (browse the repository at this point in the history)
  • Loading branch information
Mark Khazin committed Dec 22, 2019
1 parent 3c77d8d commit 29499f6
Showing 1 changed file with 2 additions and 18 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -208,8 +208,8 @@ public boolean incrementToken() throws IOException {
boolean isHomoglyph = false;
for (int i = 0; i < termLength; i++) {
int codePoint;
if (isHighSurrogate(termBuffer[i]) && i < termLength - 1 && isLowSurrogate(termBuffer[i + 1])) {
codePoint = getUtf16CodePoint(termBuffer[i], termBuffer[i + 1]);
if (Character.isHighSurrogate(termBuffer[i]) && i < termLength - 1 && Character.isLowSurrogate(termBuffer[i + 1])) {
codePoint = Character.toCodePoint(termBuffer[i], termBuffer[i + 1]);
i++;
} else {
codePoint = termBuffer[i];
Expand Down Expand Up @@ -265,20 +265,4 @@ private String[] getResults(String[][] asciiGroups, int asciiGroupsIndex) {

return results;
}

/**
 * Tells whether the given UTF-16 code unit is a high (leading) surrogate,
 * i.e. lies in the range U+D800..U+DBFF inclusive.
 *
 * <p>Delegates to {@link Character#isHighSurrogate(char)} rather than
 * repeating the range check by hand.
 *
 * @param ch the UTF-16 code unit to test
 * @return {@code true} if {@code ch} is between {@code '\uD800'} and
 *         {@code '\uDBFF'} inclusive, {@code false} otherwise
 */
private static boolean isHighSurrogate(char ch) {
    return Character.isHighSurrogate(ch);
}

/**
 * Tells whether the given UTF-16 code unit is a low (trailing) surrogate,
 * i.e. lies in the range U+DC00..U+DFFF inclusive.
 *
 * <p>Delegates to {@link Character#isLowSurrogate(char)} rather than
 * repeating the range check by hand.
 *
 * @param ch the UTF-16 code unit to test
 * @return {@code true} if {@code ch} is between {@code '\uDC00'} and
 *         {@code '\uDFFF'} inclusive, {@code false} otherwise
 */
private static boolean isLowSurrogate(char ch) {
    return Character.isLowSurrogate(ch);
}

/**
 * Combines a UTF-16 surrogate pair into the supplementary code point it encodes.
 *
 * <p>Delegates to {@link Character#toCodePoint(char, char)}, which computes
 * {@code ((high - 0xD800) << 10) + (low - 0xDC00) + 0x10000}. The arguments
 * are not validated here; callers are expected to have checked that they
 * really form a high/low surrogate pair before calling.
 *
 * @param highSurrogate the leading code unit of the pair (U+D800..U+DBFF)
 * @param lowSurrogate  the trailing code unit of the pair (U+DC00..U+DFFF)
 * @return the code point represented by the pair
 */
private static int getUtf16CodePoint(char highSurrogate, char lowSurrogate) {
    return Character.toCodePoint(highSurrogate, lowSurrogate);
}
}

0 comments on commit 29499f6

Please sign in to comment.