diff --git a/.gitignore b/.gitignore index 2f7896d..790f677 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,4 @@ target/ +bin/ +frej.jar + diff --git a/src/net/java/frej/Regex.java b/src/net/java/frej/Regex.java index 09e38e6..00c2120 100644 --- a/src/net/java/frej/Regex.java +++ b/src/net/java/frej/Regex.java @@ -49,6 +49,7 @@ private enum CharType { SEPARATOR, DIGIT, LETTER, ALLOWED_PUNCT } // CharType + private Fuzzy fuzzy = new Fuzzy(); private Elem root; private Special terminator = new Special(this, null); String[] tokens; @@ -58,7 +59,7 @@ private enum CharType { private int firstMatched, lastMatched; GroupMap groups = new GroupMap(); private String allowedPunct = "/-"; - private double threshold = Fuzzy.threshold; + private double threshold = fuzzy.threshold; Map subs = new HashMap(); @@ -613,6 +614,9 @@ String getGroup(String name) { void setGroup(String name, String s) { groups.put(name, s); } // setGroup - + + Fuzzy getFuzzy() { + return fuzzy; + } } // class FuzzyRegex diff --git a/src/net/java/frej/Token.java b/src/net/java/frej/Token.java index 532bf50..7bf96d5 100644 --- a/src/net/java/frej/Token.java +++ b/src/net/java/frej/Token.java @@ -29,9 +29,12 @@ final class Token extends Elem { private String token; private boolean partial; + private Fuzzy fuzzy; + Token(Regex owner, String token) { super(owner); + fuzzy = owner.getFuzzy(); changePattern(token); } // FuzzyRegexToken @@ -47,16 +50,16 @@ final class Token extends Elem { } // if if (partial && owner.tokens[i].length() > token.length()) { - Fuzzy.similarity(owner.tokens[i].substring(0, token.length()), token); + fuzzy.similarity(owner.tokens[i].substring(0, token.length()), token); } else { - Fuzzy.similarity(owner.tokens[i], token); + fuzzy.similarity(owner.tokens[i], token); } // else matchLen = 1; saveGroup(); - return Fuzzy.result; + return fuzzy.result; } // matchAt diff --git a/src/net/java/frej/fuzzy/Fuzzy.java b/src/net/java/frej/fuzzy/Fuzzy.java index e6ad352..2c86317 100644 --- a/src/net/java/frej/fuzzy/Fuzzy.java +++ b/src/net/java/frej/fuzzy/Fuzzy.java @@ -34,31 +34,28 @@ * types of "mistakes" each counting as 1 point (char deletion, char adding, * char replacement, swap of two adjacent chars). * - * Methods are static, and resulting variables too, so necessary values should - * be read before new matching/searching attempt. - * * @author Rodion Gorkovenko */ public final class Fuzzy { /** keeps starting position of matched region after substring search*/ - public static int resultStart; + public int resultStart; /** keeps ending position of matched region after substring search*/ - public static int resultEnd; + public int resultEnd; /** keeps index of best match after matching against a list of strings*/ - public static int resultIndex; + public int resultIndex; /** keeps best matched string after matching against a list of strings*/ - public static String matchedPattern; + public String matchedPattern; /** "distance" of last match (roughly mistakes count divided by length*/ - public static double result; + public double result; /** if result of match is higher than threshold, boolean methods return "false" */ - public static double threshold = 0.34; + public double threshold = 0.34; - protected static final int MAX_PATTERN = 64; - protected static final int MAX_SOURCE = 256; - protected static final int BIG_VALUE = 1000000; - protected static int[][] e = new int[MAX_PATTERN + 1][MAX_SOURCE + 1]; - protected static WayType[][] w = new WayType[MAX_PATTERN + 1][MAX_SOURCE + 1]; + protected final int MAX_PATTERN = 64; + protected final int MAX_SOURCE = 256; + protected final int BIG_VALUE = 1000000; + protected int[][] e = new int[MAX_PATTERN + 1][MAX_SOURCE + 1]; + protected WayType[][] w = new WayType[MAX_PATTERN + 1][MAX_SOURCE + 1]; private static enum WayType { @@ -72,7 +69,7 @@ private static enum WayType { * @return position of found substring (0 .. source.length() - 1) or (-1) if * substring was not found (with given threshold). */ - public static int substrStart(CharSequence source, CharSequence pattern) { + public int substrStart(CharSequence source, CharSequence pattern) { if (containability(source, pattern) < threshold) return resultStart; @@ -87,7 +84,7 @@ public static int substrStart(CharSequence source, CharSequence pattern) { * @return position of found substring end (0 .. source.length() - 1) or (-1) if * substring was not found (with given threshold). */ - public static int substrEnd(CharSequence source, CharSequence pattern) { + public int substrEnd(CharSequence source, CharSequence pattern) { if (containability(source, pattern) < threshold) return resultEnd; @@ -100,7 +97,7 @@ public static int substrEnd(CharSequence source, CharSequence pattern) { * Tests whether "source" matches "pattern". * @return true or false depending on match quality. */ - public static boolean equals(CharSequence source, CharSequence pattern) { + public boolean equals(CharSequence source, CharSequence pattern) { return similarity(source, pattern) < threshold; } // compare @@ -110,7 +107,7 @@ public static boolean equals(CharSequence source, CharSequence pattern) { * Stops on first good match. * @return true or false depending on whether any of pattern search succeeds. */ - public static boolean containsOneOf(CharSequence source, CharSequence... patterns) { + public boolean containsOneOf(CharSequence source, CharSequence... patterns) { for (CharSequence p : patterns) { if (containability(source, p) < threshold) { @@ -127,7 +124,7 @@ public static boolean containsOneOf(CharSequence source, CharSequence... pattern * Demerau-Levenshtein distance is minimal. * @return normalized best distance (i.e. distance / pattern.length()) */ - public static double containability(CharSequence source, CharSequence pattern) { + public double containability(CharSequence source, CharSequence pattern) { int m = pattern.length() + 1; int n = source.length() + 1; int best, start; @@ -224,7 +221,7 @@ public static double containability(CharSequence source, CharSequence pattern) { * (equality=false). * @return best match result (normalized distance). */ - public static double bestEqual(String string, Object patterns, boolean equality) { + public double bestEqual(String string, Object patterns, boolean equality) { String[] array; double value = Double.POSITIVE_INFINITY; @@ -256,7 +253,7 @@ public static double bestEqual(String string, Object patterns, boolean equality) * Core method for measuring Demerau-Levenshtein distance between two strings. * @return normalized distance (distance / average(source.length(), pattern.length())) */ - public static double similarity(CharSequence source, CharSequence pattern) { + public double similarity(CharSequence source, CharSequence pattern) { int m; int n; char s, p, s1, p1;