diff --git a/pom.xml b/pom.xml index cdc0b1744ef..a376d5782fc 100644 --- a/pom.xml +++ b/pom.xml @@ -413,11 +413,6 @@ commons-math 2.2 - - commons-lang - commons-lang - 2.6 - org.apache.commons commons-compress diff --git a/src/org/rascalmpl/library/Prelude.java b/src/org/rascalmpl/library/Prelude.java index b3c38a356c8..a28ca8f0d79 100644 --- a/src/org/rascalmpl/library/Prelude.java +++ b/src/org/rascalmpl/library/Prelude.java @@ -72,7 +72,6 @@ import org.apache.commons.codec.CodecPolicy; import org.apache.commons.codec.binary.Base32; -import org.apache.commons.lang.CharSetUtils; import org.rascalmpl.debug.IRascalMonitor; import org.rascalmpl.exceptions.JavaCompilation; import org.rascalmpl.exceptions.RuntimeExceptionFactory; @@ -3071,9 +3070,31 @@ public IString trim(IString s) { } public IString squeeze(IString src, IString charSet) { - //@{http://commons.apache.org/lang/api-2.6/index.html?org/apache/commons/lang/text/package-summary.html} - String s = CharSetUtils.squeeze(src.getValue(), charSet.getValue()); - return values.string(s); + if (charSet.getValue().isEmpty()) { + return src; + } + final Pattern isCharset = Pattern.compile("[" + charSet.getValue() + "]", Pattern.UNICODE_CHARACTER_CLASS); + StringBuilder result = new StringBuilder(src.length()); + var chars = src.iterator(); + int previousMatch = -1; + while (chars.hasNext()) { + int cp = chars.nextInt(); + if (cp == previousMatch) { + // swallow + continue; + } + + String c = Character.toString(cp); + if (isCharset.matcher(c).matches()) { + previousMatch = cp; + // swallow the next + } + else { + previousMatch = -1; + } + result.append(c); + } + return values.string(result.toString()); } public IString capitalize(IString src) { diff --git a/src/org/rascalmpl/library/lang/rascal/tests/basic/Strings1.rsc b/src/org/rascalmpl/library/lang/rascal/tests/basic/Strings1.rsc index 6f296a744e1..0a679380687 100644 --- a/src/org/rascalmpl/library/lang/rascal/tests/basic/Strings1.rsc +++ b/src/org/rascalmpl/library/lang/rascal/tests/basic/Strings1.rsc @@ -248,6 +248,23 @@ test bool tstSplit(str S1, str S2) = areOverlapping(S1,S2) || isEmpty(S1) || isE // squeeze +test bool tstSqueeze1(str S) = // !:= squeeze(S, "a-zA-Z"); +test bool tstSqueeze2(str S) = squeeze(S, "") == S; +test bool tstSqueeze3(str S) { + if (// := S) { + return // := squeeze(S, "0-9"); + } + return true; +} + +test bool tstSqueezeUnicode() = squeeze("Hi ๐Ÿ๐ŸWorld", "๐Ÿ") == "Hi ๐ŸWorld"; +test bool tstSqueezeCase1() = squeeze("abc", "a-c") == "abc"; +test bool tstSqueezeCase2() = squeeze("aabc", "a-c") == "abc"; +test bool tstSqueezeCase3() = squeeze("aabcc", "a-c") == "abc"; +test bool tstSqueezeCase4() = squeeze("aabbcc", "a-c") == "abc"; +test bool tstSqueezeCase5() = squeeze("aaabc", "a-c") == "abc"; + + test bool tstStartsWith(str S1, str S2) = startsWith(S1+S2, S1); test bool tstSubstring1(str S){