From 2dc75e62a4426cac5b4e5a5eace7abe19ccaf6d1 Mon Sep 17 00:00:00 2001 From: Davy Landman Date: Tue, 2 Jul 2024 10:50:12 +0200 Subject: [PATCH 1/7] Rewrote squeeze to avoid commons-lang dependency --- pom.xml | 5 ---- src/org/rascalmpl/library/Prelude.java | 26 ++++++++++++++++--- .../lang/rascal/tests/basic/Strings1.rsc | 11 ++++++++ 3 files changed, 33 insertions(+), 9 deletions(-) diff --git a/pom.xml b/pom.xml index e222d0f50e1..885035c4a5e 100644 --- a/pom.xml +++ b/pom.xml @@ -413,11 +413,6 @@ commons-math 2.2 - - commons-lang - commons-lang - 2.6 - org.apache.commons commons-compress diff --git a/src/org/rascalmpl/library/Prelude.java b/src/org/rascalmpl/library/Prelude.java index fae92c2ae5c..b2f8a1d7880 100644 --- a/src/org/rascalmpl/library/Prelude.java +++ b/src/org/rascalmpl/library/Prelude.java @@ -69,7 +69,6 @@ import java.util.function.Consumer; import java.util.regex.Pattern; -import org.apache.commons.lang.CharSetUtils; import org.rascalmpl.debug.IRascalMonitor; import org.rascalmpl.exceptions.JavaCompilation; import org.rascalmpl.exceptions.RuntimeExceptionFactory; @@ -3042,9 +3041,28 @@ public IString trim(IString s) { } public IString squeeze(IString src, IString charSet) { - //@{http://commons.apache.org/lang/api-2.6/index.html?org/apache/commons/lang/text/package-summary.html} - String s = CharSetUtils.squeeze(src.getValue(), charSet.getValue()); - return values.string(s); + final Pattern isCharset = Pattern.compile("[" + charSet.getValue() + "]", Pattern.UNICODE_CHARACTER_CLASS); + StringBuilder result = new StringBuilder(src.length()); + var chars = src.iterator(); + int previousMatch = -1; + while (chars.hasNext()) { + int cp = chars.nextInt(); + if (cp == previousMatch) { + // swallow + continue; + } + + String c = Character.toString(cp); + if (isCharset.matcher(c).matches()) { + previousMatch = cp; + // swallow the next + } + else { + previousMatch = -1; + } + result.append(c); + } + return values.string(result.toString()); } public IString capitalize(IString src) { diff --git a/src/org/rascalmpl/library/lang/rascal/tests/basic/Strings1.rsc b/src/org/rascalmpl/library/lang/rascal/tests/basic/Strings1.rsc index 6f296a744e1..ad83fc892db 100644 --- a/src/org/rascalmpl/library/lang/rascal/tests/basic/Strings1.rsc +++ b/src/org/rascalmpl/library/lang/rascal/tests/basic/Strings1.rsc @@ -3,6 +3,7 @@ module lang::rascal::tests::basic::Strings1 import String; import List; import util::Math; +import IO; test bool subscription(str S){ R = ""; @@ -248,6 +249,16 @@ test bool tstSplit(str S1, str S2) = areOverlapping(S1,S2) || isEmpty(S1) || isE // squeeze +private str rascalSqueeze(str s, str charSet) { + return visit (s) { + case /()+/ => "" + when /[]/ := c + }; +} + +test bool tstSqueeze1(str S) = rascalSqueeze(S, "a-zA-Z") == squeeze(S, "a-zA-Z"); + + test bool tstStartsWith(str S1, str S2) = startsWith(S1+S2, S1); test bool tstSubstring1(str S){ From 90d5afee50c81153598e7ecb7410721e441c3bac Mon Sep 17 00:00:00 2001 From: Davy Landman Date: Tue, 2 Jul 2024 12:04:12 +0200 Subject: [PATCH 2/7] Rewrote squeeze in rascal --- src/org/rascalmpl/library/Prelude.java | 25 ------------------- src/org/rascalmpl/library/String.rsc | 11 ++++++-- .../lang/rascal/tests/basic/Strings1.rsc | 14 +++++------ 3 files changed, 16 insertions(+), 34 deletions(-) diff --git a/src/org/rascalmpl/library/Prelude.java b/src/org/rascalmpl/library/Prelude.java index b2f8a1d7880..22bb530650a 100644 --- a/src/org/rascalmpl/library/Prelude.java +++ b/src/org/rascalmpl/library/Prelude.java @@ -3040,31 +3040,6 @@ public IString trim(IString s) { return values.string(s.getValue().trim()); } - public IString squeeze(IString src, IString charSet) { - final Pattern isCharset = Pattern.compile("[" + charSet.getValue() + "]", Pattern.UNICODE_CHARACTER_CLASS); - StringBuilder result = new StringBuilder(src.length()); - var chars = src.iterator(); - int previousMatch = -1; - while (chars.hasNext()) { - int cp = chars.nextInt(); - if (cp == previousMatch) { - // swallow - continue; - } - - String c = Character.toString(cp); - if (isCharset.matcher(c).matches()) { - previousMatch = cp; - // swallow the next - } - else { - previousMatch = -1; - } - result.append(c); - } - return values.string(result.toString()); - } - public IString capitalize(IString src) { StringBuilder result = new StringBuilder(src.length()); boolean lastWhitespace= true; diff --git a/src/org/rascalmpl/library/String.rsc b/src/org/rascalmpl/library/String.rsc index 3e2cff8ab46..901dcbf06dc 100644 --- a/src/org/rascalmpl/library/String.rsc +++ b/src/org/rascalmpl/library/String.rsc @@ -519,8 +519,15 @@ import String; squeeze("hello", "el"); ``` } -@javaClass{org.rascalmpl.library.Prelude} -public java str squeeze(str src, str charSet); +public str squeeze(str src, str charSet) { + if (charSet == "") { + return src; + } + return visit(src) { + case /+/ => c + when /[]/ := c + } +} diff --git a/src/org/rascalmpl/library/lang/rascal/tests/basic/Strings1.rsc b/src/org/rascalmpl/library/lang/rascal/tests/basic/Strings1.rsc index ad83fc892db..134f909b38d 100644 --- a/src/org/rascalmpl/library/lang/rascal/tests/basic/Strings1.rsc +++ b/src/org/rascalmpl/library/lang/rascal/tests/basic/Strings1.rsc @@ -249,15 +249,15 @@ test bool tstSplit(str S1, str S2) = areOverlapping(S1,S2) || isEmpty(S1) || isE // squeeze -private str rascalSqueeze(str s, str charSet) { - return visit (s) { - case /()+/ => "" - when /[]/ := c - }; +test bool tstSqueeze1(str S) = // !:= squeeze(S, "a-zA-Z"); +test bool tstSqueeze2(str S) = squeeze(S, "") == S; +test bool tstSqueeze3(str S) { + if (// := S) { + return // := squeeze(S, "0-9"); + } + return true; } -test bool tstSqueeze1(str S) = rascalSqueeze(S, "a-zA-Z") == squeeze(S, "a-zA-Z"); - test bool tstStartsWith(str S1, str S2) = startsWith(S1+S2, S1); From f71a9bcdcd4812ceff6b9226dabd32e11f6083db Mon Sep 17 00:00:00 2001 From: Davy Landman Date: Tue, 2 Jul 2024 14:54:40 +0200 Subject: [PATCH 3/7] Made rewrite more precise --- src/org/rascalmpl/library/String.rsc | 3 +-- src/org/rascalmpl/library/lang/rascal/tests/basic/Strings1.rsc | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/org/rascalmpl/library/String.rsc b/src/org/rascalmpl/library/String.rsc index 901dcbf06dc..64fbdf35928 100644 --- a/src/org/rascalmpl/library/String.rsc +++ b/src/org/rascalmpl/library/String.rsc @@ -524,8 +524,7 @@ public str squeeze(str src, str charSet) { return src; } return visit(src) { - case /+/ => c - when /[]/ := c + case /]>+/ => c } } diff --git a/src/org/rascalmpl/library/lang/rascal/tests/basic/Strings1.rsc b/src/org/rascalmpl/library/lang/rascal/tests/basic/Strings1.rsc index 134f909b38d..11c9309efb6 100644 --- a/src/org/rascalmpl/library/lang/rascal/tests/basic/Strings1.rsc +++ b/src/org/rascalmpl/library/lang/rascal/tests/basic/Strings1.rsc @@ -252,7 +252,7 @@ test bool tstSplit(str S1, str S2) = areOverlapping(S1,S2) || isEmpty(S1) || isE test bool tstSqueeze1(str S) = // !:= squeeze(S, "a-zA-Z"); test bool tstSqueeze2(str S) = squeeze(S, "") == S; test bool tstSqueeze3(str S) { - if (// := S) { + if (// := S) { return // := squeeze(S, "0-9"); } return true; From 78539702ba5d10c4d5c4b9b13ef2fd5b60f5ae91 Mon Sep 17 00:00:00 2001 From: Davy Landman Date: Tue, 16 Jul 2024 10:55:36 +0200 Subject: [PATCH 4/7] Implemented squeeze in java --- src/org/rascalmpl/library/Prelude.java | 28 +++++++++++++++++++ src/org/rascalmpl/library/String.rsc | 10 ++----- .../lang/rascal/tests/basic/Strings1.rsc | 2 ++ 3 files changed, 32 insertions(+), 8 deletions(-) diff --git a/src/org/rascalmpl/library/Prelude.java b/src/org/rascalmpl/library/Prelude.java index 22bb530650a..829321b3e02 100644 --- a/src/org/rascalmpl/library/Prelude.java +++ b/src/org/rascalmpl/library/Prelude.java @@ -3040,6 +3040,34 @@ public IString trim(IString s) { return values.string(s.getValue().trim()); } + public IString squeeze(IString src, IString charSet) { + if (charSet.getValue().isBlank()) { + return src; + } + final Pattern isCharset = Pattern.compile("[" + charSet.getValue() + "]", Pattern.UNICODE_CHARACTER_CLASS); + StringBuilder result = new StringBuilder(src.length()); + var chars = src.iterator(); + int previousMatch = -1; + while (chars.hasNext()) { + int cp = chars.nextInt(); + if (cp == previousMatch) { + // swallow + continue; + } + + String c = Character.toString(cp); + if (isCharset.matcher(c).matches()) { + previousMatch = cp; + // swallow the next + } + else { + previousMatch = -1; + } + result.append(c); + } + return values.string(result.toString()); + } + public IString capitalize(IString src) { StringBuilder result = new StringBuilder(src.length()); boolean lastWhitespace= true; diff --git a/src/org/rascalmpl/library/String.rsc b/src/org/rascalmpl/library/String.rsc index 64fbdf35928..3e2cff8ab46 100644 --- a/src/org/rascalmpl/library/String.rsc +++ b/src/org/rascalmpl/library/String.rsc @@ -519,14 +519,8 @@ import String; squeeze("hello", "el"); ``` } -public str squeeze(str src, str charSet) { - if (charSet == "") { - return src; - } - return visit(src) { - case /]>+/ => c - } -} +@javaClass{org.rascalmpl.library.Prelude} +public java str squeeze(str src, str charSet); diff --git a/src/org/rascalmpl/library/lang/rascal/tests/basic/Strings1.rsc b/src/org/rascalmpl/library/lang/rascal/tests/basic/Strings1.rsc index 11c9309efb6..16e8eddbeac 100644 --- a/src/org/rascalmpl/library/lang/rascal/tests/basic/Strings1.rsc +++ b/src/org/rascalmpl/library/lang/rascal/tests/basic/Strings1.rsc @@ -258,6 +258,8 @@ test bool tstSqueeze3(str S) { return true; } +test bool tstSqueezeUnicode() = squeeze("Hi ๐Ÿ๐ŸWorld", "๐Ÿ") == "Hi ๐ŸWorld"; + test bool tstStartsWith(str S1, str S2) = startsWith(S1+S2, S1); From 97131ac02e7863839098bf2adac578f9cb9d53cd Mon Sep 17 00:00:00 2001 From: Davy Landman Date: Tue, 16 Jul 2024 11:31:06 +0200 Subject: [PATCH 5/7] Added more tests for squeeze --- .../rascalmpl/library/lang/rascal/tests/basic/Strings1.rsc | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/org/rascalmpl/library/lang/rascal/tests/basic/Strings1.rsc b/src/org/rascalmpl/library/lang/rascal/tests/basic/Strings1.rsc index 16e8eddbeac..9dddc90cbb2 100644 --- a/src/org/rascalmpl/library/lang/rascal/tests/basic/Strings1.rsc +++ b/src/org/rascalmpl/library/lang/rascal/tests/basic/Strings1.rsc @@ -259,6 +259,11 @@ test bool tstSqueeze3(str S) { } test bool tstSqueezeUnicode() = squeeze("Hi ๐Ÿ๐ŸWorld", "๐Ÿ") == "Hi ๐ŸWorld"; +test bool tstSqueezeCase1() = squeeze("abc", "a-c") == "abc"; +test bool tstSqueezeCase2() = squeeze("aabc", "a-c") == "abc"; +test bool tstSqueezeCase3() = squeeze("aabcc", "a-c") == "abc"; +test bool tstSqueezeCase4() = squeeze("aabbcc", "a-c") == "abc"; +test bool tstSqueezeCase5() = squeeze("aaabc", "a-c") == "abc"; test bool tstStartsWith(str S1, str S2) = startsWith(S1+S2, S1); From 0b39033584f75596b1db04cb89153f726247d43d Mon Sep 17 00:00:00 2001 From: Davy Landman Date: Tue, 16 Jul 2024 11:46:34 +0200 Subject: [PATCH 6/7] Cleaned up test --- src/org/rascalmpl/library/lang/rascal/tests/basic/Strings1.rsc | 1 - 1 file changed, 1 deletion(-) diff --git a/src/org/rascalmpl/library/lang/rascal/tests/basic/Strings1.rsc b/src/org/rascalmpl/library/lang/rascal/tests/basic/Strings1.rsc index 9dddc90cbb2..0a679380687 100644 --- a/src/org/rascalmpl/library/lang/rascal/tests/basic/Strings1.rsc +++ b/src/org/rascalmpl/library/lang/rascal/tests/basic/Strings1.rsc @@ -3,7 +3,6 @@ module lang::rascal::tests::basic::Strings1 import String; import List; import util::Math; -import IO; test bool subscription(str S){ R = ""; From 47c7ac9cd7d09b5cb15036b3fba5caaa51eeb9b0 Mon Sep 17 00:00:00 2001 From: Davy Landman Date: Tue, 16 Jul 2024 11:48:35 +0200 Subject: [PATCH 7/7] Support space in charset --- src/org/rascalmpl/library/Prelude.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/org/rascalmpl/library/Prelude.java b/src/org/rascalmpl/library/Prelude.java index 1ace5a6bed3..a28ca8f0d79 100644 --- a/src/org/rascalmpl/library/Prelude.java +++ b/src/org/rascalmpl/library/Prelude.java @@ -3070,7 +3070,7 @@ public IString trim(IString s) { } public IString squeeze(IString src, IString charSet) { - if (charSet.getValue().isBlank()) { + if (charSet.getValue().isEmpty()) { return src; } final Pattern isCharset = Pattern.compile("[" + charSet.getValue() + "]", Pattern.UNICODE_CHARACTER_CLASS);