Skip to content

Commit

Permalink
Adding IP6 characters into valid character list for URI parsing
Browse files Browse the repository at this point in the history
Signed-off-by: Mike Wilson <[email protected]>
  • Loading branch information
hyperbolic2346 committed Oct 23, 2023
1 parent 966c5f9 commit 72a5f39
Show file tree
Hide file tree
Showing 3 changed files with 133 additions and 3 deletions.
4 changes: 2 additions & 2 deletions src/main/cpp/src/parse_uri.cu
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@ constexpr bool is_valid_character(char ch, bool alphanum_only)
if (ch >= 'A' && ch <= 'Z') return true; // A-Z
if (ch >= 'a' && ch <= 'z') return true; // a-z
} else {
if (ch >= '!' && ch <= ';' && ch != '"') return true; // 0-9 and !#%&'()*+,-./
if (ch >= '=' && ch <= 'Z' && ch != '>') return true; // A-Z and =?@
if (ch >= '!' && ch <= ':' && ch != '"') return true; // 0-9 and !#%&'()*+,-./:
if (ch >= '=' && ch <= ']' && ch != '>') return true; // A-Z and =?@[]
if (ch >= '_' && ch <= 'z' && ch != '`') return true; // a-z and _
}
return false;
Expand Down
47 changes: 47 additions & 0 deletions src/main/cpp/tests/parse_uri.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,5 +91,52 @@ TEST_F(ParseURIProtocolTests, SparkEdges)
"https"},
{1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1});

CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view(), expected);
}

// Checks protocol extraction for URIs whose host is a bracketed IPv6 literal.
TEST_F(ParseURIProtocolTests, IP6)
{
  // Inputs cover "::" compression, lower- and upper-case hex digits, and a
  // literal followed by an explicit port.
  cudf::test::strings_column_wrapper ipv6_uris({
    "https://[fe80::]",
    "https://[2001:0db8:85a3:0000:0000:8a2e:0370:7334]",
    "https://[2001:0DB8:85A3:0000:0000:8A2E:0370:7334]",
    "https://[2001:db8::1:0]",
    "http://[2001:db8::2:1]",
    "https://[::1]",
    "https://[2001:db8:85a3:8d3:1319:8a2e:370:7348]:443",
  });

  // One expected protocol per input row, in the same order as the inputs.
  cudf::test::strings_column_wrapper expected_protocols(
    {"https", "https", "https", "https", "http", "https", "https"});

  auto const uri_view  = cudf::strings_column_view{ipv6_uris};
  auto const protocols = spark_rapids_jni::parse_uri_to_protocol(uri_view);

  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(protocols->view(), expected_protocols);
}

// Checks protocol extraction for URIs whose host looks like a dotted IPv4 address.
TEST_F(ParseURIProtocolTests, IP4)
{
  // Besides a plain address and one with a port, the inputs include hosts with
  // five components, three components, and a component above 255; the test
  // expects "https" to be reported for every one of them.
  cudf::test::strings_column_wrapper ipv4_uris({
    "https://192.168.1.100/",
    "https://192.168.1.100:8443/",
    "https://192.168.1.100.5/",
    "https://192.168.1/",
    "https://280.100.1.1/",
  });

  // One expected protocol per input row, in the same order as the inputs.
  cudf::test::strings_column_wrapper expected_protocols(
    {"https", "https", "https", "https", "https"});

  auto const uri_view  = cudf::strings_column_view{ipv4_uris};
  auto const protocols = spark_rapids_jni::parse_uri_to_protocol(uri_view);

  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(protocols->view(), expected_protocols);
}

// Checks protocol extraction for URIs that contain percent-encoded octets.
TEST_F(ParseURIProtocolTests, UTF8)
{
  // The first input has escapes in the path, the second has them in the host.
  cudf::test::strings_column_wrapper escaped_uris({
    "https://nvidia.com/%4EV%49%44%49%41",
    "http://%77%77%77.%4EV%49%44%49%41.com",
  });

  // One expected protocol per input row, in the same order as the inputs.
  cudf::test::strings_column_wrapper expected_protocols({"https", "http"});

  auto const uri_view  = cudf::strings_column_view{escaped_uris};
  auto const protocols = spark_rapids_jni::parse_uri_to_protocol(uri_view);

  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(protocols->view(), expected_protocols);
}
85 changes: 84 additions & 1 deletion src/test/java/com/nvidia/spark/rapids/jni/ParseURITest.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@

public class ParseURITest {
@Test
void parseURIToProtocolTest() {
void parseURIToProtocolSparkTest() {
String[] testData = {"https://nvidia.com/https&#://nvidia.com",
"https://http://www.nvidia.com",
"filesystemmagicthing://bob.yaml",
Expand All @@ -50,6 +50,89 @@ void parseURIToProtocolTest() {
"http//www.nvidia.com/q",
"",
null};

String[] expectedStrings = new String[testData.length];
for (int i=0; i<testData.length; i++) {
String scheme = null;
try {
URI uri = new URI(testData[i]);
scheme = uri.getScheme();
} catch (URISyntaxException ex) {
// leave the scheme null if URI is invalid
} catch (NullPointerException ex) {
// leave the scheme null if URI is null
}
expectedStrings[i] = scheme;
}
try (ColumnVector v0 = ColumnVector.fromStrings(testData);
ColumnVector expected = ColumnVector.fromStrings(expectedStrings);
ColumnVector result = ParseURI.parseURIProtocol(v0)) {
AssertUtils.assertColumnsAreEqual(expected, result);
}
}

/**
 * Compares ParseURI.parseURIProtocol against the scheme reported by URI for
 * inputs that contain percent-encoded octets (in the path and in the host).
 */
@Test
void parseURIToProtocolUTF8Test() {
  final String[] uris = {
      "https://nvidia.com/%4EV%49%44%49%41",
      "http://%77%77%77.%4EV%49%44%49%41.com"};

  // Build the reference answers with URI; an input it rejects maps to null.
  final String[] referenceSchemes = new String[uris.length];
  int row = 0;
  for (String uriText : uris) {
    String parsedScheme;
    try {
      parsedScheme = new URI(uriText).getScheme();
    } catch (URISyntaxException | NullPointerException ex) {
      // invalid or null input: no scheme is expected for this row
      parsedScheme = null;
    }
    referenceSchemes[row++] = parsedScheme;
  }

  // Resources are closed automatically in reverse order of declaration.
  try (ColumnVector inputColumn = ColumnVector.fromStrings(uris);
       ColumnVector expectedColumn = ColumnVector.fromStrings(referenceSchemes);
       ColumnVector actualColumn = ParseURI.parseURIProtocol(inputColumn)) {
    AssertUtils.assertColumnsAreEqual(expectedColumn, actualColumn);
  }
}

/**
 * Compares ParseURI.parseURIProtocol against the scheme reported by URI for
 * inputs whose host looks like a dotted IPv4 address, including hosts with
 * too many components, too few components, and a component above 255.
 */
@Test
void parseURIToProtocolIP4Test() {
  final String[] uris = {
      "https://192.168.1.100/",
      "https://192.168.1.100:8443/",
      "https://192.168.1.100.5/",
      "https://192.168.1/",
      "https://280.100.1.1/"};

  // Build the reference answers with URI; an input it rejects maps to null.
  final String[] referenceSchemes = new String[uris.length];
  int row = 0;
  for (String uriText : uris) {
    String parsedScheme;
    try {
      parsedScheme = new URI(uriText).getScheme();
    } catch (URISyntaxException | NullPointerException ex) {
      // invalid or null input: no scheme is expected for this row
      parsedScheme = null;
    }
    referenceSchemes[row++] = parsedScheme;
  }

  // Resources are closed automatically in reverse order of declaration.
  try (ColumnVector inputColumn = ColumnVector.fromStrings(uris);
       ColumnVector expectedColumn = ColumnVector.fromStrings(referenceSchemes);
       ColumnVector actualColumn = ParseURI.parseURIProtocol(inputColumn)) {
    AssertUtils.assertColumnsAreEqual(expectedColumn, actualColumn);
  }
}

@Test
void parseURIToProtocolIP6Test() {
String[] testData = {"https://[fe80::]",
"https://[2001:0db8:85a3:0000:0000:8a2e:0370:7334]",
"https://[2001:0DB8:85A3:0000:0000:8A2E:0370:7334]",
"https://[2001:db8::1:0]",
"http://[2001:db8::2:1]",
"https://[::1]",
"https://[2001:db8:85a3:8d3:1319:8a2e:370:7348]:443"};

String[] expectedStrings = new String[testData.length];
for (int i=0; i<testData.length; i++) {
Expand Down

0 comments on commit 72a5f39

Please sign in to comment.