Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding IP6 characters into valid character list for URI parsing #1516

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 6 additions & 9 deletions src/main/cpp/src/parse_uri.cu
Original file line number Diff line number Diff line change
Expand Up @@ -39,15 +39,12 @@ namespace {
// Utility to validate that a single byte is acceptable inside a URI.
//
// ch            - the byte to classify.
// alphanum_only - true:  accept only 0-9, A-Z, a-z, '-' and '.'.
//                 false: accept the wider punctuation set described below.
//
// Returns true when `ch` is accepted, false otherwise.
//
// Every check is a contiguous ASCII range with one character carved out, so the
// accepted set is easiest to read against an ASCII table.
constexpr bool is_valid_character(char ch, bool alphanum_only)
{
  if (alphanum_only) {
    // '-' .. '9' is "-./0123456789"; '/' is excluded, leaving digits, '-' and '.'.
    return (ch >= '-' && ch <= '9' && ch != '/') ||
           (ch >= 'A' && ch <= 'Z') ||
           (ch >= 'a' && ch <= 'z');
  }

  // '!' .. ';' minus '"'  ->  ! # $ % & ' ( ) * + , - . / 0-9 : ;
  // '=' .. ']' minus '>'  ->  = ? @ A-Z [ ] and the backslash that sits between the brackets
  // '_' .. 'z' minus '`'  ->  _ a-z
  // Space, '"', '<', '>', '^', '`', and everything above 'z' are therefore rejected.
  // NOTE(review): backslash is admitted only because it lies inside the '='..']' range;
  // confirm that this is intended.
  return (ch >= '!' && ch <= ';' && ch != '"') ||
         (ch >= '=' && ch <= ']' && ch != '>') ||
         (ch >= '_' && ch <= 'z' && ch != '`');
}

Expand Down
47 changes: 47 additions & 0 deletions src/main/cpp/tests/parse_uri.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,5 +91,52 @@ TEST_F(ParseURIProtocolTests, SparkEdges)
"https"},
{1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1});

CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view(), expected);
}

// Protocol extraction for URIs whose host is a bracketed IPv6 literal
// (compressed, full-length, upper-case hex, and with a trailing port).
TEST_F(ParseURIProtocolTests, IP6)
{
  cudf::test::strings_column_wrapper input({
    "https://[fe80::]",
    "https://[2001:0db8:85a3:0000:0000:8a2e:0370:7334]",
    "https://[2001:0DB8:85A3:0000:0000:8A2E:0370:7334]",
    "https://[2001:db8::1:0]",
    "http://[2001:db8::2:1]",
    "https://[::1]",
    "https://[2001:db8:85a3:8d3:1319:8a2e:370:7348]:443",
  });

  // One expected protocol per input row, in the same order.
  cudf::test::strings_column_wrapper want(
    {"https", "https", "https", "https", "http", "https", "https"});

  cudf::strings_column_view input_view{input};
  auto protocols = spark_rapids_jni::parse_uri_to_protocol(input_view);

  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(protocols->view(), want);
}

// Protocol extraction for URIs with dotted-numeric hosts, including ones that
// are not well-formed IPv4 addresses (too many or too few octets, octet > 255).
TEST_F(ParseURIProtocolTests, IP4)
{
  cudf::test::strings_column_wrapper input({
    "https://192.168.1.100/",
    "https://192.168.1.100:8443/",
    "https://192.168.1.100.5/",
    "https://192.168.1/",
    "https://280.100.1.1/",
  });

  // Every row is expected to yield the "https" protocol.
  cudf::test::strings_column_wrapper want({"https", "https", "https", "https", "https"});

  cudf::strings_column_view input_view{input};
  auto protocols = spark_rapids_jni::parse_uri_to_protocol(input_view);

  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(protocols->view(), want);
}

// Protocol extraction for URIs that contain percent-encoded octets in the
// path (first row) and in the host (second row).
TEST_F(ParseURIProtocolTests, UTF8)
{
  cudf::test::strings_column_wrapper input({
    "https://nvidia.com/%4EV%49%44%49%41",
    "http://%77%77%77.%4EV%49%44%49%41.com",
  });

  // One expected protocol per input row, in the same order.
  cudf::test::strings_column_wrapper want({"https", "http"});

  cudf::strings_column_view input_view{input};
  auto protocols = spark_rapids_jni::parse_uri_to_protocol(input_view);

  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(protocols->view(), want);
}
74 changes: 55 additions & 19 deletions src/test/java/com/nvidia/spark/rapids/jni/ParseURITest.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,29 @@
import ai.rapids.cudf.ColumnVector;

public class ParseURITest {
/**
 * Runs {@code ParseURI.parseURIProtocol} over {@code testData} and asserts that the
 * resulting column equals the schemes reported by {@link java.net.URI#getScheme()}.
 *
 * An entry for which {@code new URI(...)} throws (syntax error, or a null entry)
 * is expected to produce a null scheme.
 *
 * @param testData URI strings to check; entries may be null
 */
void buildExpectedAndRun(String[] testData) {
  final String[] expectedStrings = new String[testData.length];
  int slot = 0;
  for (String candidate : testData) {
    String scheme;
    try {
      scheme = new URI(candidate).getScheme();
    } catch (URISyntaxException | NullPointerException ex) {
      // Invalid or null input: the reference answer is a null scheme.
      scheme = null;
    }
    expectedStrings[slot++] = scheme;
  }

  // try-with-resources closes all three columns, including when the assertion fails.
  try (ColumnVector v0 = ColumnVector.fromStrings(testData);
       ColumnVector expected = ColumnVector.fromStrings(expectedStrings);
       ColumnVector result = ParseURI.parseURIProtocol(v0)) {
    AssertUtils.assertColumnsAreEqual(expected, result);
  }
}

@Test
void parseURIToProtocolTest() {
void parseURIToProtocolSparkTest() {
String[] testData = {"https://nvidia.com/https&#://nvidia.com",
"https://http://www.nvidia.com",
"filesystemmagicthing://bob.yaml",
Expand All @@ -50,24 +71,39 @@ void parseURIToProtocolTest() {
"http//www.nvidia.com/q",
"",
null};

buildExpectedAndRun(testData);
}

/**
 * URIs containing percent-encoded octets, in the path and in the host, checked
 * against java.net.URI via {@code buildExpectedAndRun}.
 */
@Test
void parseURIToProtocolUTF8Test() {
  buildExpectedAndRun(new String[] {
      "https://nvidia.com/%4EV%49%44%49%41",
      "http://%77%77%77.%4EV%49%44%49%41.com"});
}

/**
 * URIs with dotted-numeric hosts (including ones that are not well-formed IPv4
 * addresses), checked against java.net.URI via {@code buildExpectedAndRun}.
 */
@Test
void parseURIToProtocolIP4Test() {
  buildExpectedAndRun(new String[] {
      "https://192.168.1.100/",
      "https://192.168.1.100:8443/",
      "https://192.168.1.100.5/",
      "https://192.168.1/",
      "https://280.100.1.1/"});
}

/**
 * URIs whose host is a bracketed IPv6 literal (compressed, full-length,
 * upper-case hex, and with a trailing port).
 *
 * The expected schemes are derived from java.net.URI and compared against
 * {@code ParseURI.parseURIProtocol} by {@code buildExpectedAndRun}, the same
 * helper the other tests in this class use, so the check is not repeated inline here.
 */
@Test
void parseURIToProtocolIP6Test() {
  String[] testData = {"https://[fe80::]",
                       "https://[2001:0db8:85a3:0000:0000:8a2e:0370:7334]",
                       "https://[2001:0DB8:85A3:0000:0000:8A2E:0370:7334]",
                       "https://[2001:db8::1:0]",
                       "http://[2001:db8::2:1]",
                       "https://[::1]",
                       "https://[2001:db8:85a3:8d3:1319:8a2e:370:7348]:443"};

  buildExpectedAndRun(testData);
}
}