diff --git a/src/main/cpp/src/parse_uri.cu b/src/main/cpp/src/parse_uri.cu index 14d80258ff..29a186640d 100644 --- a/src/main/cpp/src/parse_uri.cu +++ b/src/main/cpp/src/parse_uri.cu @@ -486,8 +486,11 @@ uri_parts __device__ validate_uri(const char* str, int len) // anything after the hash is part of the fragment and ignored for this part if (hash >= 0) { - ret.fragment = {str + hash, len - hash}; - if (!validate_fragment(ret.fragment)) { ret.fragment = {}; } + ret.fragment = {str + hash + 1, len - hash - 1}; + if (!validate_fragment(ret.fragment)) { + ret.valid = false; + return ret; + } len = hash; diff --git a/src/main/cpp/tests/parse_uri.cpp b/src/main/cpp/tests/parse_uri.cpp index 52bbf8abe0..0dc5f68522 100644 --- a/src/main/cpp/tests/parse_uri.cpp +++ b/src/main/cpp/tests/parse_uri.cpp @@ -83,9 +83,10 @@ TEST_F(ParseURIProtocolTests, SparkEdges) "http://[fe80::7:8%eth0]", "http://[fe80::7:8%1]", "http://foo.bar/abc/\\\\\\http://foo.bar/abc.gif\\\\\\", - "b.oscars.org:8100/servlet/" + "www.nvidia.com:8100/servlet/" "impc.DisplayCredits?primekey_in=2000041100:05:14115240636", - "https://j.mp/2Ru15Ss "}); + "https://nvidia.com/2Ru15Ss ", + "http://www.nvidia.com/plugins//##"}); auto result = spark_rapids_jni::parse_uri_to_protocol(cudf::strings_column_view{col}); @@ -115,9 +116,15 @@ TEST_F(ParseURIProtocolTests, SparkEdges) "http", "http", "", - "b.oscars.org", + "www.nvidia.com", + "", ""}, - {1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0}); + {1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0}); + + printf("expected:\n"); + cudf::test::print(expected); + printf("result:\n"); + cudf::test::print(result->view()); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view(), expected); } diff --git a/src/test/java/com/nvidia/spark/rapids/jni/ParseURITest.java b/src/test/java/com/nvidia/spark/rapids/jni/ParseURITest.java index 219a3bc439..8bf7176def 100644 --- a/src/test/java/com/nvidia/spark/rapids/jni/ParseURITest.java +++ b/src/test/java/com/nvidia/spark/rapids/jni/ParseURITest.java @@ -49,7 +49,7 @@ void buildExpectedAndRun(String[] testData) { @Test void parseURIToProtocolSparkTest() { String[] testData = { - "https://nvidia.com/https&#://nvidia.com", + /*"https://nvidia.com/https&#://nvidia.com", "https://http://www.nvidia.com", "filesystemmagicthing://bob.yaml", "nvidia.com:8080", @@ -69,15 +69,18 @@ void parseURIToProtocolSparkTest() { "https://www.nvidia.com:8080/q", "https://www.nvidia.com#8080", "file://path/to/cool/file", - "http//www.nvidia.com/q", + "http//www.nvidia.com/q",*/ "http://?", - "http://#", + "http://#",/* "http://??", "http://??/", "http://user:pass@host/file;param?query;p2", "http://foo.bar/abc/\\\\\\http://foo.bar/abc.gif\\\\\\", - "b.oscars.org:8100/servlet/impc.DisplayCredits?primekey_in=2000041100:05:14115240636", - "https://j.mp/2Ru15Ss ", + "nvidia.com:8100/servlet/impc.DisplayCredits?primekey_in=2000041100:05:14115240636", + "https://nvidia.com/2Ru15Ss ", + "http://www.nvidia.com/xmlrpc//##", + "www.nvidia.com:8080/expert/sciPublication.jsp?ExpertId=1746&lenList=all", + "www.nvidia.com:8080/hrcxtf/view?docId=ead/00073.xml&query=T.%20E.%20Lawrence&query-join=and",*/ "", null};