Skip to content

Commit

Permalink
Adding java-side test and binding
Browse files Browse the repository at this point in the history
Signed-off-by: Mike Wilson <[email protected]>
  • Loading branch information
hyperbolic2346 committed Oct 17, 2023
1 parent 6edf2ed commit f33919a
Show file tree
Hide file tree
Showing 2 changed files with 114 additions and 0 deletions.
44 changes: 44 additions & 0 deletions src/main/java/com/nvidia/spark/rapids/jni/ParseURI.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.nvidia.spark.rapids.jni;

import ai.rapids.cudf.ColumnVector;
import ai.rapids.cudf.ColumnView;
import ai.rapids.cudf.DType;
import ai.rapids.cudf.NativeDepsLoader;

/**
 * Static helpers for extracting pieces of URIs stored in cudf string columns.
 * The actual parsing is delegated to native code through JNI.
 */
public class ParseURI {
  static {
    // Ensure the native dependencies are loaded before any native method in this class is called.
    NativeDepsLoader.loadNativeDeps();
  }

  /**
   * Parse protocol for each URI from the incoming column.
   *
   * <p>The type check below is a Java {@code assert}, so it is only evaluated when the JVM
   * runs with assertions enabled ({@code -ea}).
   *
   * @param URIColumn The input strings column in which each row contains a URI.
   * @return A string column with protocol data extracted. The returned vector is a new
   *         {@link ColumnVector} wrapping the handle produced by the native call.
   */
  public static ColumnVector parseURIProtocol(ColumnView URIColumn) {
    assert URIColumn.getType().equals(DType.STRING) : "Input type must be String";
    long resultHandle = parseProtocol(URIColumn.getNativeView());
    return new ColumnVector(resultHandle);
  }

  /**
   * Native entry point that extracts the protocol from every row of a URI column.
   *
   * @param uriColumnHandle native view handle of the input column, as returned by
   *                        {@link ColumnView#getNativeView()}.
   * @return native handle that {@link #parseURIProtocol(ColumnView)} wraps in a
   *         {@link ColumnVector}.
   */
  private static native long parseProtocol(long uriColumnHandle);
}
70 changes: 70 additions & 0 deletions src/test/java/com/nvidia/spark/rapids/jni/ParseURITest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.nvidia.spark.rapids.jni;

import java.net.URI;
import java.net.URISyntaxException;

import org.junit.jupiter.api.Test;

import ai.rapids.cudf.AssertUtils;
import ai.rapids.cudf.ColumnVector;

/**
 * Checks {@link ParseURI#parseURIProtocol} against the scheme reported by {@link java.net.URI}
 * for a mix of well-formed and malformed inputs.
 */
public class ParseURITest {

  /**
   * Returns the scheme {@link URI} reports for {@code candidate}, or {@code null} when the
   * string is rejected with a {@link URISyntaxException}.
   */
  private static String schemeOrNull(String candidate) {
    try {
      return new URI(candidate).getScheme();
    } catch (URISyntaxException ignored) {
      // An input that java.net.URI rejects is expected to yield a null row.
      return null;
    }
  }

  @Test
  void parseURIToProtocolTest() {
    String[] testData = {
      "https://nvidia.com/https&#://nvidia.com",
      "https://http://www.nvidia.com",
      "filesystemmagicthing://bob.yaml",
      "nvidia.com:8080",
      "http://thisisinvalid.data/due/to-the_character%s/inside*the#url`~",
      "file:/absolute/path",
      "//www.nvidia.com",
      "#bob",
      "#this%doesnt#make//sense://to/me",
      "HTTP:&bob",
      "/absolute/path",
      "http://%77%77%77.%4EV%49%44%49%41.com",
      "https:://broken.url",
      "https://www.nvidia.com/q/This%20is%20a%20query",
      "http:/www.nvidia.com",
      "http://:www.nvidia.com/",
      "http:///nvidia.com/q",
      "https://www.nvidia.com:8080/q",
      "https://www.nvidia.com#8080",
      "file://path/to/cool/file",
      "http//www.nvidia.com/q"
    };

    // Build the reference answers on the CPU, one per input row and in the same order.
    String[] expectedStrings = new String[testData.length];
    int row = 0;
    for (String input : testData) {
      expectedStrings[row++] = schemeOrNull(input);
    }

    try (ColumnVector input = ColumnVector.fromStrings(testData);
         ColumnVector expected = ColumnVector.fromStrings(expectedStrings);
         ColumnVector actual = ParseURI.parseURIProtocol(input)) {
      AssertUtils.assertColumnsAreEqual(expected, actual);
    }
  }
}

0 comments on commit f33919a

Please sign in to comment.