Skip to content

Commit

Permalink
Improving the last changes to HtsPath (#1694)
Browse files Browse the repository at this point in the history
* fixing edge cases as well as documentation and tests from the change to HtsPath in #1693
  • Loading branch information
lbergelson authored Dec 13, 2023
1 parent 40e3a8b commit b23c36a
Show file tree
Hide file tree
Showing 2 changed files with 103 additions and 16 deletions.
42 changes: 30 additions & 12 deletions src/main/java/htsjdk/io/HtsPath.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
* encoding/escaping.
*
* For example, a URI that contains a scheme and has an embedded "#" in the path will be treated as a URI
* having a fragment delimiter. If the URI contains an scheme, the "#" will be escaped and the encoded "#"
* having a fragment delimiter. If the URI contains no scheme, the "#" will be escaped and the encoded "#"
* will be interpreted as part of the URI path.
*
* There are 3 succeeding levels of input validation/conversion:
Expand Down Expand Up @@ -69,6 +69,7 @@
*/
public class HtsPath implements IOPath, Serializable {
private static final long serialVersionUID = 1L;
private static final String HIERARCHICAL_SCHEME_SEPARATOR = "://";

private final String rawInputString; // raw input string provided by th user; may or may not have a scheme
private final URI uri; // working URI; always has a scheme ("file" if not otherwise specified)
Expand Down Expand Up @@ -255,7 +256,7 @@ private URI getURIForString(final String pathString) {
} catch (URISyntaxException uriException) {
//check that the uri wasn't a badly encoded absolute uri of some sort
//if you don't do this it will be treated as a badly formed file:// url
assertNoNonFileScheme(pathString, uriException);
assertNoProblematicScheme(pathString, uriException);

// the input string isn't a valid URI; assume its a local (non-URI) file reference, and
// use the URI resulting from the corresponding Path
Expand All @@ -282,19 +283,36 @@ private URI getURIForString(final String pathString) {
}

/**
* check that there isn't a non file scheme at the start of the path
* @param pathString
* @param cause
* Check for problems associated with the presence of a hierarchical scheme.
*
* It's better to reject cases like `://` or `ftp://I forgot to encode this` than to treat them as relative file paths
* It's almost certainly an error on the users part instead of an atttempt to intentionally reference a file named
* `file:///workingidr/ftp:/I forgot to encode this`
*
* Note this is only meant to be called in the case of a URLSyntaxException already having occured during initial
* parsing of the URI
*
* @param pathString the path being examined
* @param cause the original failure reason
*/
private static void assertNoNonFileScheme(String pathString, URISyntaxException cause){
final String[] split = pathString.split(":");
static void assertNoProblematicScheme(String pathString, URISyntaxException cause){
if(pathString.equals(HIERARCHICAL_SCHEME_SEPARATOR)){
throw new IllegalArgumentException(HIERARCHICAL_SCHEME_SEPARATOR + " is not a valid path.", cause);
}

final String[] split = pathString.split(HIERARCHICAL_SCHEME_SEPARATOR, -1);
final String scheme = split[0];

if(split.length == 2 && pathString.endsWith(HIERARCHICAL_SCHEME_SEPARATOR)) {
throw new IllegalArgumentException("A path consisting of only a scheme is not allowed: " + pathString, cause);
}

if(split.length > 1){
if(split[0] == null || split[0].isEmpty()){
throw new IllegalArgumentException("Malformed url " + pathString + " includes an empty scheme." +
"\nCheck that it is fully encoded.", cause);
if(scheme == null || scheme.isEmpty()){
throw new IllegalArgumentException("Malformed path " + pathString + " includes an empty scheme.", cause);
}
if(!split[0].equals("file")){
throw new IllegalArgumentException("Malformed url " + pathString + " includes a scheme: " + split[0] + ":// but was an invalid URI." +
if(!scheme.equals("file")){
throw new IllegalArgumentException("Malformed path " + pathString + " includes a scheme: " + scheme + ":// but was an invalid URI." +
"\nCheck that it is fully encoded.", cause);
}
}
Expand Down
77 changes: 73 additions & 4 deletions src/test/java/htsjdk/io/HtsPathUnitTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,6 @@
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.ProviderNotFoundException;
import java.util.HashMap;
import java.util.Map;
import java.util.Optional;

public class HtsPathUnitTest extends HtsjdkTest {
Expand Down Expand Up @@ -114,7 +112,7 @@ public Object[][] validHtsPath() {

//*****************************************************************************************
// Reference that contain characters that require URI-encoding. If the input string is presented
// with no scheme, it will be be automatically encoded by HtsPath, otherwise it
// with no scheme, it will be automatically encoded by HtsPath, otherwise it
// must already be URI-encoded.
//*****************************************************************************************

Expand All @@ -126,6 +124,8 @@ public Object[][] validHtsPath() {
{"file:project/gvcf-pcr/23232_1#1/1.g.vcf.gz", "file:project/gvcf-pcr/23232_1#1/1.g.vcf.gz", true, false},
{"file:/project/gvcf-pcr/23232_1#1/1.g.vcf.gz", "file:/project/gvcf-pcr/23232_1#1/1.g.vcf.gz", true, false},
{"file:///project/gvcf-pcr/23232_1%231/1.g.vcf.g", "file:///project/gvcf-pcr/23232_1%231/1.g.vcf.g", true, true},

{"http://host/path://", "http://host/path://", false, false}
};
}

Expand Down Expand Up @@ -169,7 +169,15 @@ public Object[][] invalidHtsPath() {
// the nul character is rejected on all of the supported platforms in both local
// filenames and URIs, so use it to test HtsPath constructor failure on all platforms
{"\0"},
{"ftp://broad.org/file with space"} // this has a non-file scheme but isn't encoded properly

// this has a non-file scheme but isn't encoded properly, best to reject these with
// a helpful error message than to continue on and treat it as a file:// path
{"ftp://broad.org/file with space"},

// if you have a scheme you need something with it
{"file://"},
{"http://"}

};
}

Expand All @@ -193,11 +201,14 @@ public Object[][] invalidPath() {

{"unknownscheme://foobar"},
{"gendb://adb"},

{"gcs://abucket/bucket"},

// URIs with schemes that are backed by an valid NIO provider, but for which the
// scheme-specific part is not valid.
{"file://nonexistent_authority/path/to/file.bam"}, // unknown authority "nonexistent_authority"


};
}

Expand Down Expand Up @@ -649,4 +660,62 @@ private String getLocalFileAsURIPathString(final Path localPath) {
return localPath.toUri().normalize().getPath();
}

@DataProvider
public Object[][] getNonProblematic() {
return new Object[][]{
// URI is unencoded but no problems with a scheme
{"file/ name-sare/ :wierd-"},
{"hello there"},

//schemes schemes everywheret
{"file://://"},
{"file://://://"},
{"file://something://"},
{"file://something://somoethingelse"},

// file scheme is deliberately ignored here since it's handled specially later
{"file://unencoded file names/ are/ok!"},

//these aren't invalid URIs so they should never be tested by this method, they'll pass it though

{"eep/ee:e:e:ep"},
{"file:///"}
};
}
@Test(dataProvider = "getNonProblematic")
public void testAssertNoProblematicScheme(String path){
HtsPath.assertNoProblematicScheme(path, null);
}

@DataProvider
public Object[][] getProblematic(){
return new Object[][]{

// This is the primary use case, to detect unencoded uris that were intended to be encoded.
// Note that assertNoProblematicScheme doesn't check for issues constructing the URI itself
// it is only called after a URI parsing exception has already occured.
{"http://forgot.com/to encode"},
{"ftp://server.com/file name.txt"},

{"://"},
{"://://"},
{"://://://"},

//this is technically a valid file name, but it seems very unlikely that anyone would do this delierately
//better to call it out
{"://forgotmyscheme"},

//invalid URI, needs the rest of the path
{"file://"},
{"http://"},

//This gets rejected but it should never reach here because it's not an invalid URI
{"http://thisIsOkButItwouldNeverGetHere/something?file=thisone#righthere"},
};
}

@Test(dataProvider = "getProblematic", expectedExceptions = IllegalArgumentException.class)
public void testAssertNoProblematicSchemeRejectedCases(String path){
HtsPath.assertNoProblematicScheme(path, null);
}
}

0 comments on commit b23c36a

Please sign in to comment.