From bd28b3fc089a48490e5dbbb442a1f0ee0771e993 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Richard=20Wiedenh=C3=B6ft?= Date: Sat, 16 May 2020 13:48:51 +0200 Subject: [PATCH] Correctly parse file URIs This changes the URI parser to allow URIs of the forms * scheme:/absolute/path * scheme:///absolute/path It does impact HTTP URI parsing in that HTTP URIs without an authority part are now allowed. Fixes issue #323 --- src/uri/mod.rs | 25 ++++++++++++++----------- src/uri/scheme.rs | 18 +++++++++--------- src/uri/tests.rs | 29 ++++++++++++++++++++++++++++- 3 files changed, 51 insertions(+), 21 deletions(-) diff --git a/src/uri/mod.rs b/src/uri/mod.rs index 144c6208..ce1e2e76 100644 --- a/src/uri/mod.rs +++ b/src/uri/mod.rs @@ -802,10 +802,16 @@ fn parse_full(mut s: Bytes) -> Result { } Scheme2::Other(n) => { // Grab the protocol - let mut scheme = s.split_to(n + 3); + let scheme = s.split_to(n); - // Strip ://, TODO: truncate - let _ = scheme.split_off(n); + // Remove ":" or "://" but not ":/" + if s[1] == b'/' && s[2] == b'/' { + // Remove "://" + let _ = s.split_to(3); + } else { + // Remove ":" + let _ = s.split_to(1); + } // Allocate the ByteStr let val = unsafe { ByteStr::from_utf8_unchecked(scheme) }; @@ -834,11 +840,6 @@ fn parse_full(mut s: Bytes) -> Result { }); } - // Authority is required when absolute - if authority_end == 0 { - return Err(ErrorKind::InvalidFormat.into()); - } - let authority = s.split_to(authority_end); let authority = Authority { data: unsafe { ByteStr::from_utf8_unchecked(authority) }, @@ -901,11 +902,13 @@ impl PartialEq for Uri { other = &other[scheme.len()..]; - if &other[..3] != b"://" { + if &other[..3] == b"://" { + other = &other[3..]; + } else if &other[..1] == b":" { + other = &other[1..]; + } else { return false; } - - other = &other[3..]; } if let Some(auth) = self.authority() { diff --git a/src/uri/scheme.rs b/src/uri/scheme.rs index 682b11ee..6ff6bfcb 100644 --- a/src/uri/scheme.rs +++ b/src/uri/scheme.rs @@ -281,22 +281,22 @@ 
impl Scheme2 { } } - if s.len() > 3 { + if s.len() >= 3 { for i in 0..s.len() { let b = s[i]; match SCHEME_CHARS[b as usize] { b':' => { - // Not enough data remaining - if s.len() < i + 3 { - break; - } - - // Not a scheme - if &s[i + 1..i + 3] != b"//" { + // According to https://tools.ietf.org/html/rfc3986#section-3 the URI "x:" + // has scheme "x", but to differentiate from shortcuts like + // "localhost:3000", which should be handled equivalently to + // "http://localhost:3000", we only treat a URI part as a scheme if the ':' + // is followed by a '/'. + if (i + 1) >= s.len() || s[i + 1] != b'/' { break; } + // Check length if i > MAX_SCHEME_LEN { return Err(ErrorKind::SchemeTooLong.into()); } @@ -304,7 +304,7 @@ impl Scheme2 { // Return scheme return Ok(Scheme2::Other(i)); } - // Invald scheme character, abort + // Invalid scheme character, abort 0 => break, _ => {} } diff --git a/src/uri/tests.rs b/src/uri/tests.rs index 719cb94e..9398eef3 100644 --- a/src/uri/tests.rs +++ b/src/uri/tests.rs @@ -254,6 +254,34 @@ test_parse! { port = Port::from_str("1234").ok(), } +test_parse! { + test_uri_parse_scheme_single_slash, + "x:/y", + [], + + scheme = part!("x"), + path = "/y", +} + +test_parse! { + test_uri_parse_scheme_triple_slash, + "x:///y", + [], + + scheme = part!("x"), + path = "/y", +} + + +test_parse! { + test_uri_parse_scheme_file, + "file:/foo/bar", + ["file:///foo/bar"], + + scheme = part!("file"), + path = "/foo/bar", +} + test_parse! { test_userinfo1, "http://a:b@127.0.0.1:1234/", @@ -419,7 +447,6 @@ fn test_uri_parse_error() { Uri::from_str(s).unwrap_err(); } - err("http://"); err("htt:p//host"); err("hyper.rs/"); err("hyper.rs?key=val");