Skip to content

Commit

Permalink
Normalize URL paths: convert /.//p, /..//p, and //p to p
Browse files Browse the repository at this point in the history
  • Loading branch information
theskim committed Dec 16, 2024
1 parent 4c6c2a4 commit 6ff7929
Show file tree
Hide file tree
Showing 3 changed files with 89 additions and 5 deletions.
54 changes: 53 additions & 1 deletion url/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1757,6 +1757,39 @@ impl Url {
let old_after_path_pos = to_u32(self.serialization.len()).unwrap();
let cannot_be_a_base = self.cannot_be_a_base();
let scheme_type = SchemeType::from(self.scheme());
let mut path_empty = false;

// Check ':' and then see if the next character is '/'
let mut has_host = if let Some(index) = self.serialization.find(":") {
if self.serialization.len() > index + 1
&& self.serialization.as_bytes().get(index + 1) == Some(&b'/')
{
let rest = &self.serialization[(index + ":/".len())..];
let host_part = rest.split('/').next().unwrap_or("");
path_empty = rest.is_empty();
!host_part.is_empty() && !host_part.contains('@')
} else {
false
}
} else {
false
};

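// Only do this when the new path is longer than one character, to account
// for cases where "/." was already inserted right after the scheme by serialization.
// In that case, move `path_start` back to just after ':' so that the truncation
// below removes the "/." prefix along with the old path.
// If we set the path, then we already checked the other two conditions from
// https://url.spec.whatwg.org/#url-serializing:
// 1. The host is null
// 2. The first segment of the URL's path is an empty string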
if path.len() > 1 {
if let Some(index) = self.serialization.find(":") {
let removal_start = index + ":".len();
if self.serialization[removal_start..].starts_with("/.") {
self.path_start = removal_start as u32;
}
}
}

self.serialization.truncate(self.path_start as usize);
self.mutate(|parser| {
if cannot_be_a_base {
Expand All @@ -1766,14 +1799,33 @@ impl Url {
}
parser.parse_cannot_be_a_base_path(parser::Input::new_no_trim(path));
} else {
let mut has_host = true; // FIXME
parser.parse_path_start(
scheme_type,
&mut has_host,
parser::Input::new_no_trim(path),
);
}
});

// Handle cases where normalization applies across both the serialization and the path:
// insert "/." immediately after the scheme's ':'.
// This is done only if three conditions are met
// (https://url.spec.whatwg.org/#url-serializing):
// 1. The host is null
// 2. The URL's path length is greater than 1
// 3. The first segment of the URL's path is an empty string
if !has_host && path.len() > 1 && path_empty {
if let Some(index) = self.serialization.find(":") {
if self.serialization.len() > index + 2
&& self.serialization.as_bytes().get(index + 1) == Some(&b'/')
&& self.serialization.as_bytes().get(index + 2) == Some(&b'/')
{
self.serialization.insert_str(index + ":".len(), "/.");
self.path_start += "/.".len() as u32;
}
}
}

self.restore_after_path(old_after_path_pos, &after_path);
}

Expand Down
4 changes: 0 additions & 4 deletions url/tests/expected_failures.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,3 @@
<file://monkey/> set pathname to <\\\\>
<file:///unicorn> set pathname to <//\\/>
<file:///unicorn> set pathname to <//monkey/..//>
<non-spec:/> set pathname to </.//p>
<non-spec:/> set pathname to </..//p>
<non-spec:/> set pathname to <//p>
<non-spec:/.//> set pathname to <p>
36 changes: 36 additions & 0 deletions url/tests/unit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1422,3 +1422,39 @@ fn test_can_be_a_base_with_path_segments_mut() {
.collect();
assert_eq!(segments, vec!["", "not-a-host"]);
}

#[test]
fn test_fuzzing_uri_failures() {
    use url::quirks::set_hostname;

    // Part 1: a host-less, non-special URL whose only path segment starts with a dot.
    let mut data_url = Url::parse("data:/.dummy.path").unwrap();
    assert!(!data_url.cannot_be_a_base());

    // Setting a relative-looking path must yield the same serialization and path.
    data_url.set_path(".dummy.path");
    assert_eq!(data_url.as_str(), "data:/.dummy.path");
    assert_eq!(data_url.path(), "/.dummy.path");

    // Append a second dot-prefixed segment; the guard is dropped at the end of
    // this scope, before the URL is inspected again.
    {
        let mut segments_guard = data_url
            .path_segments_mut()
            .expect("should have path segments");
        segments_guard.push(".another.dummy.path");
    }
    assert_eq!(data_url.as_str(), "data:/.dummy.path/.another.dummy.path");
    assert_eq!(data_url.path(), "/.dummy.path/.another.dummy.path");

    // Part 2: a host-less URL that is given a path beginning with an empty segment.
    let mut demo_url = Url::parse("web+demo:/").unwrap();
    assert!(!demo_url.cannot_be_a_base());

    demo_url.set_path("//.dummy.path");
    assert_eq!(demo_url.path(), "//.dummy.path");

    let collected = demo_url
        .path_segments()
        .expect("should have path segments")
        .collect::<Vec<_>>();
    assert_eq!(collected, vec!["", ".dummy.path"]);

    // The serialization carries a "/." prefix while there is no host ...
    assert_eq!(demo_url.as_str(), "web+demo:/.//.dummy.path");

    // ... the expected serialization has no such prefix once a host is set ...
    set_hostname(&mut demo_url, ".dummy.host").unwrap();
    assert_eq!(demo_url.as_str(), "web+demo://.dummy.host//.dummy.path");

    // ... and an empty host serializes as an empty authority before the path.
    set_hostname(&mut demo_url, "").unwrap();
    assert_eq!(demo_url.as_str(), "web+demo:////.dummy.path");
}

0 comments on commit 6ff7929

Please sign in to comment.