diff --git a/idna/Cargo.toml b/idna/Cargo.toml index 60024732..9db96d48 100644 --- a/idna/Cargo.toml +++ b/idna/Cargo.toml @@ -8,7 +8,7 @@ repository = "https://github.com/servo/rust-url/" license = "MIT OR Apache-2.0" autotests = false edition = "2018" -rust-version = "1.57" # For panic in const context +rust-version = "1.63" # For panic in const context [lib] doctest = false diff --git a/url/src/lib.rs b/url/src/lib.rs index e015acce..b953d349 100644 --- a/url/src/lib.rs +++ b/url/src/lib.rs @@ -1757,6 +1757,39 @@ impl Url { let old_after_path_pos = to_u32(self.serialization.len()).unwrap(); let cannot_be_a_base = self.cannot_be_a_base(); let scheme_type = SchemeType::from(self.scheme()); + let mut path_empty = false; + + // Check ':' and then see if the next character is '/' + let mut has_host = if let Some(index) = self.serialization.find(":") { + if self.serialization.len() > index + 1 + && self.serialization.as_bytes().get(index + 1) == Some(&b'/') + { + let rest = &self.serialization[(index + ":/".len())..]; + let host_part = rest.split('/').next().unwrap_or(""); + path_empty = rest.is_empty(); + !host_part.is_empty() && !host_part.contains('@') + } else { + false + } + } else { + false + }; + + // Ensure the path length is greater than 1 to account + // for cases where "/." is already appended from serialization + // If we set path, then we already checked the other two conditions: + // https://url.spec.whatwg.org/#url-serializing + // 1. The host is null + // 2. 
the first segment of the URL's path is an empty string + if path.len() > 1 { + if let Some(index) = self.serialization.find(":") { + let removal_start = index + ":".len(); + if self.serialization[removal_start..].starts_with("/.") { + self.path_start -= "/.".len() as u32; + } + } + } + self.serialization.truncate(self.path_start as usize); self.mutate(|parser| { if cannot_be_a_base { @@ -1766,7 +1799,6 @@ impl Url { } parser.parse_cannot_be_a_base_path(parser::Input::new_no_trim(path)); } else { - let mut has_host = true; // FIXME parser.parse_path_start( scheme_type, &mut has_host, @@ -1774,6 +1806,26 @@ impl Url { ); } }); + + // For cases where normalization is applied across both the serialization and the path. + // Append "/." immediately after the scheme (up to ":") + // This is done if three conditions are met. + // https://url.spec.whatwg.org/#url-serializing + // 1. The host is null + // 2. The url's path length is greater than 1 + // 3. the first segment of the URL's path is an empty string + if !has_host && path.len() > 1 && path_empty { + if let Some(index) = self.serialization.find(":") { + if self.serialization.len() > index + 2 + && self.serialization.as_bytes().get(index + 1) == Some(&b'/') + && self.serialization.as_bytes().get(index + 2) == Some(&b'/') + { + self.serialization.insert_str(index + ":".len(), "/."); + self.path_start += "/.".len() as u32; + } + } + } + self.restore_after_path(old_after_path_pos, &after_path); } @@ -2077,7 +2129,7 @@ impl Url { } else { self.host_end }; - let suffix = self.slice(old_suffix_pos..).to_owned(); + let mut suffix = self.slice(old_suffix_pos..).to_owned(); self.serialization.truncate(self.host_start as usize); if !self.has_authority() { debug_assert!(self.slice(self.scheme_end..self.host_start) == ":"); @@ -2091,6 +2143,13 @@ impl Url { self.host_end = to_u32(self.serialization.len()).unwrap(); self.host = host.into(); + // Adjust serialization to switch from host to empty segment + if 
suffix.starts_with("/.//") { + suffix.drain(.."/.".len()); + // pathname should be "//p" not "p" given that the first segment was empty + self.path_start -= "//".len() as u32; + } + if let Some(new_port) = opt_new_port { self.port = new_port; if let Some(port) = new_port { diff --git a/url/src/path_segments.rs b/url/src/path_segments.rs index 5cc8e775..6ecc08af 100644 --- a/url/src/path_segments.rs +++ b/url/src/path_segments.rs @@ -239,7 +239,7 @@ impl<'a> PathSegmentsMut<'a> { I::Item: AsRef, { let scheme_type = SchemeType::from(self.url.scheme()); - let path_start = self.url.path_start as usize; + let mut path_start = self.url.path_start as usize; self.url.mutate(|parser| { parser.context = parser::Context::PathSegmentSetter; for segment in segments { @@ -253,7 +253,44 @@ impl<'a> PathSegmentsMut<'a> { { parser.serialization.push('/'); } - let mut has_host = true; // FIXME account for this? + + let mut path_empty = false; + + // Check ':' and then see if the next character is '/' + let mut has_host = if let Some(index) = parser.serialization.find(":") { + if parser.serialization.len() > index + 1 + && parser.serialization.as_bytes().get(index + 1) == Some(&b'/') + { + let rest = &parser.serialization[(index + ":/".len())..]; + let host_part = rest.split('/').next().unwrap_or(""); + path_empty = rest.is_empty(); + !host_part.is_empty() && !host_part.contains('@') + } else { + false + } + } else { + false + }; + + // For cases where normalization is applied across both the serialization and the path. + // Append "/." immediately after the scheme (up to ":") + // This is done if three conditions are met. + // https://url.spec.whatwg.org/#url-serializing + // 1. The host is null + // 2. The url's path length is greater than 1 + // 3. 
the first segment of the URL's path is an empty string + if !has_host && segment.len() > 1 && path_empty { + if let Some(index) = parser.serialization.find(":") { + if parser.serialization.len() == index + 2 + && parser.serialization.as_bytes().get(index + 1) == Some(&b'/') + { + // Append an extra '/' to ensure that "/./path" becomes "/.//path" + parser.serialization.insert_str(index + ":".len(), "/./"); + path_start += "/.".len(); + } + } + } + parser.parse_path( scheme_type, &mut has_host, @@ -262,6 +299,7 @@ impl<'a> PathSegmentsMut<'a> { ); } }); + self.url.path_start = path_start as u32; self } } diff --git a/url/tests/expected_failures.txt b/url/tests/expected_failures.txt index 899e7f70..9bf60b34 100644 --- a/url/tests/expected_failures.txt +++ b/url/tests/expected_failures.txt @@ -36,14 +36,8 @@ set hostname to set hostname to - set hostname to - set hostname to <> set pathname to <> set href to set pathname to <\\\\> set pathname to set pathname to - set pathname to - set pathname to - set pathname to - set pathname to

diff --git a/url/tests/unit.rs b/url/tests/unit.rs
index b3596610..f78372ca 100644
--- a/url/tests/unit.rs
+++ b/url/tests/unit.rs
@@ -1379,3 +1379,58 @@ fn serde_error_message() {
         r#"relative URL without a base: "§invalid#+#*Ä" at line 1 column 25"#
     );
 }
+
+/// Setting a path that starts with `//` on a host-less URL must not promote the first
+/// segment to a host: the serialization gains a `/.` marker right after the scheme, and
+/// the marker is gone again once `quirks::set_hostname` assigns a hostname.
+#[test]
+fn test_can_be_a_base_with_set_path() {
+    use url::quirks;
+
+    let mut parsed = Url::parse("web+demo:/").unwrap();
+    assert!(!parsed.cannot_be_a_base());
+
+    parsed.set_path("//not-a-host");
+    assert_eq!(parsed.path(), "//not-a-host");
+
+    // The leading empty segment must still be reported by the segment iterator.
+    let pieces = parsed
+        .path_segments()
+        .expect("should have path segments")
+        .collect::<Vec<_>>();
+    assert_eq!(pieces, vec!["", "not-a-host"]);
+    assert_eq!(parsed.as_str(), "web+demo:/.//not-a-host");
+
+    // Apply each hostname in turn and compare the full serialization afterwards.
+    let hostname_steps = [
+        ("test", "web+demo://test//not-a-host"),
+        ("", "web+demo:////not-a-host"),
+    ];
+    for (hostname, serialized) in hostname_steps {
+        quirks::set_hostname(&mut parsed, hostname).unwrap();
+        assert_eq!(parsed.as_str(), serialized);
+    }
+}
+
+/// Pushing an empty segment and then `not-a-host` through `path_segments_mut` must
+/// serialize as `web+demo:/.//not-a-host` and report `//not-a-host` as the path.
+#[test]
+fn test_can_be_a_base_with_path_segments_mut() {
+    let mut parsed = Url::parse("web+demo:/").unwrap();
+    assert!(!parsed.cannot_be_a_base());
+
+    {
+        // Scope the editor so it is dropped before the URL is inspected.
+        let mut editor = parsed.path_segments_mut().expect("should have path segments");
+        editor.push("").push("not-a-host");
+    }
+
+    assert_eq!(parsed.as_str(), "web+demo:/.//not-a-host");
+    assert_eq!(parsed.path(), "//not-a-host");
+
+    let pieces: Vec<&str> = parsed
+        .path_segments()
+        .expect("should have path segments")
+        .collect();
+    assert_eq!(pieces, vec!["", "not-a-host"]);
+}