From f8f092ffc56fc4703e87a857a666a7b26ecfa6b8 Mon Sep 17 00:00:00 2001 From: sud Date: Wed, 5 Feb 2025 00:37:07 -0500 Subject: [PATCH 1/5] Sort compact/detailed/markdown error output by file path --- lychee-bin/src/formatters/stats/compact.rs | 2 +- lychee-bin/src/formatters/stats/detailed.rs | 2 +- lychee-bin/src/formatters/stats/markdown.rs | 2 +- lychee-bin/src/formatters/stats/mod.rs | 17 +++++++++++++++++ lychee-lib/src/types/input.rs | 15 +++++++++++++++ 5 files changed, 35 insertions(+), 3 deletions(-) diff --git a/lychee-bin/src/formatters/stats/compact.rs b/lychee-bin/src/formatters/stats/compact.rs index 066750d423..63324a3640 100644 --- a/lychee-bin/src/formatters/stats/compact.rs +++ b/lychee-bin/src/formatters/stats/compact.rs @@ -37,7 +37,7 @@ impl Display for CompactResponseStats { let response_formatter = get_response_formatter(&self.mode); - for (source, responses) in &stats.error_map { + for (source, responses) in super::sort_stat_map(&stats.error_map) { color!(f, BOLD_YELLOW, "[{}]:\n", source)?; for response in responses { writeln!( diff --git a/lychee-bin/src/formatters/stats/detailed.rs b/lychee-bin/src/formatters/stats/detailed.rs index 732d2d0898..2c83ee97b0 100644 --- a/lychee-bin/src/formatters/stats/detailed.rs +++ b/lychee-bin/src/formatters/stats/detailed.rs @@ -49,7 +49,7 @@ impl Display for DetailedResponseStats { let response_formatter = get_response_formatter(&self.mode); - for (source, responses) in &stats.error_map { + for (source, responses) in super::sort_stat_map(&stats.error_map) { // Using leading newlines over trailing ones (e.g. `writeln!`) // lets us avoid extra newlines without any additional logic. write!(f, "\n\nErrors in {source}")?; diff --git a/lychee-bin/src/formatters/stats/markdown.rs b/lychee-bin/src/formatters/stats/markdown.rs index 1ab507f1e7..dba76222e8 100644 --- a/lychee-bin/src/formatters/stats/markdown.rs +++ b/lychee-bin/src/formatters/stats/markdown.rs @@ -127,7 +127,7 @@ where { if !&map.is_empty() { writeln!(f, "\n## {name} per input")?; - for (source, responses) in map { + for (source, responses) in super::sort_stat_map(map) { writeln!(f, "\n### {name} in {source}\n")?; for response in responses { writeln!(f, "{}", write_stat(response)?)?; diff --git a/lychee-bin/src/formatters/stats/mod.rs b/lychee-bin/src/formatters/stats/mod.rs index 5a50c4eb0e..df90aeadb1 100644 --- a/lychee-bin/src/formatters/stats/mod.rs +++ b/lychee-bin/src/formatters/stats/mod.rs @@ -10,10 +10,27 @@ pub(crate) use json::Json; pub(crate) use markdown::Markdown; pub(crate) use raw::Raw; +use std::{ + collections::{HashMap, HashSet}, + fmt::Display +}; + use crate::stats::ResponseStats; +use lychee_lib::InputSource; use anyhow::Result; pub(crate) trait StatsFormatter { /// Format the stats of all responses and write them to stdout fn format(&self, stats: ResponseStats) -> Result>; } + +// Convert error_map to a sorted Vec of key-value pairs +fn sort_stat_map(error_map: &HashMap>) -> Vec<(&InputSource, &HashSet)> +where T: Display +{ + let mut errors: Vec<(&InputSource, &HashSet)> = error_map.iter().collect(); + + errors.sort_by(|(source, _), (other_source, _)| source.cmp(other_source)); + + errors +} \ No newline at end of file diff --git a/lychee-lib/src/types/input.rs b/lychee-lib/src/types/input.rs index c32be7feb8..dea7279d4e 100644 --- a/lychee-lib/src/types/input.rs +++ b/lychee-lib/src/types/input.rs @@ -104,6 +104,21 @@ impl Display for InputSource { } } +// Compare InputSources by their string representations +impl PartialOrd for InputSource { + #[inline] + fn partial_cmp(&self, other: &Self) -> Option { + self.to_string().partial_cmp(&other.to_string()) + } +} + +impl Ord for InputSource { + #[inline] + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self.to_string().cmp(&other.to_string()) + } +} + /// Lychee Input with optional file hint for parsing #[derive(Clone, Debug, PartialEq, Eq, Hash)] pub struct Input { From 6c886e755568e5693534488dd7a50bbb7cdea1e1 Mon Sep 17 00:00:00 2001 From: sud Date: Thu, 13 Feb 2025 19:09:21 -0500 Subject: [PATCH 2/5] - Modify sort_stats_map to sort HashMap values - Add unit/integration tests for sort_stats_map - Add human-sort dependency for natural sorting --- Cargo.lock | 7 ++ lychee-bin/Cargo.toml | 1 + lychee-bin/src/formatters/stats/compact.rs | 11 ++- lychee-bin/src/formatters/stats/detailed.rs | 17 +++- lychee-bin/src/formatters/stats/mod.rs | 100 ++++++++++++++++++-- lychee-bin/tests/cli.rs | 47 +++++++++ lychee-lib/src/types/input.rs | 15 --- 7 files changed, 167 insertions(+), 31 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f14e7d4454..28ab087ba0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1871,6 +1871,12 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" +[[package]] +name = "human-sort" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "140a09c9305e6d5e557e2ed7cbc68e05765a7d4213975b87cb04920689cc6219" + [[package]] name = "humantime" version = "2.1.0" @@ -2469,6 +2475,7 @@ dependencies = [ "futures", "headers", "http 1.2.0", + "human-sort", "humantime", "humantime-serde", "indicatif", diff --git a/lychee-bin/Cargo.toml b/lychee-bin/Cargo.toml index b98d83b671..926ba5aec8 100644 --- a/lychee-bin/Cargo.toml +++ b/lychee-bin/Cargo.toml @@ -55,6 +55,7 @@ tokio = { version = "1.42.0", features = ["full"] } tokio-stream = "0.1.17" toml = "0.8.19" url = "2.5.4" +human-sort = "0.2.2" [dev-dependencies] assert_cmd = "2.0.16" diff --git a/lychee-bin/src/formatters/stats/compact.rs b/lychee-bin/src/formatters/stats/compact.rs index 63324a3640..9b1ed905de 100644 --- a/lychee-bin/src/formatters/stats/compact.rs +++ b/lychee-bin/src/formatters/stats/compact.rs @@ -47,9 +47,16 @@ impl Display for CompactResponseStats { )?; } - if let Some(suggestions) = &stats.suggestion_map.get(source) { + if let Some(suggestions) = stats.suggestion_map.get(source) { + // Sort suggestions + let mut sorted_suggestions: Vec<_> = suggestions.iter().collect(); + sorted_suggestions.sort_by(|a, b| { + let (a, b) = (a.to_string().to_lowercase(), b.to_string().to_lowercase()); + human_sort::compare(&a, &b) + }); + writeln!(f, "\n\u{2139} Suggestions")?; - for suggestion in *suggestions { + for suggestion in sorted_suggestions { writeln!(f, "{suggestion}")?; } } diff --git a/lychee-bin/src/formatters/stats/detailed.rs b/lychee-bin/src/formatters/stats/detailed.rs index 2c83ee97b0..746ba1dd34 100644 --- a/lychee-bin/src/formatters/stats/detailed.rs +++ b/lychee-bin/src/formatters/stats/detailed.rs @@ -60,12 +60,19 @@ impl Display for DetailedResponseStats { "\n{}", response_formatter.format_detailed_response(response) )?; + } - if let Some(suggestions) = &stats.suggestion_map.get(source) { - writeln!(f, "\nSuggestions in {source}")?; - for suggestion in *suggestions { - writeln!(f, "{suggestion}")?; - } + if let Some(suggestions) = stats.suggestion_map.get(source) { + // Sort suggestions + let mut sorted_suggestions: Vec<_> = suggestions.iter().collect(); + sorted_suggestions.sort_by(|a, b| { + let (a, b) = (a.to_string().to_lowercase(), b.to_string().to_lowercase()); + human_sort::compare(&a, &b) + }); + + writeln!(f, "\nSuggestions in {source}")?; + for suggestion in sorted_suggestions { + writeln!(f, "{suggestion}")?; } } } diff --git a/lychee-bin/src/formatters/stats/mod.rs b/lychee-bin/src/formatters/stats/mod.rs index df90aeadb1..b8365886b4 100644 --- a/lychee-bin/src/formatters/stats/mod.rs +++ b/lychee-bin/src/formatters/stats/mod.rs @@ -12,25 +12,107 @@ pub(crate) use raw::Raw; use std::{ collections::{HashMap, HashSet}, - fmt::Display + fmt::Display, }; use crate::stats::ResponseStats; -use lychee_lib::InputSource; use anyhow::Result; +use lychee_lib::InputSource; pub(crate) trait StatsFormatter { /// Format the stats of all responses and write them to stdout fn format(&self, stats: ResponseStats) -> Result>; } -// Convert error_map to a sorted Vec of key-value pairs -fn sort_stat_map(error_map: &HashMap>) -> Vec<(&InputSource, &HashSet)> -where T: Display +/// Convert a `ResponseStats` `HashMap` to a sorted Vec of key-value pairs +/// The returned keys and values are both sorted in natural, case-insensitive order +fn sort_stat_map(stat_map: &HashMap>) -> Vec<(&InputSource, Vec<&T>)> +where + T: Display, { - let mut errors: Vec<(&InputSource, &HashSet)> = error_map.iter().collect(); + let mut entries: Vec<_> = stat_map + .iter() + .map(|(source, responses)| { + let mut sorted_responses: Vec<&T> = responses.iter().collect(); + sorted_responses.sort_by(|a, b| { + let (a, b) = (a.to_string().to_lowercase(), b.to_string().to_lowercase()); + human_sort::compare(&a, &b) + }); + + (source, sorted_responses) + }) + .collect(); + + entries.sort_by(|(a, _), (b, _)| human_sort::compare(&a.to_string(), &b.to_string())); + + entries +} + +#[cfg(test)] +mod tests { + use super::*; + + // use crate::stats::ResponseStats; + + use lychee_lib::{ErrorKind, Response, Status, Uri}; + use url::Url; + + fn make_test_url(url: &str) -> Url { + Url::parse(url).expect("Expected valid Website URI") + } - errors.sort_by(|(source, _), (other_source, _)| source.cmp(other_source)); + fn make_test_response(url: &str, source: InputSource) -> Response { + let uri = Uri::from(make_test_url(url)); - errors -} \ No newline at end of file + Response::new(uri, Status::Error(ErrorKind::InvalidUrlHost), source) + } + + #[test] + fn test_sorted_stat_map() { + let mut test_stats = ResponseStats::default(); + + // Sorted list of test sources + let test_sources = vec![ + InputSource::RemoteUrl(Box::new(make_test_url("https://example.com/404"))), + InputSource::RemoteUrl(Box::new(make_test_url("https://example.com/home"))), + InputSource::RemoteUrl(Box::new(make_test_url("https://example.com/page/1"))), + InputSource::RemoteUrl(Box::new(make_test_url("https://example.com/page/10"))), + ]; + + // Sorted list of test responses + let test_response_urls = vec![ + "https://example.com/", + "https://github.com/", + "https://itch.io/", + "https://youtube.com/", + ]; + + // Add responses to stats + // Responses are added to a HashMap, so the order is not preserved + for source in test_sources.iter() { + for response in test_response_urls.iter() { + test_stats.add(make_test_response(response, source.clone())); + } + } + + // Sort error map and extract the sources + let sorted_errors = sort_stat_map(&test_stats.error_map); + let sorted_sources: Vec = sorted_errors + .iter() + .map(|(source, _)| (*source).clone()) + .collect(); + + // Check that the input sources are sorted + assert_eq!(test_sources, sorted_sources); + + // Check that the responses are sorted + for (_, response_bodies) in sorted_errors.into_iter() { + let response_urls: Vec<&str> = response_bodies + .into_iter() + .map(|response| response.uri.as_str()) + .collect(); + + assert_eq!(test_response_urls, response_urls); + } + } +} diff --git a/lychee-bin/tests/cli.rs b/lychee-bin/tests/cli.rs index 5479d37ae8..68168e6e4b 100644 --- a/lychee-bin/tests/cli.rs +++ b/lychee-bin/tests/cli.rs @@ -1940,4 +1940,51 @@ mod cli { Ok(()) } + + #[test] + fn test_sorted_error_output() -> Result<()> { + let test_files = ["TEST_GITHUB_404.md", "TEST_INVALID_URLS.html"]; + + let test_urls = [ + "https://httpbin.org/status/404", + "https://httpbin.org/status/500", + "https://httpbin.org/status/502", + ]; + + let mut cmd = &mut main_command() + .arg("--format") + .arg("compact") + .arg(fixtures_path().join(test_files[1])) + .arg(fixtures_path().join(test_files[0])) + .assert() + .failure() + .code(2); + + let output = String::from_utf8_lossy(&cmd.get_output().stdout); + let mut position: usize = 0; + + // Check that the input sources are sorted + for file in test_files.into_iter() { + assert!(output.contains(file)); + + let next_position = output.find(file).unwrap(); + + assert!(next_position > position); + position = next_position; + } + + position = 0; + + // Check that the responses are sorted + for url in test_urls.into_iter() { + assert!(output.contains(url)); + + let next_position = output.find(url).unwrap(); + + assert!(next_position > position); + position = next_position; + } + + Ok(()) + } } diff --git a/lychee-lib/src/types/input.rs b/lychee-lib/src/types/input.rs index dea7279d4e..c32be7feb8 100644 --- a/lychee-lib/src/types/input.rs +++ b/lychee-lib/src/types/input.rs @@ -104,21 +104,6 @@ impl Display for InputSource { } } -// Compare InputSources by their string representations -impl PartialOrd for InputSource { - #[inline] - fn partial_cmp(&self, other: &Self) -> Option { - self.to_string().partial_cmp(&other.to_string()) - } -} - -impl Ord for InputSource { - #[inline] - fn cmp(&self, other: &Self) -> std::cmp::Ordering { - self.to_string().cmp(&other.to_string()) - } -} - /// Lychee Input with optional file hint for parsing #[derive(Clone, Debug, PartialEq, Eq, Hash)] pub struct Input { From f08750cd592d67f13445db76a00aaf5f3b77f613 Mon Sep 17 00:00:00 2001 From: sud Date: Thu, 13 Feb 2025 19:33:03 -0500 Subject: [PATCH 3/5] Fix warnings reported by GitHub checks --- lychee-bin/src/formatters/stats/mod.rs | 8 ++++---- lychee-bin/tests/cli.rs | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/lychee-bin/src/formatters/stats/mod.rs b/lychee-bin/src/formatters/stats/mod.rs index b8365886b4..1ad316c374 100644 --- a/lychee-bin/src/formatters/stats/mod.rs +++ b/lychee-bin/src/formatters/stats/mod.rs @@ -61,8 +61,8 @@ mod tests { Url::parse(url).expect("Expected valid Website URI") } - fn make_test_response(url: &str, source: InputSource) -> Response { - let uri = Uri::from(make_test_url(url)); + fn make_test_response(url_str: &str, source: InputSource) -> Response { + let uri = Uri::from(make_test_url(url_str)); Response::new(uri, Status::Error(ErrorKind::InvalidUrlHost), source) } @@ -89,7 +89,7 @@ mod tests { // Add responses to stats // Responses are added to a HashMap, so the order is not preserved - for source in test_sources.iter() { + for source in &test_sources { for response in test_response_urls.iter() { test_stats.add(make_test_response(response, source.clone())); } @@ -106,7 +106,7 @@ mod tests { assert_eq!(test_sources, sorted_sources); // Check that the responses are sorted - for (_, response_bodies) in sorted_errors.into_iter() { + for (_, response_bodies) in sorted_errors { let response_urls: Vec<&str> = response_bodies .into_iter() .map(|response| response.uri.as_str()) diff --git a/lychee-bin/tests/cli.rs b/lychee-bin/tests/cli.rs index 68168e6e4b..04ca94a1c2 100644 --- a/lychee-bin/tests/cli.rs +++ b/lychee-bin/tests/cli.rs @@ -1951,7 +1951,7 @@ mod cli { "https://httpbin.org/status/502", ]; - let mut cmd = &mut main_command() + let cmd = &mut main_command() .arg("--format") .arg("compact") .arg(fixtures_path().join(test_files[1])) @@ -1964,7 +1964,7 @@ mod cli { let mut position: usize = 0; // Check that the input sources are sorted - for file in test_files.into_iter() { + for file in test_files { assert!(output.contains(file)); let next_position = output.find(file).unwrap(); @@ -1976,7 +1976,7 @@ mod cli { position = 0; // Check that the responses are sorted - for url in test_urls.into_iter() { + for url in test_urls { assert!(output.contains(url)); let next_position = output.find(url).unwrap(); From e17e588409194f8b68b7967bbcf4d5d148ec2680 Mon Sep 17 00:00:00 2001 From: sud Date: Thu, 13 Feb 2025 19:52:09 -0500 Subject: [PATCH 4/5] Fix clippy warning - Fix clippy warning - Make entry sorting case-insensitive in sort_stat_map --- lychee-bin/src/formatters/stats/mod.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/lychee-bin/src/formatters/stats/mod.rs b/lychee-bin/src/formatters/stats/mod.rs index 1ad316c374..6cf50eac9d 100644 --- a/lychee-bin/src/formatters/stats/mod.rs +++ b/lychee-bin/src/formatters/stats/mod.rs @@ -43,7 +43,10 @@ where }) .collect(); - entries.sort_by(|(a, _), (b, _)| human_sort::compare(&a.to_string(), &b.to_string())); + entries.sort_by(|(a, _), (b, _)| { + let (a, b) = (a.to_string().to_lowercase(), b.to_string().to_lowercase()); + human_sort::compare(&a, &b) + }); entries } @@ -52,8 +55,6 @@ where mod tests { use super::*; - // use crate::stats::ResponseStats; - use lychee_lib::{ErrorKind, Response, Status, Uri}; use url::Url; From 791f20c0bf66e54223d2790545c244547231f1c3 Mon Sep 17 00:00:00 2001 From: sud Date: Thu, 13 Feb 2025 19:59:10 -0500 Subject: [PATCH 5/5] Fix clippy warning --- lychee-bin/src/formatters/stats/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lychee-bin/src/formatters/stats/mod.rs b/lychee-bin/src/formatters/stats/mod.rs index 6cf50eac9d..00e3c54bf5 100644 --- a/lychee-bin/src/formatters/stats/mod.rs +++ b/lychee-bin/src/formatters/stats/mod.rs @@ -91,7 +91,7 @@ mod tests { // Add responses to stats // Responses are added to a HashMap, so the order is not preserved for source in &test_sources { - for response in test_response_urls.iter() { + for response in &test_response_urls { test_stats.add(make_test_response(response, source.clone())); } }