Skip to content

Commit 64acc73

Browse files
committed
Add a mode to handle "pretty URLs", i.e. URIs without .html extension
In many circumstances (GitHub Pages, Apache configured with MultiViews, etc.), web servers process URIs by appending the `.html` file extension when no file is found at the path specified by the URI but a `.html` file corresponding to that path _is_ found. To allow Lychee to use the fast, offline method of checking such files locally via the `file://` scheme, let's handle this scenario gracefully by adding the `--auto-append-html-fileext` option. Signed-off-by: Johannes Schindelin <[email protected]>
1 parent e158d7b commit 64acc73

File tree

7 files changed

+72
-6
lines changed

7 files changed

+72
-6
lines changed

README.md

+4
Original file line numberDiff line numberDiff line change
@@ -389,6 +389,10 @@ Options:
389389
--remap <REMAP>
390390
Remap URI matching pattern to different URI
391391
392+
--auto-append-html-fileext
393+
Automatically append `.html` to `file://` URLs when no file could be found
394+
at the specified `path`.
395+
392396
--header <HEADER>
393397
Custom request header
394398

fixtures/pretty-urls/index.html

+10
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
<!DOCTYPE html>
2+
<html lang="en">
3+
<head>
4+
<meta charset="UTF-8">
5+
<title>For Testing pretty URLs</title>
6+
</head>
7+
<body>
8+
<a href="other">other</a>
9+
</body>
10+
</html>

fixtures/pretty-urls/other.html

+10
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
<!DOCTYPE html>
2+
<html lang="en">
3+
<head>
4+
<meta charset="UTF-8">
5+
<title>For Testing pretty URLs</title>
6+
</head>
7+
<body>
8+
<a href="index">index</a>
9+
</body>
10+
</html>

lychee-bin/src/client.rs

+1
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ pub(crate) fn create(cfg: &Config, cookie_jar: Option<&Arc<CookieStoreMutex>>) -
7575
.require_https(cfg.require_https)
7676
.cookie_jar(cookie_jar.cloned())
7777
.include_fragments(cfg.include_fragments)
78+
.auto_append_html_fileext(cfg.auto_append_html_fileext)
7879
.build()
7980
.client()
8081
.context("Failed to create request client")

lychee-bin/src/options.rs

+10
Original file line numberDiff line numberDiff line change
@@ -300,6 +300,15 @@ pub(crate) struct Config {
300300
#[arg(long)]
301301
pub(crate) remap: Vec<String>,
302302

303+
/// Automatically append `.html` to `file://` URIs as needed
304+
#[serde(default)]
305+
#[arg(
306+
long,
307+
long_help = "Automatically append `.html` to `file://` URLs when no file could be found
308+
at the specified `path`."
309+
)]
310+
pub(crate) auto_append_html_fileext: bool,
311+
303312
/// Custom request header
304313
#[arg(long)]
305314
#[serde(default)]
@@ -439,6 +448,7 @@ impl Config {
439448
exclude_loopback: false;
440449
exclude_mail: false;
441450
remap: Vec::<String>::new();
451+
auto_append_html_fileext: false;
442452
header: Vec::<String>::new();
443453
timeout: DEFAULT_TIMEOUT_SECS;
444454
retry_wait_time: DEFAULT_RETRY_WAIT_TIME_SECS;

lychee-bin/tests/cli.rs

+13
Original file line numberDiff line numberDiff line change
@@ -1556,4 +1556,17 @@ mod cli {
15561556
// 3 failures because of missing fragments
15571557
.stdout(contains("3 Errors"));
15581558
}
1559+
1560+
#[test]
1561+
fn test_pretty_urls() {
1562+
let mut cmd = main_command();
1563+
let input = fixtures_path().join("pretty-urls");
1564+
1565+
cmd.arg("--verbose")
1566+
.arg("--auto-append-html-fileext")
1567+
.arg(input)
1568+
.assert()
1569+
.success()
1570+
.stdout(contains("0 Errors"));
1571+
}
15591572
}

lychee-lib/src/client.rs

+24-6
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,9 @@ pub struct ClientBuilder {
9595
/// make sure rules don't conflict with each other.
9696
remaps: Option<Remaps>,
9797

98+
/// Automatically append `.html` file extensions to `file://` URIs as needed
99+
auto_append_html_fileext: bool,
100+
98101
/// Links matching this set of regular expressions are **always** checked.
99102
///
100103
/// This has higher precedence over [`ClientBuilder::excludes`], **but**
@@ -384,6 +387,7 @@ impl ClientBuilder {
384387
reqwest_client,
385388
github_client,
386389
remaps: self.remaps,
390+
auto_append_html_fileext: self.auto_append_html_fileext,
387391
filter,
388392
max_retries: self.max_retries,
389393
retry_wait_time: self.retry_wait_time,
@@ -412,6 +416,9 @@ pub struct Client {
412416
/// Optional remapping rules for URIs matching pattern.
413417
remaps: Option<Remaps>,
414418

419+
/// Automatically append `.html` file extensions to `file://` URIs as needed
420+
auto_append_html_fileext: bool,
421+
415422
/// Rules to decide whether each link should be checked or ignored.
416423
filter: Filter,
417424

@@ -654,14 +661,25 @@ impl Client {
654661
let Ok(path) = uri.url.to_file_path() else {
655662
return ErrorKind::InvalidFilePath(uri.clone()).into();
656663
};
657-
if !path.exists() {
658-
return ErrorKind::InvalidFilePath(uri.clone()).into();
659-
}
660-
if self.include_fragments {
661-
self.check_fragment(&path, uri).await
664+
if path.exists() {
665+
if self.include_fragments {
666+
return self.check_fragment(&path, uri).await;
667+
}
662668
} else {
663-
Status::Ok(StatusCode::OK)
669+
// only retry with `.html` when the option is enabled and the path has no file extension at all
670+
if !self.auto_append_html_fileext || path.extension().is_some() {
671+
return ErrorKind::InvalidFilePath(uri.clone()).into();
672+
}
673+
// append `.html` and try again
674+
let mut path_buf = path.clone();
675+
path_buf.set_extension("html");
676+
if !path_buf.exists() {
677+
return ErrorKind::InvalidFilePath(uri.clone()).into();
678+
} else if self.include_fragments {
679+
return self.check_fragment(&path_buf, uri).await;
680+
}
664681
}
682+
Status::Ok(StatusCode::OK)
665683
}
666684

667685
/// Checks a `file` URI's fragment.

0 commit comments

Comments
 (0)