Skip to content

Commit 8148e03

Browse files
committed
Add a way to handle "pretty URLs", i.e. URIs without .html extension
In many circumstances (GitHub Pages, Apache configured with MultiViews, etc.), web servers process URIs by appending the `.html` file extension when no file is found at the path specified by the URI but a `.html` file corresponding to that path _is_ found. To allow Lychee to use the fast, offline method of checking such files locally via the `file://` scheme, let's handle this scenario gracefully by adding the `--check-extensions=html` option. Note: This new option can take a list of file extensions to use; the first one for which a corresponding file is found is then used. Signed-off-by: Johannes Schindelin <[email protected]>
1 parent 9e031b6 commit 8148e03

File tree

7 files changed

+83
-6
lines changed

7 files changed

+83
-6
lines changed

README.md

+7
Original file line numberDiff line numberDiff line change
@@ -389,6 +389,13 @@ Options:
389389
--remap <REMAP>
390390
Remap URI matching pattern to different URI
391391
392+
--check-extensions <CHECK_EXTENSIONS>
393+
Test the specified file extensions for URIs when checking files locally.
394+
Multiple extensions can be separated by commas. Extensions will be checked in
395+
order of appearance.
396+
397+
Example: --check-extensions html,htm,php,asp,aspx,jsp,cgi
398+
392399
--header <HEADER>
393400
Custom request header
394401

fixtures/check-extensions/index.html

+10
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
<!DOCTYPE html>
2+
<html lang="en">
3+
<head>
4+
<meta charset="UTF-8">
5+
<title>For Testing pretty URLs</title>
6+
</head>
7+
<body>
8+
<a href="other">other</a>
9+
</body>
10+
</html>

fixtures/check-extensions/other.htm

+10
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
<!DOCTYPE html>
2+
<html lang="en">
3+
<head>
4+
<meta charset="UTF-8">
5+
<title>For Testing pretty URLs</title>
6+
</head>
7+
<body>
8+
<a href="index">index</a>
9+
</body>
10+
</html>

lychee-bin/src/client.rs

+1
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ pub(crate) fn create(cfg: &Config, cookie_jar: Option<&Arc<CookieStoreMutex>>) -
7575
.require_https(cfg.require_https)
7676
.cookie_jar(cookie_jar.cloned())
7777
.include_fragments(cfg.include_fragments)
78+
.check_extensions(cfg.check_extensions.clone())
7879
.build()
7980
.client()
8081
.context("Failed to create request client")

lychee-bin/src/options.rs

+14
Original file line numberDiff line numberDiff line change
@@ -300,6 +300,19 @@ pub(crate) struct Config {
300300
#[arg(long)]
301301
pub(crate) remap: Vec<String>,
302302

303+
/// Automatically append file extensions to `file://` URIs as needed
304+
#[serde(default)]
305+
#[arg(
306+
long,
307+
value_delimiter = ',',
308+
long_help = "Test the specified file extensions for URIs when checking files locally.
309+
Multiple extensions can be separated by commas. Extensions will be checked in
310+
order of appearance.
311+
312+
Example: --check-extensions html,htm,php,asp,aspx,jsp,cgi"
313+
)]
314+
pub(crate) check_extensions: Vec<String>,
315+
303316
/// Custom request header
304317
#[arg(long)]
305318
#[serde(default)]
@@ -439,6 +452,7 @@ impl Config {
439452
exclude_loopback: false;
440453
exclude_mail: false;
441454
remap: Vec::<String>::new();
455+
check_extensions: Vec::<String>::new();
442456
header: Vec::<String>::new();
443457
timeout: DEFAULT_TIMEOUT_SECS;
444458
retry_wait_time: DEFAULT_RETRY_WAIT_TIME_SECS;

lychee-bin/tests/cli.rs

+13
Original file line numberDiff line numberDiff line change
@@ -1556,4 +1556,17 @@ mod cli {
15561556
// 3 failures because of missing fragments
15571557
.stdout(contains("3 Errors"));
15581558
}
1559+
1560+
#[test]
1561+
fn test_check_extensions() {
1562+
let mut cmd = main_command();
1563+
let input = fixtures_path().join("check-extensions");
1564+
1565+
cmd.arg("--verbose")
1566+
.arg("--check-extensions=htm,html")
1567+
.arg(input)
1568+
.assert()
1569+
.success()
1570+
.stdout(contains("0 Errors"));
1571+
}
15591572
}

lychee-lib/src/client.rs

+28-6
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,9 @@ pub struct ClientBuilder {
9595
/// make sure rules don't conflict with each other.
9696
remaps: Option<Remaps>,
9797

98+
/// Automatically append file extensions to `file://` URIs as needed
99+
check_extensions: Vec<String>,
100+
98101
/// Links matching this set of regular expressions are **always** checked.
99102
///
100103
/// This has higher precedence over [`ClientBuilder::excludes`], **but**
@@ -384,6 +387,7 @@ impl ClientBuilder {
384387
reqwest_client,
385388
github_client,
386389
remaps: self.remaps,
390+
check_extensions: self.check_extensions,
387391
filter,
388392
max_retries: self.max_retries,
389393
retry_wait_time: self.retry_wait_time,
@@ -412,6 +416,9 @@ pub struct Client {
412416
/// Optional remapping rules for URIs matching pattern.
413417
remaps: Option<Remaps>,
414418

419+
/// Automatically append file extensions to `file://` URIs as needed
420+
check_extensions: Vec<String>,
421+
415422
/// Rules to decide whether each link should be checked or ignored.
416423
filter: Filter,
417424

@@ -654,13 +661,28 @@ impl Client {
654661
let Ok(path) = uri.url.to_file_path() else {
655662
return ErrorKind::InvalidFilePath(uri.clone()).into();
656663
};
657-
if !path.exists() {
658-
return ErrorKind::InvalidFilePath(uri.clone()).into();
659-
}
660-
if self.include_fragments {
661-
self.check_fragment(&path, uri).await
664+
if path.exists() {
665+
if self.include_fragments {
666+
return self.check_fragment(&path, uri).await;
667+
}
668+
return Status::Ok(StatusCode::OK)
662669
} else {
663-
Status::Ok(StatusCode::OK)
670+
if path.extension().is_some() {
671+
return ErrorKind::InvalidFilePath(uri.clone()).into();
672+
}
673+
674+
// if the path has no file extension, try to append some
675+
let mut path_buf = path.clone();
676+
for ext in &self.check_extensions {
677+
path_buf.set_extension(ext);
678+
if path_buf.exists() {
679+
if self.include_fragments {
680+
return self.check_fragment(&path_buf, uri).await;
681+
}
682+
return Status::Ok(StatusCode::OK)
683+
}
684+
}
685+
ErrorKind::InvalidFilePath(uri.clone()).into()
664686
}
665687
}
666688

0 commit comments

Comments
 (0)