@@ -10,7 +10,7 @@ use reqwest::Url;
10
10
use tokio:: sync:: mpsc;
11
11
use tokio_stream:: wrappers:: ReceiverStream ;
12
12
13
- use lychee_lib:: { Client , ErrorKind , Request , Response } ;
13
+ use lychee_lib:: { Client , ErrorKind , Request , Response , Uri } ;
14
14
use lychee_lib:: { InputSource , Result } ;
15
15
use lychee_lib:: { ResponseBody , Status } ;
16
16
46
46
47
47
let client = params. client ;
48
48
let cache = params. cache ;
49
+ let cache_exclude_status = params. cfg . cache_exclude_status . into_set ( ) ;
49
50
let accept = params. cfg . accept . into_set ( ) ;
50
51
51
52
let pb = if params. cfg . no_progress || params. cfg . verbose . log_level ( ) >= log:: Level :: Info {
61
62
max_concurrency,
62
63
client,
63
64
cache,
65
+ cache_exclude_status,
64
66
accept,
65
67
) ) ;
66
68
@@ -219,14 +221,22 @@ async fn request_channel_task(
219
221
max_concurrency : usize ,
220
222
client : Client ,
221
223
cache : Arc < Cache > ,
224
+ cache_exclude_status : HashSet < u16 > ,
222
225
accept : HashSet < u16 > ,
223
226
) {
224
227
StreamExt :: for_each_concurrent (
225
228
ReceiverStream :: new ( recv_req) ,
226
229
max_concurrency,
227
230
|request : Result < Request > | async {
228
231
let request = request. expect ( "cannot read request" ) ;
229
- let response = handle ( & client, cache. clone ( ) , request, accept. clone ( ) ) . await ;
232
+ let response = handle (
233
+ & client,
234
+ cache. clone ( ) ,
235
+ cache_exclude_status. clone ( ) ,
236
+ request,
237
+ accept. clone ( ) ,
238
+ )
239
+ . await ;
230
240
231
241
send_resp
232
242
. send ( response)
@@ -260,6 +270,7 @@ async fn check_url(client: &Client, request: Request) -> Response {
260
270
async fn handle (
261
271
client : & Client ,
262
272
cache : Arc < Cache > ,
273
+ cache_exclude_status : HashSet < u16 > ,
263
274
request : Request ,
264
275
accept : HashSet < u16 > ,
265
276
) -> Response {
@@ -287,16 +298,37 @@ async fn handle(
287
298
// benefit.
288
299
// - Skip caching unsupported URLs as they might be supported in a
289
300
// future run.
290
- // - Skip caching excluded links; they might not be excluded in the next run
301
+ // - Skip caching excluded links; they might not be excluded in the next run.
302
+ // - Skip caching links for which the status code has been explicitly excluded from the cache.
291
303
let status = response. status ( ) ;
292
- if uri . is_file ( ) || status. is_excluded ( ) || status . is_unsupported ( ) || status . is_unknown ( ) {
304
+ if ignore_cache ( & uri , status, & cache_exclude_status ) {
293
305
return response;
294
306
}
295
307
296
308
cache. insert ( uri, status. into ( ) ) ;
297
309
response
298
310
}
299
311
312
+ /// Returns `true` if the response should be ignored in the cache.
313
+ ///
314
+ /// The response should be ignored if:
315
+ /// - The URI is a file URI.
316
+ /// - The status is excluded.
317
+ /// - The status is unsupported.
318
+ /// - The status is unknown.
319
+ /// - The status code is excluded from the cache.
320
+ fn ignore_cache ( uri : & Uri , status : & Status , cache_exclude_status : & HashSet < u16 > ) -> bool {
321
+ let status_code_excluded = status
322
+ . code ( )
323
+ . map_or ( false , |code| cache_exclude_status. contains ( & code. as_u16 ( ) ) ) ;
324
+
325
+ uri. is_file ( )
326
+ || status. is_excluded ( )
327
+ || status. is_unsupported ( )
328
+ || status. is_unknown ( )
329
+ || status_code_excluded
330
+ }
331
+
300
332
fn show_progress (
301
333
output : & mut dyn Write ,
302
334
progress_bar : & Option < ProgressBar > ,
@@ -352,8 +384,9 @@ fn get_failed_urls(stats: &mut ResponseStats) -> Vec<(InputSource, Url)> {
352
384
#[ cfg( test) ]
353
385
mod tests {
354
386
use crate :: { formatters:: get_response_formatter, options} ;
387
+ use http:: StatusCode ;
355
388
use log:: info;
356
- use lychee_lib:: { CacheStatus , ClientBuilder , InputSource , Uri } ;
389
+ use lychee_lib:: { CacheStatus , ClientBuilder , ErrorKind , InputSource , Uri } ;
357
390
358
391
use super :: * ;
359
392
@@ -414,4 +447,55 @@ mod tests {
414
447
Status :: Error ( ErrorKind :: InvalidURI ( _) )
415
448
) ) ;
416
449
}
450
+
451
/// With an empty exclusion set, a `200 OK` response for an `https` URI
/// must not be ignored by the cache.
#[test]
fn test_cache_by_default() {
    let uri = Uri::try_from("https://[::1]").unwrap();
    let status = Status::Ok(StatusCode::OK);
    let no_exclusions = HashSet::default();

    assert!(!ignore_cache(&uri, &status, &no_exclusions));
}
459
+
460
/// File URLs are ignored by the cache even when the status is `200 OK`
/// and no status codes are excluded.
#[test]
fn test_cache_ignore_file_urls() {
    let uri = Uri::try_from("file:///home").unwrap();
    let status = Status::Ok(StatusCode::OK);
    let no_exclusions = HashSet::default();

    assert!(ignore_cache(&uri, &status, &no_exclusions));
}
469
+
470
/// An unsupported status is ignored by the cache, regardless of the
/// (here empty) set of excluded status codes.
#[test]
fn test_cache_ignore_unsupported_status() {
    let uri = Uri::try_from("https://[::1]").unwrap();
    let status = Status::Unsupported(ErrorKind::EmptyUrl);
    let no_exclusions = HashSet::default();

    assert!(ignore_cache(&uri, &status, &no_exclusions));
}
479
+
480
/// An unknown status code is ignored by the cache, regardless of the
/// (here empty) set of excluded status codes.
#[test]
fn test_cache_ignore_unknown_status() {
    let uri = Uri::try_from("https://[::1]").unwrap();
    let status = Status::UnknownStatusCode(StatusCode::IM_A_TEAPOT);
    let no_exclusions = HashSet::default();

    assert!(ignore_cache(&uri, &status, &no_exclusions));
}
489
+
490
/// A response whose status code is listed in the exclusion set is
/// ignored by the cache, even though the status itself is `200 OK`.
#[test]
fn test_cache_ignore_excluded_status() {
    // Exclusion set containing exactly the code of the status under test.
    let mut exclude = HashSet::new();
    exclude.insert(StatusCode::OK.as_u16());

    let uri = Uri::try_from("https://[::1]").unwrap();
    let status = Status::Ok(StatusCode::OK);

    assert!(ignore_cache(&uri, &status, &exclude));
}
417
501
}
0 commit comments