@@ -10,7 +10,7 @@ use reqwest::Url;
10
10
use tokio:: sync:: mpsc;
11
11
use tokio_stream:: wrappers:: ReceiverStream ;
12
12
13
- use lychee_lib:: { Client , ErrorKind , Request , Response } ;
13
+ use lychee_lib:: { Client , ErrorKind , Request , Response , Uri } ;
14
14
use lychee_lib:: { InputSource , Result } ;
15
15
use lychee_lib:: { ResponseBody , Status } ;
16
16
46
46
47
47
let client = params. client ;
48
48
let cache = params. cache ;
49
+ let cache_exclude_status = params. cfg . cache_exclude_status . into_set ( ) ;
49
50
let accept = params. cfg . accept . into_set ( ) ;
50
51
51
52
let pb = if params. cfg . no_progress || params. cfg . verbose . log_level ( ) >= log:: Level :: Info {
61
62
max_concurrency,
62
63
client,
63
64
cache,
65
+ cache_exclude_status,
64
66
accept,
65
67
) ) ;
66
68
@@ -219,14 +221,22 @@ async fn request_channel_task(
219
221
max_concurrency : usize ,
220
222
client : Client ,
221
223
cache : Arc < Cache > ,
224
+ cache_exclude_status : HashSet < u16 > ,
222
225
accept : HashSet < u16 > ,
223
226
) {
224
227
StreamExt :: for_each_concurrent (
225
228
ReceiverStream :: new ( recv_req) ,
226
229
max_concurrency,
227
230
|request : Result < Request > | async {
228
231
let request = request. expect ( "cannot read request" ) ;
229
- let response = handle ( & client, cache. clone ( ) , request, accept. clone ( ) ) . await ;
232
+ let response = handle (
233
+ & client,
234
+ cache. clone ( ) ,
235
+ cache_exclude_status. clone ( ) ,
236
+ request,
237
+ accept. clone ( ) ,
238
+ )
239
+ . await ;
230
240
231
241
send_resp
232
242
. send ( response)
@@ -260,6 +270,7 @@ async fn check_url(client: &Client, request: Request) -> Response {
260
270
async fn handle (
261
271
client : & Client ,
262
272
cache : Arc < Cache > ,
273
+ cache_exclude_status : HashSet < u16 > ,
263
274
request : Request ,
264
275
accept : HashSet < u16 > ,
265
276
) -> Response {
@@ -287,16 +298,37 @@ async fn handle(
287
298
// benefit.
288
299
// - Skip caching unsupported URLs as they might be supported in a
289
300
// future run.
290
- // - Skip caching excluded links; they might not be excluded in the next run
301
+ // - Skip caching excluded links; they might not be excluded in the next run.
302
+ // - Skip caching links for which the status code has been explicitly excluded from the cache.
291
303
let status = response. status ( ) ;
292
- if uri . is_file ( ) || status. is_excluded ( ) || status . is_unsupported ( ) || status . is_unknown ( ) {
304
+ if ignore_cache ( & uri , status, & cache_exclude_status ) {
293
305
return response;
294
306
}
295
307
296
308
cache. insert ( uri, status. into ( ) ) ;
297
309
response
298
310
}
299
311
312
+ /// Returns `true` if the response should be ignored in the cache.
313
+ ///
314
+ /// The response should be ignored if:
315
+ /// - The URI is a file URI.
316
+ /// - The status is excluded.
317
+ /// - The status is unsupported.
318
+ /// - The status is unknown.
319
+ /// - The status code is excluded from the cache.
320
+ fn ignore_cache ( uri : & Uri , status : & Status , cache_exclude_status : & HashSet < u16 > ) -> bool {
321
+ let status_code_excluded = status
322
+ . code ( )
323
+ . map_or ( false , |code| cache_exclude_status. contains ( & code. as_u16 ( ) ) ) ;
324
+
325
+ uri. is_file ( )
326
+ || status. is_excluded ( )
327
+ || status. is_unsupported ( )
328
+ || status. is_unknown ( )
329
+ || status_code_excluded
330
+ }
331
+
300
332
fn show_progress (
301
333
output : & mut dyn Write ,
302
334
progress_bar : & Option < ProgressBar > ,
@@ -344,8 +376,9 @@ fn get_failed_urls(stats: &mut ResponseStats) -> Vec<(InputSource, Url)> {
344
376
#[ cfg( test) ]
345
377
mod tests {
346
378
use crate :: { formatters:: get_response_formatter, options} ;
379
+ use http:: StatusCode ;
347
380
use log:: info;
348
- use lychee_lib:: { CacheStatus , ClientBuilder , InputSource , Uri } ;
381
+ use lychee_lib:: { CacheStatus , ClientBuilder , ErrorKind , InputSource , Uri } ;
349
382
350
383
use super :: * ;
351
384
@@ -406,4 +439,55 @@ mod tests {
406
439
Status :: Error ( ErrorKind :: InvalidURI ( _) )
407
440
) ) ;
408
441
}
442
+
443
#[test]
fn test_cache_by_default() {
    // An OK response for a non-file URI with no excluded codes is not ignored.
    let uri = Uri::try_from("https://[::1]").unwrap();
    let status = Status::Ok(StatusCode::OK);
    let no_exclusions = HashSet::default();

    assert!(!ignore_cache(&uri, &status, &no_exclusions));
}
451
+
452
#[test]
fn test_cache_ignore_file_urls() {
    // File URIs are ignored by the cache even when the status is OK.
    let uri = Uri::try_from("file:///home").unwrap();
    let status = Status::Ok(StatusCode::OK);
    let no_exclusions = HashSet::default();

    assert!(ignore_cache(&uri, &status, &no_exclusions));
}
461
+
462
#[test]
fn test_cache_ignore_unsupported_status() {
    // An unsupported status is ignored by the cache.
    let uri = Uri::try_from("https://[::1]").unwrap();
    let status = Status::Unsupported(ErrorKind::EmptyUrl);
    let no_exclusions = HashSet::default();

    assert!(ignore_cache(&uri, &status, &no_exclusions));
}
471
+
472
#[test]
fn test_cache_ignore_unknown_status() {
    // An unknown status code is ignored by the cache.
    let uri = Uri::try_from("https://[::1]").unwrap();
    let status = Status::UnknownStatusCode(StatusCode::IM_A_TEAPOT);
    let no_exclusions = HashSet::default();

    assert!(ignore_cache(&uri, &status, &no_exclusions));
}
481
+
482
#[test]
fn test_cache_ignore_excluded_status() {
    // A status whose code is listed in the exclusion set is ignored by the cache.
    let mut excluded_codes = HashSet::default();
    excluded_codes.insert(StatusCode::OK.as_u16());

    let uri = Uri::try_from("https://[::1]").unwrap();
    let status = Status::Ok(StatusCode::OK);

    assert!(ignore_cache(&uri, &status, &excluded_codes));
}
409
493
}
0 commit comments