Skip to content

Commit

Permalink
Merge pull request #233 from serokell/aeqz/#25-#218-redirect-links-ma…
Browse files Browse the repository at this point in the history
…nagement

[#218] Change redirects default behaviour
  • Loading branch information
aeqz authored Dec 12, 2022
2 parents 997c438 + 9c5f5f8 commit 2b9bf25
Show file tree
Hide file tree
Showing 7 changed files with 217 additions and 57 deletions.
3 changes: 3 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ Unreleased
* [#229](https://github.com/serokell/xrefcheck/pull/229)
+ Now we call references to anchors in current file (e.g. `[a](#b)`) as
`file-local` references instead of calling them `current file` (which was ambiguous).
* [#233](https://github.com/serokell/xrefcheck/pull/233)
+ Now xrefxcheck does not follow redirect links by default. It fails for permanent
redirect responses (i.e. 301 and 308) and passes for temporary ones (i.e. 302, 303, 307).

0.2.2
==========
Expand Down
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,8 @@ There are several ways to fix this:
* This behavior can be disabled by setting `ignoreAuthFailures: false` in the config file.

1. How does `xrefcheck` handle redirects?
* `xrefcheck` follows up to 10 HTTP redirects.
* Permanent redirects (i.e. 301 and 308) are reported as errors.
* Temporary redirects (i.e. 302, 303 and 307) are assumed to be valid.

1. How does `xrefcheck` handle localhost links?
* By default, `xrefcheck` will ignore links to localhost.
Expand Down
49 changes: 42 additions & 7 deletions src/Xrefcheck/Verify.hs
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,9 @@ import Network.FTP.Client
import Network.HTTP.Client
(HttpException (..), HttpExceptionContent (..), Response, responseHeaders, responseStatus)
import Network.HTTP.Req
(AllowsBody, CanHaveBody (NoBody), GET (..), HEAD (..), HttpBodyAllowed, HttpException (..),
HttpMethod, NoReqBody (..), defaultHttpConfig, ignoreResponse, req, runReq, useURI)
(AllowsBody, CanHaveBody (NoBody), GET (..), HEAD (..), HttpBodyAllowed,
HttpConfig (httpConfigRedirectCount), HttpException (..), HttpMethod, NoReqBody (..),
defaultHttpConfig, ignoreResponse, req, runReq, useURI)
import Network.HTTP.Types.Header (hRetryAfter)
import Network.HTTP.Types.Status (Status, statusCode, statusMessage)
import System.FilePath.Posix
Expand All @@ -67,6 +68,7 @@ import URI.ByteString qualified as URIBS

import Control.Exception.Safe (handleAsync, handleJust)
import Data.Bits (toIntegralSized)
import Data.List (lookup)
import Xrefcheck.Config
import Xrefcheck.Core
import Xrefcheck.Orphans ()
Expand Down Expand Up @@ -133,6 +135,7 @@ data VerifyError
| ExternalFtpException FTPException
| FtpEntryDoesNotExist FilePath
| ExternalResourceSomeError Text
| PermanentRedirectError Text (Maybe Text)
deriving stock (Show, Eq)

instance Given ColorMode => Buildable VerifyError where
Expand Down Expand Up @@ -236,6 +239,20 @@ instance Given ColorMode => Buildable VerifyError where
#{err}
|]

PermanentRedirectError url Nothing ->
[int||
Permanent redirect found:
#{url}
|]

PermanentRedirectError url (Just redirectedUrl) ->
[int||
Permanent redirect found. Perhaps you want to replace the link:
#{url}
by:
#{redirectedUrl}
|]

reportVerifyErrs
:: Given ColorMode => NonEmpty (WithReferenceLoc VerifyError) -> IO ()
reportVerifyErrs errs = fmt
Expand Down Expand Up @@ -670,6 +687,9 @@ checkExternalResource Config{..} link
e | isFixable e -> throwError e
_ -> makeHttpRequest uri GET 0.7

httpConfig :: HttpConfig
httpConfig = defaultHttpConfig { httpConfigRedirectCount = 0 }

makeHttpRequest
:: (HttpMethod method, HttpBodyAllowed (AllowsBody method) 'NoBody)
=> URI
Expand All @@ -685,18 +705,25 @@ checkExternalResource Config{..} link
Just u -> pure u
let reqLink = case parsedUrl of
Left (url, option) ->
runReq defaultHttpConfig $
req method url NoReqBody ignoreResponse option
runReq httpConfig $
req method url NoReqBody ignoreResponse option
Right (url, option) ->
runReq defaultHttpConfig $
req method url NoReqBody ignoreResponse option
runReq httpConfig $
req method url NoReqBody ignoreResponse option

let maxTime = Time @Second $ unTime ncExternalRefCheckTimeout * timeoutFrac

mres <- liftIO (timeout maxTime $ void reqLink) `catch`
(either throwError (\() -> return (Just ())) . interpretErrors)
maybe (throwError $ ExternalResourceSomeError "Response timeout") pure mres

isTemporaryRedirectCode :: Int -> Bool
isTemporaryRedirectCode = flip elem [302, 303, 307]

isPermanentRedirectCode :: Int -> Bool
isPermanentRedirectCode = flip elem [301, 308]

isAllowedErrorCode :: Int -> Bool
isAllowedErrorCode = or . sequence
-- We have to stay conservative - if some URL can be accessed under
-- some circumstances, we should do our best to report it as fine.
Expand All @@ -712,10 +739,18 @@ checkExternalResource Config{..} link
InvalidUrlException{} -> error "External link URL invalid exception"
HttpExceptionRequest _ exc -> case exc of
StatusCodeException resp _
| isAllowedErrorCode (statusCode $ responseStatus resp) -> Right ()
| isPermanentRedirectCode code -> Left
. PermanentRedirectError link
. fmap decodeUtf8
. lookup "Location"
$ responseHeaders resp
| isTemporaryRedirectCode code -> Right ()
| isAllowedErrorCode code -> Right ()
| otherwise -> case statusCode (responseStatus resp) of
429 -> Left . ExternalHttpTooManyRequests $ retryAfterInfo resp
_ -> Left . ExternalHttpResourceUnavailable $ responseStatus resp
where
code = statusCode $ responseStatus resp
other -> Left . ExternalResourceSomeError $ show other
where
retryAfterInfo :: Response a -> Maybe RetryAfter
Expand Down
79 changes: 79 additions & 0 deletions tests/Test/Xrefcheck/RedirectRequestsSpec.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
{- SPDX-FileCopyrightText: 2021 Serokell <https://serokell.io>
-
- SPDX-License-Identifier: MPL-2.0
-}

module Test.Xrefcheck.RedirectRequestsSpec where

import Universum

import Data.CaseInsensitive qualified as CI
import Data.Map qualified as M
import Network.HTTP.Types (Status, mkStatus)
import Network.HTTP.Types.Header (hLocation)
import Test.Tasty (TestName, TestTree, testGroup)
import Test.Tasty.HUnit (Assertion, testCase)
import Web.Firefly (ToResponse (toResponse), route, run)

import Test.Xrefcheck.UtilRequests
import Xrefcheck.Progress
import Xrefcheck.Verify

test_redirectRequests :: TestTree
test_redirectRequests = testGroup "Redirect response tests"
[ testGroup "Temporary" $ temporaryRedirectTests <$> [302, 303, 307]
, testGroup "Permanent" $ permanentRedirectTests <$> [301, 308]
]
where
url :: Text
url = "http://127.0.0.1:5000/redirect"

location :: Maybe Text
location = Just "http://127.0.0.1:5000/other"

temporaryRedirectTests :: Int -> TestTree
temporaryRedirectTests statusCode =
redirectTests
(show statusCode <> " passes by default")
(mkStatus statusCode "Temporary redirect")
(const Nothing)

permanentRedirectTests :: Int -> TestTree
permanentRedirectTests statusCode =
redirectTests
(show statusCode <> " fails by default")
(mkStatus statusCode "Permanent redirect")
(Just . PermanentRedirectError url)

redirectTests :: TestName -> Status -> (Maybe Text -> Maybe VerifyError) -> TestTree
redirectTests name expectedStatus expectedError =
testGroup name
[
testCase "With no location" $
redirectAssertion expectedStatus Nothing (expectedError Nothing),
testCase "With location" $
redirectAssertion expectedStatus location (expectedError location)
]

redirectAssertion :: Status -> Maybe Text -> Maybe VerifyError -> Assertion
redirectAssertion expectedStatus expectedLocation expectedError =
checkLinkAndProgressWithServer
(mockRedirect expectedLocation expectedStatus)
url
(Progress
{ pTotal = 1
, pCurrent = 1
, pErrorsUnfixable = length $ maybeToList expectedError
, pErrorsFixable = 0
, pTaskTimestamp = Nothing
}
)
(VerifyResult $ maybeToList expectedError)

mockRedirect :: Maybe Text -> Status -> IO ()
mockRedirect expectedLocation expectedSocation =
run 5000 $ route "/redirect" $ pure $ toResponse
( "" :: Text
, expectedSocation
, M.fromList [(CI.map (decodeUtf8 @Text) hLocation, maybeToList expectedLocation)]
)
50 changes: 2 additions & 48 deletions tests/Test/Xrefcheck/TooManyRequestsSpec.hs
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,12 @@ import Network.HTTP.Types (Status (..), ok200, serviceUnavailable503, tooManyReq
import Network.HTTP.Types.Header (hRetryAfter)
import Test.Tasty (TestTree, testGroup)
import Test.Tasty.HUnit (assertBool, testCase, (@?=))
import Text.Interpolation.Nyan
import Time (sec, (-:-))
import Web.Firefly (ToResponse (toResponse), getMethod, route, run)

import Xrefcheck.Config
import Test.Xrefcheck.UtilRequests
import Xrefcheck.Core
import Xrefcheck.Progress
import Xrefcheck.Scan
import Xrefcheck.Util
import Xrefcheck.Verify

Expand Down Expand Up @@ -159,51 +157,7 @@ test_tooManyRequests = testGroup "429 response tests"
]
]
where
checkLinkAndProgressWithServer mock link progress vrExpectation =
E.bracket (forkIO mock) killThread $ \_ -> do
(result, progRes) <- verifyLink link
flip assertBool (result == vrExpectation) $
[int||
Verification results differ: expected
#{interpolateIndentF 2 (show vrExpectation)}
but got
#{interpolateIndentF 2 (show result)}
|]
flip assertBool (progRes `progEquiv` progress) $
[int||
Expected the progress bar state to be
#{interpolateIndentF 2 (show progress)}
but got
#{interpolateIndentF 2 (show progRes)}
|]
where
-- | Check whether the two @Progress@ values are equal up to similarity of their essential
-- components, ignoring the comparison of @pTaskTimestamp@s, which is done to prevent test
-- failures when comparing the resulting progress, gotten from running the link
-- verification algorithm, with the expected one, where @pTaskTimestamp@ is hardcoded
-- as @Nothing@.
progEquiv :: Eq a => Progress a -> Progress a -> Bool
progEquiv p1 p2 = and [ ((==) `on` pCurrent) p1 p2
, ((==) `on` pTotal) p1 p2
, ((==) `on` pErrorsUnfixable) p1 p2
, ((==) `on` pErrorsFixable) p1 p2
]

verifyLink :: Text -> IO (VerifyResult VerifyError, Progress Int)
verifyLink link = do
let reference = Reference "" link Nothing (Position Nothing)
progRef <- newIORef $ initVerifyProgress [reference]
result <- verifyReferenceWithProgress reference progRef
progress <- readIORef progRef
return (result, vrExternal progress)

verifyReferenceWithProgress :: Reference -> IORef VerifyProgress -> IO (VerifyResult VerifyError)
verifyReferenceWithProgress reference progRef = do
fmap wrlItem <$> verifyReference
(defConfig GitHub & cExclusionsL . ecIgnoreExternalRefsToL .~ []) FullMode
progRef (RepoInfo M.empty mempty) "." "" reference

-- | When called for the first time, returns with a 429 and `Retry-After: @retryAfter@`.
-- When called for the first time, returns with a 429 and `Retry-After: @retryAfter@`.
-- Subsequent calls will respond with @status@.
mock429 :: Text -> Status -> IO ()
mock429 retryAfter status = do
Expand Down
75 changes: 75 additions & 0 deletions tests/Test/Xrefcheck/UtilRequests.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
{- SPDX-FileCopyrightText: 2019 Serokell <https://serokell.io>
-
- SPDX-License-Identifier: MPL-2.0
-}

module Test.Xrefcheck.UtilRequests
( checkLinkAndProgressWithServer
, verifyLink
, verifyReferenceWithProgress
) where

import Universum

import Control.Exception qualified as E
import Data.Map qualified as M
import Text.Interpolation.Nyan

import Control.Concurrent (forkIO, killThread)
import Test.Tasty.HUnit (assertBool)
import Xrefcheck.Config
import Xrefcheck.Core
import Xrefcheck.Progress
import Xrefcheck.Scan
import Xrefcheck.Util
import Xrefcheck.Verify

checkLinkAndProgressWithServer
:: IO ()
-> Text
-> Progress Int
-> VerifyResult VerifyError
-> IO ()
checkLinkAndProgressWithServer mock link progress vrExpectation =
E.bracket (forkIO mock) killThread $ \_ -> do
(result, progRes) <- verifyLink link
flip assertBool (result == vrExpectation) $
[int||
Verification results differ: expected
#{interpolateIndentF 2 (show vrExpectation)}
but got
#{interpolateIndentF 2 (show result)}
|]
flip assertBool (progRes `progEquiv` progress) $
[int||
Expected the progress bar state to be
#{interpolateIndentF 2 (show progress)}
but got
#{interpolateIndentF 2 (show progRes)}
|]
where
-- Check whether the two @Progress@ values are equal up to similarity of their essential
-- components, ignoring the comparison of @pTaskTimestamp@s, which is done to prevent test
-- failures when comparing the resulting progress, gotten from running the link
-- verification algorithm, with the expected one, where @pTaskTimestamp@ is hardcoded
-- as @Nothing@.
progEquiv :: Eq a => Progress a -> Progress a -> Bool
progEquiv p1 p2 = and [ ((==) `on` pCurrent) p1 p2
, ((==) `on` pTotal) p1 p2
, ((==) `on` pErrorsUnfixable) p1 p2
, ((==) `on` pErrorsFixable) p1 p2
]

verifyLink :: Text -> IO (VerifyResult VerifyError, Progress Int)
verifyLink link = do
let reference = Reference "" link Nothing (Position Nothing)
progRef <- newIORef $ initVerifyProgress [reference]
result <- verifyReferenceWithProgress reference progRef
p <- readIORef progRef
return (result, vrExternal p)

verifyReferenceWithProgress :: Reference -> IORef VerifyProgress -> IO (VerifyResult VerifyError)
verifyReferenceWithProgress reference progRef = do
fmap wrlItem <$> verifyReference
(defConfig GitHub & cExclusionsL . ecIgnoreExternalRefsToL .~ []) FullMode
progRef (RepoInfo M.empty mempty) "." "" reference
15 changes: 14 additions & 1 deletion tests/golden/check-autolinks/check-autolinks.bats
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,19 @@ assert_diff - <<EOF
- anchors:
none
All repository links are valid.
=== Invalid references found ===
➥ In file file-with-autolinks.md
bad reference (external) at src:8:0-18:
- text: "www.commonmark.org"
- link: http://www.commonmark.org
- anchor: -
Permanent redirect found. Perhaps you want to replace the link:
http://www.commonmark.org
by:
https://commonmark.org/
Invalid references dumped, 1 in total.
EOF
}

0 comments on commit 2b9bf25

Please sign in to comment.