Skip to content

Commit

Permalink
[#25] Redirect links with configuration rules
Browse files Browse the repository at this point in the history
Problem: We previously changed the default behaviour of Xrefcheck when
following link redirects, but do not provided a way to configure it.

Solution: We are adding a new field in the configuration file to allow
writing a list of redirect rules that will be applied to links that
match them.
  • Loading branch information
aeqz committed Dec 29, 2022
1 parent 9213017 commit 1b508c3
Show file tree
Hide file tree
Showing 5 changed files with 179 additions and 36 deletions.
112 changes: 111 additions & 1 deletion src/Xrefcheck/Config.hs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ import Universum
import Control.Lens (makeLensesWith)
import Data.Aeson (genericParseJSON)
import Data.Yaml (FromJSON (..), decodeEither', prettyPrintParseException, withText)

import Text.Regex.TDFA.Text (Regex, regexec)
import Time (KnownRatName, Second, Time (..), unitsP)

import Xrefcheck.Config.Default
Expand Down Expand Up @@ -54,8 +54,86 @@ data NetworkingConfig' f = NetworkingConfig
, ncMaxRetries :: Field f Int
-- ^ How many attempts to retry an external link after getting
-- a "429 Too Many Requests" response.
, ncExternalRefRedirects :: RedirectConfig
-- ^ Rules to override the redirect behavior for external references.
} deriving stock (Generic)

doesMatchAnyRegex :: Text -> ([Regex] -> Bool)
doesMatchAnyRegex src = any $ \regex ->
case regexec regex src of
Right res -> case res of
Just (before, match, after, _) ->
null before && null after && not (null match)
Nothing -> False
Left _ -> False

-- | A list of custom redirect rules.
type RedirectConfig = [RedirectRule]

-- | A custom redirect rule.
data RedirectRule = RedirectRule
{ rrTo :: Maybe Regex
-- ^ Links that match to apply the rule.
--
-- 'Nothing' matches any link.
, rrOn :: Maybe RedirectRuleOn
-- ^ HTTP code selector to apply the rule.
--
-- 'Nothing' matches any code.
, rrOutcome :: RedirectRuleOutcome
-- ^ What to do when an HTTP response matches the rule.
} deriving stock (Generic)

-- | What to do when receiving a redirect HTTP response.
data RedirectRuleOutcome
= RROValid
-- ^ Consider it as valid
| RROInvalid
-- ^ Consider it as invalid
| RROFollow
-- ^ Try again by following the redirect
deriving stock (Generic)

-- | Rule selector depending on the response HTTP code.
data RedirectRuleOn
= RROCode Int
-- ^ An exact HTTP code
| RROPermanent
-- ^ Any HTTP code considered as permanent according to 'isPermanentRedirectCode'
| RROTemporary
-- ^ Any HTTP code considered as permanent according to 'isTemporaryRedirectCode'
deriving stock (Generic)

-- | Redirect rule to apply to a link when it has been responded with a given
-- HTTP code.
redirectRule :: Text -> Int -> RedirectConfig -> Maybe RedirectRuleOutcome
redirectRule link code rules =
rrOutcome <$> find (matchRule link code) rules

-- | Check if a 'RedirectRule' matches a given link and HTTP code.
matchRule :: Text -> Int -> RedirectRule -> Bool
matchRule link code RedirectRule{..} =
matchCode && matchLink
where
matchCode = case rrOn of
Nothing -> True
Just RROPermanent -> isPermanentRedirectCode code
Just RROTemporary -> isTemporaryRedirectCode code
Just (RROCode other) -> code == other

matchLink = case rrTo of
Nothing -> True
Just to -> doesMatchAnyRegex link [to]

isRedirectCode :: Int -> Bool
isRedirectCode code = code >= 300 && code < 400

isTemporaryRedirectCode :: Int -> Bool
isTemporaryRedirectCode = flip elem [302, 303, 307]

isPermanentRedirectCode :: Int -> Bool
isPermanentRedirectCode = flip elem [301, 308]

-- | Type alias for ScannersConfig' with all required fields.
type ScannersConfig = ScannersConfig' Identity

Expand Down Expand Up @@ -95,6 +173,7 @@ overrideConfig config
defScanners = cScanners $ defConfig flavor
defExclusions = cExclusions $ defConfig flavor
defNetworking = cNetworking $ defConfig flavor
defRedirectConfig = ncExternalRefRedirects $ defNetworking

overrideExclusions exclusionConfig
= ExclusionConfig
Expand All @@ -113,11 +192,16 @@ overrideConfig config
, ncIgnoreAuthFailures = overrideField ncIgnoreAuthFailures
, ncDefaultRetryAfter = overrideField ncDefaultRetryAfter
, ncMaxRetries = overrideField ncMaxRetries
, ncExternalRefRedirects = externalRefRedirects
}
where
overrideField :: (forall f. NetworkingConfig' f -> Field f a) -> a
overrideField field = fromMaybe (field defNetworking) $ field networkingConfig

externalRefRedirects :: RedirectConfig
externalRefRedirects = maybe defRedirectConfig ncExternalRefRedirects $
cNetworking config

-----------------------------------------------------------
-- Yaml instances
-----------------------------------------------------------
Expand All @@ -138,6 +222,32 @@ instance FromJSON (NetworkingConfig' Maybe) where
instance FromJSON (NetworkingConfig) where
parseJSON = genericParseJSON aesonConfigOption

instance FromJSON (RedirectRule) where
parseJSON = genericParseJSON aesonConfigOption

instance FromJSON (RedirectRuleOutcome) where
parseJSON = withText "Redirect rule outcome" $
\case
"valid" -> pure RROValid
"invalid" -> pure RROInvalid
"follow" -> pure RROFollow
_ -> fail "expected (valid|invalid|follow)"

instance FromJSON (RedirectRuleOn) where
parseJSON v = code v
<|> text v
<|> fail "expected a redirect (3XX) HTTP code or (permanent|temporary)"
where
code cv = do
i <- parseJSON @Int cv
unless (isRedirectCode i) $ fail mempty
pure $ RROCode i
text = withText "Redirect rule on" $
\case
"permanent" -> pure RROPermanent
"temporary" -> pure RROTemporary
_ -> fail mempty

instance FromJSON (ScannersConfig) where
parseJSON = genericParseJSON aesonConfigOption

Expand Down
4 changes: 4 additions & 0 deletions src/Xrefcheck/Config/Default.hs
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,10 @@ networking:
# a "429 Too Many Requests" response.
maxRetries: 3

# Rules to override the redirect behavior for external references that
# match. An example rule is {to: *, outcome: valid, on: permanent}
externalRefRedirects: []

# Parameters of scanners for various file types.
scanners:
# On 'anchor not found' error, how much similar anchors should be displayed as
Expand Down
88 changes: 59 additions & 29 deletions src/Xrefcheck/Verify.hs
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,6 @@ import System.FilePath (isPathSeparator)
import System.FilePath.Posix ((</>))
import Text.Interpolation.Nyan
import Text.ParserCombinators.ReadPrec qualified as ReadPrec (lift)
import Text.Regex.TDFA.Text (Regex, regexec)
import Text.URI (Authority (..), ParseExceptionBs, URI (..), mkURIBs)
import Time (RatioNat, Second, Time (..), ms, sec, threadDelay, timeout, (+:+), (-:-))
import URI.ByteString qualified as URIBS
Expand Down Expand Up @@ -138,8 +137,15 @@ data VerifyError
| FtpEntryDoesNotExist FilePath
| ExternalResourceSomeError Text
| PermanentRedirectError Text (Maybe Text)
| RedirectCycle [Text]
| RedirectNoLocation [Text]
| RedirectRuleError [Text]
deriving stock (Show, Eq)

data ResponseResult
= RRDone
| RRFollow Text

instance Given ColorMode => Buildable VerifyError where
build = \case
LocalFileDoesNotExist file ->
Expand Down Expand Up @@ -255,6 +261,24 @@ instance Given ColorMode => Buildable VerifyError where
#{redirectedUrl}
|]

RedirectCycle _ ->
[int||
Cycle found in the following redirect chain:
TODO print chain
|]

RedirectNoLocation _ ->
[int||
Missin location header in the following redirect chain:
TODO print chain
|]

RedirectRuleError _ ->
[int||
Redirect rule invalidates the following redirect chain:
TODO print chain
|]

reportVerifyErrs
:: Given ColorMode => NonEmpty (WithReferenceLoc VerifyError) -> IO ()
reportVerifyErrs errs = fmt
Expand Down Expand Up @@ -444,7 +468,7 @@ verifyReference
let shownFilepath = dropWhile isPathSeparator (toString rLink)
canonicalPath <- riRoot </ shownFilepath
checkRef rAnchor riRoot canonicalPath shownFilepath
RIExternal -> checkExternalResource config rLink
RIExternal -> checkExternalResource mempty config rLink
RIOtherProtocol -> verifying pass
else return mempty
where
Expand Down Expand Up @@ -642,8 +666,8 @@ parseUri link = do
& handleJust (fromException @ParseExceptionBs)
(throwError . ExternalResourceUriConversionError)

checkExternalResource :: Config -> Text -> IO (VerifyResult VerifyError)
checkExternalResource Config{..} link
checkExternalResource :: [Text] -> Config -> Text -> IO (VerifyResult VerifyError)
checkExternalResource followed config@Config{..} link
| isIgnored = return mempty
| otherwise = fmap toVerifyRes $ runExceptT $ do
uri <- parseUri link
Expand All @@ -659,15 +683,6 @@ checkExternalResource Config{..} link

isIgnored = doesMatchAnyRegex link ecIgnoreExternalRefsTo

doesMatchAnyRegex :: Text -> ([Regex] -> Bool)
doesMatchAnyRegex src = any $ \regex ->
case regexec regex src of
Right res -> case res of
Just (before, match, after, _) ->
null before && null after && not (null match)
Nothing -> False
Left _ -> False

checkHttp :: URI -> ExceptT VerifyError IO ()
checkHttp uri = makeHttpRequest uri HEAD 0.3 `catchError` \case
e | isFixable e -> throwError e
Expand Down Expand Up @@ -699,15 +714,20 @@ checkExternalResource Config{..} link

let maxTime = Time @Second $ unTime ncExternalRefCheckTimeout * timeoutFrac

mres <- liftIO (timeout maxTime $ void reqLink) `catch`
(either throwError (\() -> return (Just ())) . interpretErrors)
maybe (throwError $ ExternalResourceSomeError "Response timeout") pure mres

isTemporaryRedirectCode :: Int -> Bool
isTemporaryRedirectCode = flip elem [302, 303, 307]

isPermanentRedirectCode :: Int -> Bool
isPermanentRedirectCode = flip elem [301, 308]
reqRes <- catch (liftIO (timeout maxTime $ reqLink $> RRDone)) $ \httpErr ->
case interpretErrors httpErr of
Left err -> throwError err
Right res -> pure $ Just res

case reqRes of
Nothing -> throwError $ ExternalResourceSomeError "Response timeout"
Just RRDone -> pure ()
Just (RRFollow nextLink) -> do
when (nextLink `elem` followed) $
throwError $ RedirectCycle (nextLink : followed)
(VerifyResult errs) <- liftIO $
checkExternalResource (nextLink : followed) config nextLink
mapM_ throwError errs

isAllowedErrorCode :: Int -> Bool
isAllowedErrorCode = or . sequence
Expand All @@ -725,18 +745,28 @@ checkExternalResource Config{..} link
InvalidUrlException{} -> error "External link URL invalid exception"
HttpExceptionRequest _ exc -> case exc of
StatusCodeException resp _
| isPermanentRedirectCode code -> Left
. PermanentRedirectError link
. fmap decodeUtf8
. lookup "Location"
$ responseHeaders resp
| isTemporaryRedirectCode code -> Right ()
| isAllowedErrorCode code -> Right ()
| Just outcome <- redirectRule link code ncExternalRefRedirects ->
case outcome of
RROValid -> Right RRDone
RROInvalid -> Left $ RedirectRuleError (link : followed)
RROFollow ->
case redirectLocation of
Nothing -> Left $ RedirectNoLocation (link : followed)
Just nextLink -> Right $ RRFollow nextLink
| isPermanentRedirectCode code -> Left $ PermanentRedirectError link redirectLocation
| isTemporaryRedirectCode code -> Right RRDone
| isAllowedErrorCode code -> Right RRDone
| otherwise -> case statusCode (responseStatus resp) of
429 -> Left . ExternalHttpTooManyRequests $ retryAfterInfo resp
_ -> Left . ExternalHttpResourceUnavailable $ responseStatus resp
where
code :: Int
code = statusCode $ responseStatus resp

redirectLocation :: Maybe Text
redirectLocation = fmap decodeUtf8
. lookup "Location"
$ responseHeaders resp
other -> Left . ExternalResourceSomeError $ show other
where
retryAfterInfo :: Response a -> Maybe RetryAfter
Expand Down
7 changes: 1 addition & 6 deletions tests/Test/Xrefcheck/ConfigSpec.hs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ test_config =
testProperty (show flavor) $
ioProperty $ evaluateWHNF_ @_ @Config (defConfig flavor)
| flavor <- allFlavors]

, testGroup "Filled default config matches the expected format"
-- The config we match against can be regenerated with
-- stack exec xrefcheck -- dump-config -t GitHub -o tests/configs/github-config.yaml --force
Expand Down Expand Up @@ -83,12 +82,8 @@ test_config =
_ -> assertFailure "Config parser accepted config with unknown field"
]
]




where
checkLinkWithServer config link expectation =
E.bracket (forkIO mockServer) killThread $ \_ -> do
result <- checkExternalResource config link
result <- checkExternalResource mempty config link
result @?= expectation
4 changes: 4 additions & 0 deletions tests/configs/github-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@ networking:
# a "429 Too Many Requests" response.
maxRetries: 3

# Rules to override the redirect behavior for external references that
# match. An example rule is {to: *, outcome: valid, on: permanent}
externalRefRedirects: []

# Parameters of scanners for various file types.
scanners:
# On 'anchor not found' error, how much similar anchors should be displayed as
Expand Down

0 comments on commit 1b508c3

Please sign in to comment.