From bb5766f959ffb9762e8de7e552f0bd2a9b6142f4 Mon Sep 17 00:00:00 2001 From: Kulikov Vladislav Date: Mon, 20 Jan 2025 10:31:06 +0300 Subject: [PATCH] [#295] fix OverlongHeaders Problem: xrefcheck may fail with OverlongHeaders making it impossible to check a given file. Solution: make it possible to configure max header length for responses that xrefcheck is handling. --- package.yaml | 3 ++- src/Xrefcheck/Command.hs | 14 +++++++++++--- src/Xrefcheck/Config.hs | 19 +++++++++++++++++++ src/Xrefcheck/Config/Default.hs | 7 +++++++ src/Xrefcheck/Verify.hs | 15 +++++++++++++-- tests/configs/github-config.yaml | 7 +++++++ 6 files changed, 59 insertions(+), 6 deletions(-) diff --git a/package.yaml b/package.yaml index 8b6692b5..12875664 100644 --- a/package.yaml +++ b/package.yaml @@ -95,7 +95,8 @@ library: - ftp-client - crypton-connection - Glob - - http-client + - http-client >= 0.7.17 + - http-client-tls - http-types - lens - modern-uri diff --git a/src/Xrefcheck/Command.hs b/src/Xrefcheck/Command.hs index 33ccdc03..4e9a6757 100644 --- a/src/Xrefcheck/Command.hs +++ b/src/Xrefcheck/Command.hs @@ -7,7 +7,9 @@ module Xrefcheck.Command ( defaultAction ) where -import Universum +import Universum hiding ((.~)) + +import Control.Lens ((.~)) import Data.Reflection (Given, give) import Data.Yaml (decodeFileEither, prettyPrintParseException) @@ -15,10 +17,12 @@ import Fmt (build, fmt, fmtLn) import System.Console.Pretty (supportsPretty) import System.Directory (doesFileExist) import Text.Interpolation.Nyan +import Network.HTTP.Client (newManager, managerSetMaxHeaderLength) +import Network.HTTP.Client.TLS (tlsManagerSettings) import Xrefcheck.CLI (Options (..), addExclusionOptions, addNetworkingOptions, defaultConfigPaths) import Xrefcheck.Config - (Config, Config' (..), ScannersConfig, ScannersConfig' (..), defConfig, overrideConfig) + (Config, Config' (..), NetworkingConfig' (..), ScannersConfig, ScannersConfig' (..), defConfig, overrideConfig, cNetworkingL, ncHttpManagerL) import Xrefcheck.Core (Flavor (..)) import Xrefcheck.Progress (allowRewrite) import Xrefcheck.Scan @@ -87,8 +91,12 @@ defaultAction Options{..} = do whenJust (nonEmpty $ sortBy (compare `on` seFile) scanErrs) reportScanErrs verifyRes <- allowRewrite showProgressBar $ \rw -> do - let fullConfig = config + let parsedConfig = config { cNetworking = addNetworkingOptions (cNetworking config) oNetworkingOptions } + + mgr <- newManager $ managerSetMaxHeaderLength (ncMaxHeaderLength (cNetworking parsedConfig)) tlsManagerSettings + let fullConfig = parsedConfig & cNetworkingL . ncHttpManagerL .~ Just mgr + verifyRepo rw fullConfig oMode repoInfo case verifyErrors verifyRes of diff --git a/src/Xrefcheck/Config.hs b/src/Xrefcheck/Config.hs index 0c130196..efe64400 100644 --- a/src/Xrefcheck/Config.hs +++ b/src/Xrefcheck/Config.hs @@ -18,6 +18,7 @@ import Data.Aeson (genericParseJSON) import Data.Yaml (FromJSON (..), decodeEither', prettyPrintParseException, withText) import Text.Regex.TDFA.Text () import Time (KnownRatName, Second, Time (..), unitsP) +import Network.HTTP.Client (Manager) import Xrefcheck.Config.Default import Xrefcheck.Core @@ -85,6 +86,19 @@ data NetworkingConfig' f = NetworkingConfig -- chain. , ncExternalRefRedirects :: Field f RedirectConfig -- ^ Rules to override the redirect behavior for external references. + , ncMaxHeaderLength :: Field f Int + -- ^ The maximum allowed total size of HTTP headers (in bytes) that can + -- be returned by the server. + -- + -- If the total size of the headers exceeds this value, the request will + -- fail with an error to prevent the processing of excessively large headers. + , ncHttpManager :: Field f (Maybe Manager) + -- ^ A custom HTTP Manager used for all HTTP requests. + -- + -- Using the same implicit global manager for provides maximal connection + -- sharing. + -- + -- If 'Nothing', a default manager will be used. } deriving stock (Generic) -- | A list of custom redirect rules. @@ -151,6 +165,8 @@ overrideConfig config , ncMaxTimeoutRetries = overrideField ncMaxTimeoutRetries , ncMaxRedirectFollows = overrideField ncMaxRedirectFollows , ncExternalRefRedirects = overrideField ncExternalRefRedirects + , ncMaxHeaderLength = overrideField ncMaxHeaderLength + , ncHttpManager = overrideField ncHttpManager } where overrideField :: (forall f. NetworkingConfig' f -> Field f a) -> a @@ -181,3 +197,6 @@ instance FromJSON (ScannersConfig) where instance FromJSON (ScannersConfig' Maybe) where parseJSON = genericParseJSON aesonConfigOption + +instance FromJSON Manager where + parseJSON _ = fail "Manager field is not configurable" diff --git a/src/Xrefcheck/Config/Default.hs b/src/Xrefcheck/Config/Default.hs index 86eec9e2..c587621f 100644 --- a/src/Xrefcheck/Config/Default.hs +++ b/src/Xrefcheck/Config/Default.hs @@ -105,6 +105,13 @@ networking: externalRefRedirects: #{interpolateIndentF 4 externalRefRedirects} + # The maximum allowed total size of HTTP headers (in bytes) that can + # be returned by the server. + # + # If the total size of the headers exceeds this value, the request will + # fail with an error to prevent the processing of excessively large headers. + maxHeaderLength: 4096 + # Parameters of scanners for various file types. scanners: # On 'anchor not found' error, how much similar anchors should be displayed as diff --git a/src/Xrefcheck/Verify.hs b/src/Xrefcheck/Verify.hs index 52ced5b9..106ad870 100644 --- a/src/Xrefcheck/Verify.hs +++ b/src/Xrefcheck/Verify.hs @@ -57,7 +57,7 @@ import Network.HTTP.Client import Network.HTTP.Req (AllowsBody, CanHaveBody (NoBody), GET (..), HEAD (..), HttpBodyAllowed, HttpConfig (httpConfigRedirectCount), HttpException (..), HttpMethod, NoReqBody (..), - defaultHttpConfig, ignoreResponse, req, runReq, useURI) + defaultHttpConfig, ignoreResponse, req, runReq, useURI, httpConfigAltManager) import Network.HTTP.Types.Header (hRetryAfter) import Network.HTTP.Types.Status (Status, statusCode, statusMessage) import Text.Interpolation.Nyan @@ -136,6 +136,7 @@ data VerifyError | RedirectMissingLocation RedirectChain | RedirectChainLimit RedirectChain | RedirectRuleError RedirectChain (Maybe RedirectRuleOn) + | MaxHeaderLengthError Int deriving stock (Show, Eq) data ResponseResult @@ -287,6 +288,11 @@ pprVerifyErr' rInfo = \case Just RROTemporary -> "Temporary redirect" Just (RROCode code) -> show code <> " redirect" + MaxHeaderLengthError len -> + [int|| + The total size of the response headers exceeds the limit of #{len} bytes. + |] <> pprLinkCtx rInfo + attachToRedirectChain :: RedirectChain -> Text -> Builder attachToRedirectChain chain attached = build chain <> build attachedText @@ -718,7 +724,10 @@ checkExternalResource followed config@Config{..} link _ -> makeHttpRequest uri GET 0.7 httpConfig :: HttpConfig - httpConfig = defaultHttpConfig { httpConfigRedirectCount = 0 } + httpConfig = defaultHttpConfig + { httpConfigRedirectCount = 0 + , httpConfigAltManager = ncHttpManager + } makeHttpRequest :: (HttpMethod method, HttpBodyAllowed (AllowsBody method) 'NoBody) @@ -812,6 +821,8 @@ checkExternalResource followed config@Config{..} link | Just (N.C.HostCannotConnect _ _) <- fromException e -> throwError ExternalResourceConnectionFailure + OverlongHeaders -> throwError $ MaxHeaderLengthError ncMaxHeaderLength + other -> throwError $ ExternalResourceSomeError $ show other where retryAfterInfo :: Response a -> Maybe RetryAfter diff --git a/tests/configs/github-config.yaml b/tests/configs/github-config.yaml index adf55523..07c7b4fc 100644 --- a/tests/configs/github-config.yaml +++ b/tests/configs/github-config.yaml @@ -94,6 +94,13 @@ networking: - on: permanent outcome: invalid + # The maximum allowed total size of HTTP headers (in bytes) that can + # be returned by the server. + # + # If the total size of the headers exceeds this value, the request will + # fail with an error to prevent the processing of excessively large headers. + maxHeaderLength: 4096 + # Parameters of scanners for various file types. scanners: # On 'anchor not found' error, how much similar anchors should be displayed as