From 657a46a7d6e25243e7ee4c1bc6f150591ba788c1 Mon Sep 17 00:00:00 2001 From: Akshay Mankar Date: Tue, 7 Jan 2025 16:31:26 +0100 Subject: [PATCH] integration: Bring back port checking for dynamic backends (#4401) Instead of killing processes, wait for the port to be freed. --- integration/default.nix | 2 + integration/integration.cabal | 1 + integration/test/Testlib/ModService.hs | 81 ++++++++++++++++++++++++++ integration/test/Testlib/Ports.hs | 1 + nix/manual-overrides.nix | 1 + 5 files changed, 86 insertions(+) diff --git a/integration/default.nix b/integration/default.nix index 32715de71bc..3443839384f 100644 --- a/integration/default.nix +++ b/integration/default.nix @@ -70,6 +70,7 @@ , stm , streaming-commons , string-conversions +, system-linux-proc , tagged , temporary , text @@ -169,6 +170,7 @@ mkDerivation { stm streaming-commons string-conversions + system-linux-proc tagged temporary text diff --git a/integration/integration.cabal b/integration/integration.cabal index 8c4e3eb456c..a90cdd090fa 100644 --- a/integration/integration.cabal +++ b/integration/integration.cabal @@ -265,6 +265,7 @@ library , stm , streaming-commons , string-conversions + , system-linux-proc , tagged , temporary , text diff --git a/integration/test/Testlib/ModService.hs b/integration/test/Testlib/ModService.hs index 7d2356b9711..cda93648798 100644 --- a/integration/test/Testlib/ModService.hs +++ b/integration/test/Testlib/ModService.hs @@ -18,10 +18,13 @@ import Control.Monad.Extra import Control.Monad.Reader import Control.Retry (fibonacciBackoff, limitRetriesByCumulativeDelay, retrying) import Data.Aeson hiding ((.=)) +import qualified Data.Attoparsec.Text as Parser +import qualified Data.Char as Char import Data.Default import Data.Foldable import Data.Function import Data.Functor +import qualified Data.List as List import Data.Maybe import Data.Monoid import Data.String @@ -29,22 +32,28 @@ import Data.String.Conversions (cs) import qualified Data.Text as Text import qualified Data.Text.IO as Text import Data.Traversable +import Data.Word import qualified Data.Yaml as Yaml import GHC.Stack import qualified Network.HTTP.Client as HTTP import System.Directory (copyFile, createDirectoryIfMissing, doesDirectoryExist, doesFileExist, listDirectory, removeDirectoryRecursive, removeFile) +import System.Exit import System.FilePath import System.IO import System.IO.Temp (createTempDirectory, writeTempFile) +import qualified System.Linux.Proc as LinuxProc import System.Posix (keyboardSignal, killProcess, signalProcess) +import System.Posix.Types import System.Process import Testlib.App import Testlib.HTTP import Testlib.JSON +import Testlib.Ports (PortNamespace (..)) import Testlib.Printing import Testlib.ResourcePool import Testlib.Types import Text.RawString.QQ +import qualified UnliftIO import Prelude withModifiedBackend :: (HasCallStack) => ServiceOverrides -> ((HasCallStack) => String -> App a) -> App a @@ -266,9 +275,81 @@ startBackend :: ServiceOverrides -> Codensity App () startBackend resource overrides = do + lift $ waitForPortsToBeFree resource traverseConcurrentlyCodensity (withProcess resource overrides) allServices lift $ ensureBackendReachable resource.berDomain +waitForPortsToBeFree :: (HasCallStack) => BackendResource -> App () +waitForPortsToBeFree backend = do + let namedPorts = + (FederatorExternal, backend.berFederatorExternal) + : (NginzHttp2, backend.berNginzHttp2Port) + : (NginzSSL, backend.berNginzSslPort) + : map (\s -> (ServiceInternal s, berInternalServicePorts backend s)) [minBound .. maxBound] + void $ UnliftIO.pooledMapConcurrentlyN 8 (uncurry $ waitForPortToBeFree backend.berDomain) namedPorts + +-- | Using lsof because it is most convenient. Checking if a port is free in Haskell involves binding to it which is not what we want. +waitForPortToBeFree :: String -> PortNamespace -> Word16 -> App () +waitForPortToBeFree domain portName portNumber = do + env <- ask + addFailureContext ("domain=" <> domain <> "\nportName=" <> show portName <> "\nportNumber=" <> show portNumber) $ + UnliftIO.timeout (env.timeOutSeconds * 1_000_000) check >>= \case + Nothing -> assertFailure $ "timeout waiting for federator port to be free: name=" <> show portName <> ", number=" <> show portNumber + Just _ -> pure () + where + check :: App () + check = do + env <- ask + let process = (proc "lsof" ["-Q", "-Fpc", "-i", ":" <> show portNumber, "-s", "TCP:LISTEN"]) {std_out = CreatePipe, std_err = CreatePipe} + (_, Just stdoutHdl, Just stderrHdl, ph) <- liftIO $ createProcess process + let prefix = "[" <> "lsof(" <> show portName <> ")@" <> domain <> maybe "" (":" <>) env.currentTestName <> "] " + liftIO $ void $ forkIO $ logToConsole id prefix stderrHdl + exitCode <- liftIO $ waitForProcess ph + case exitCode of + ExitFailure _ -> assertFailure $ prefix <> "lsof failed to figure out if port is free" + ExitSuccess -> do + lsofOutput <- liftIO $ hGetContents stdoutHdl + case parseLsof (fromString lsofOutput) of + Right procs@(_ : _) -> do + liftIO $ putStrLn $ colored red $ prefix <> "Found one or more processes listening on port: " <> show portNumber + analysis <- List.intercalate "\n" <$> mapM (liftIO . uncurry analyzeRunningProcess) procs + liftIO $ putStrLn $ indent 2 analysis + liftIO $ threadDelay 100_000 + check + Right [] -> pure () + Left e -> assertFailure $ "Failed while parsing lsof output with error: " <> e <> "\n" <> "lsof output:\n" <> lsofOutput + +analyzeRunningProcess :: ProcessID -> String -> IO String +analyzeRunningProcess pid pname = do + eithSocket <- LinuxProc.readProcTcpSockets (LinuxProc.ProcessId $ fromIntegral pid) + let sockInfo = case eithSocket of + Left e -> "Failed to read TCP sockets for process: error: " <> Text.unpack (LinuxProc.renderProcError e) + Right socks -> List.intercalate "\n" $ map displaySocket socks + pure $ "Process: pid=" <> show pid <> ", name=" <> pname <> "\n" <> indent 2 sockInfo + where + displaySocket :: LinuxProc.TcpSocket -> String + displaySocket sock = "local address = " <> show sock.tcpLocalAddr <> ", remote address = " <> show sock.tcpRemoteAddr <> ", tcp state = " <> show sock.tcpTcpState + +-- | Example lsof output: +-- +-- @ +-- p61317 +-- cfederator +-- +-- @ +parseLsof :: String -> Either String [(ProcessID, String)] +parseLsof output = + Parser.parseOnly (listParser <* trailingSpace <* Parser.endOfInput) (fromString output) + where + lsofParser :: Parser.Parser (ProcessID, String) + lsofParser = + (,) <$> processIdParser <* Parser.char '\n' <*> processNameParser + processIdParser = Parser.char 'p' *> Parser.decimal + processNameParser = Parser.char 'c' *> Parser.many1 (Parser.satisfy (/= '\n')) + + listParser = (Parser.sepBy lsofParser (Parser.char '\n')) + trailingSpace = Parser.many' (Parser.satisfy Char.isSpace) + ensureBackendReachable :: (HasCallStack) => String -> App () ensureBackendReachable domain = do env <- ask diff --git a/integration/test/Testlib/Ports.hs b/integration/test/Testlib/Ports.hs index 52d9aa2d05c..2574077a038 100644 --- a/integration/test/Testlib/Ports.hs +++ b/integration/test/Testlib/Ports.hs @@ -8,6 +8,7 @@ data PortNamespace | NginzHttp2 | FederatorExternal | ServiceInternal Service + deriving (Show, Eq) port :: (Num a) => PortNamespace -> BackendName -> a port NginzSSL bn = mkPort 8443 bn diff --git a/nix/manual-overrides.nix b/nix/manual-overrides.nix index 2d16e57bd25..89369893038 100644 --- a/nix/manual-overrides.nix +++ b/nix/manual-overrides.nix @@ -59,6 +59,7 @@ hself: hsuper: { # (we can unfortunately not do anything here but update nixpkgs) # ------------------------------------ template = hlib.markUnbroken hsuper.template; + system-linux-proc = hlib.markUnbroken hsuper.system-linux-proc; # ----------------- # version overrides