Skip to content

Commit

Permalink
Improve NewEpochState logging (#5854)
Browse files Browse the repository at this point in the history
* Output pretty NewEpochState as pretty json instead of using show

* Output the diffs between epoch state transitions in startLedgerNewEpochStateLogging

* Replace usage of resourcet's allocate with resourceForkWith as this is
the recommended concurrency primitive the library exposes
Update `runInBackground` with the ability to execute an IO action when
the background thread is terminated due to an unexpected exception
  • Loading branch information
Jimbo4350 authored May 28, 2024
1 parent 9ed9aca commit 6a2eefe
Show file tree
Hide file tree
Showing 6 changed files with 170 additions and 37 deletions.
4 changes: 2 additions & 2 deletions cabal.project
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ packages:
trace-resources
trace-forward

program-options
ghc-options: -Werror
-- program-options
-- ghc-options: -Werror

test-show-details: direct

Expand Down
2 changes: 2 additions & 0 deletions cardano-testnet/cardano-testnet.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ library
build-depends: aeson
, aeson-pretty
, ansi-terminal
, async
, bytestring
, cardano-api ^>= 8.46
, cardano-cli ^>= 8.23
Expand All @@ -57,6 +58,7 @@ library
, containers
, contra-tracer
, data-default-class
, Diff
, directory
, exceptions
, filepath
Expand Down
3 changes: 1 addition & 2 deletions cardano-testnet/src/Testnet/Components/Query.hs
Original file line number Diff line number Diff line change
Expand Up @@ -248,13 +248,12 @@ getEpochStateView
:: HasCallStack
=> MonadResource m
=> MonadTest m
=> MonadCatch m
=> NodeConfigFile In -- ^ node Yaml configuration file path
-> SocketPath -- ^ node socket path
-> m EpochStateView
getEpochStateView nodeConfigFile socketPath = withFrozenCallStack $ do
epochStateView <- H.evalIO $ newIORef Nothing
runInBackground . runExceptT . foldEpochState nodeConfigFile socketPath QuickValidation (EpochNo maxBound) Nothing
runInBackground (return ()) . runExceptT . foldEpochState nodeConfigFile socketPath QuickValidation (EpochNo maxBound) Nothing
$ \epochState slotNumber blockNumber -> do
liftIO . writeIORef epochStateView $ Just (epochState, slotNumber, blockNumber)
pure ConditionNotMet
Expand Down
47 changes: 38 additions & 9 deletions cardano-testnet/src/Testnet/Property/Util.hs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE MonoLocalBinds #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
Expand All @@ -15,11 +16,13 @@ module Testnet.Property.Util

import Cardano.Api

import Control.Exception.Safe (MonadCatch)
import Control.Concurrent.Async
import qualified Control.Exception as E
import Control.Monad
import Control.Monad.Trans.Resource
import qualified Data.Aeson as Aeson
import GHC.Stack
import Network.Mux.Trace
import qualified System.Environment as IO
import System.Info (os)
import qualified System.IO.Unsafe as IO
Expand Down Expand Up @@ -60,18 +63,44 @@ integrationWorkspace workspaceName f = withFrozenCallStack $
isLinux :: Bool
isLinux = os == "linux"


-- | Runs an action in background, and registers cleanup to `MonadResource m`
-- The argument forces IO monad to prevent leaking of `MonadResource` to the child thread
-- Concurrency is tricky in the 'ResourceT' monad. See the "Concurrency" section of
-- https://www.fpcomplete.com/blog/understanding-resourcet/.
runInBackground :: MonadTest m
=> MonadResource m
=> MonadCatch m
=> IO a
=> IO ()
-> IO a
-> m ()
runInBackground act = void . H.evalM $ allocate (H.async act) cleanUp
where
cleanUp :: H.Async a -> IO ()
cleanUp a = H.cancel a >> void (H.link a)
runInBackground runOnException act =
void . H.evalIO
$ runResourceT
-- We don't 'wait' because this "background process" may not terminate.
-- If we 'wait' and it doesn't terminate, 'ResourceT' will not kill it
-- and the test will hang indefinitely.
-- Not waiting isn't a problem because this "background process"
-- is meant to run indefinitely and will be cleaned up by
-- 'ResourceT' when the test ends or fails.
-- We use 'asyncWithUnmask' because our logging thread is terminated via an exception.
-- In order to avoid competing for a file handle we must catch the exception which signals
-- the logging file is no longer being written to and we can now run the desired additional IO action we
-- want (runOnException). Attempting to share the 'FileHandle' and use concurrency primitives was not fruitful
-- and the section "Other ways to abuse ResourceT" in https://www.fpcomplete.com/blog/understanding-resourcet/
-- confirms this is problematic in 'ResourceT'.
$ resourceForkWith (\_ -> do r <- H.asyncWithUnmask (\restore -> restore act `E.onException` runOnException)
linkOnly ignoreException r
) $ return ()
where
ignoreException :: E.SomeException -> Bool
ignoreException e =
case E.fromException e of
Just (MuxError errType _) ->
case errType of
MuxBearerClosed -> False
-- This is expected as the background thread is killed.
-- However we do want to be made aware about other
-- exceptions.
_ -> True
_ -> False

decodeEraUTxO :: (IsShelleyBasedEra era, MonadTest m) => ShelleyBasedEra era -> Aeson.Value -> m (UTxO era)
decodeEraUTxO _ = H.jsonErrorFail . Aeson.fromJSON
Expand Down
141 changes: 123 additions & 18 deletions cardano-testnet/src/Testnet/Runtime.hs
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
{-# LANGUAGE DuplicateRecordFields #-}
{-# LANGUAGE ExistentialQuantification #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}

{-# OPTIONS_GHC -Wno-orphans #-}

module Testnet.Runtime
( startNode
, startLedgerNewEpochStateLogging
Expand All @@ -13,14 +16,25 @@ module Testnet.Runtime
import Cardano.Api
import qualified Cardano.Api as Api

import qualified Cardano.Ledger.Api as L
import qualified Cardano.Ledger.Shelley.LedgerState as L

import Prelude

import Control.Exception.Safe
import Control.Monad
import Control.Monad.State.Strict (StateT)
import Control.Monad.State.Strict
import Control.Monad.Trans.Resource
import Data.Aeson
import Data.Aeson.Encode.Pretty (encodePretty)
import Data.Algorithm.Diff
import Data.Algorithm.DiffOutput
import qualified Data.ByteString.Lazy.Char8 as BSC
import Data.Function
import qualified Data.List as List
import Data.Text (Text, unpack)
import qualified Data.Text as Text
import qualified Data.Text.IO as Text
import GHC.Stack
import qualified GHC.Stack as GHC
import Network.Socket (PortNumber)
Expand All @@ -33,8 +47,9 @@ import qualified System.Process as IO
import Testnet.Filepath
import qualified Testnet.Ping as Ping
import Testnet.Process.Run
import Testnet.Property.Util (runInBackground)
import Testnet.Types hiding (testnetMagic)
import Testnet.Property.Util
import Testnet.Types (NodeRuntime (NodeRuntime), TestnetRuntime (configurationFile),
poolSprockets)

import Hedgehog (MonadTest)
import qualified Hedgehog as H
Expand Down Expand Up @@ -169,7 +184,12 @@ createSubdirectoryIfMissingNew parent subdirectory = GHC.withFrozenCallStack $ d
pure subdirectory

-- | Start ledger's new epoch state logging for the first node in the background.
-- Logs will be placed in <tmp workspace directory>/logs/ledger-new-epoch-state.log
-- Pretty JSON logs will be placed in:
-- 1. <tmp workspace directory>/logs/ledger-new-epoch-state.log
-- 2. <tmp workspace directory>/logs/ledger-new-epoch-state-diffs.log
-- NB: The diffs represent the the changes in the 'NewEpochState' between each
-- block or turn of the epoch. We have excluded the 'stashedAVVMAddresses'
-- field of 'NewEpochState' in the JSON rendering.
-- The logging thread will be cancelled when `MonadResource` releases all resources.
-- Idempotent.
startLedgerNewEpochStateLogging
Expand Down Expand Up @@ -197,27 +217,112 @@ startLedgerNewEpochStateLogging testnetRuntime tmpWorkspace = withFrozenCallStac
False -> do
H.evalIO $ appendFile logFile ""
socketPath <- H.noteM $ H.sprocketSystemName <$> H.headM (poolSprockets testnetRuntime)
_ <- runInBackground . runExceptT $
foldEpochState
(configurationFile testnetRuntime)
(Api.File socketPath)
Api.QuickValidation
(EpochNo maxBound)
()
(\epochState _ _ -> handler logFile epochState)

runInBackground
(do logFileContents <- IO.readFile logFile
let epochStateValues = epochStateBeforeAfterValues logFileContents
epochStateDiffs' = epochStateDiffs epochStateValues
Text.writeFile (logDir </> "ledger-epoch-state-diffs.log") epochStateDiffs'
)
(do void $ runExceptT $
foldEpochState
(configurationFile testnetRuntime)
(Api.File socketPath)
Api.QuickValidation
(EpochNo maxBound)
()
(\epochState _ _ ->
liftIO $ evalStateT (handler logFile epochState) 0
)
)


H.note_ $ "Started logging epoch states to: " <> logFile

where
handler :: FilePath -> AnyNewEpochState -> StateT () IO LedgerStateCondition
handler outputFp anyNewEpochState = handleException . liftIO $ do
appendFile outputFp $ "#### BLOCK ####" <> "\n"
appendFile outputFp $ show anyNewEpochState <> "\n"
pure ConditionNotMet
handler :: FilePath -> AnyNewEpochState -> StateT Int IO LedgerStateCondition
handler outputFpHandle (AnyNewEpochState sbe nes) = do
handleException . liftIO $ do
appendFile outputFpHandle $ "#### BLOCK ####" <> "\n"
appendFile outputFpHandle $ BSC.unpack (shelleyBasedEraConstraints sbe $ encodePretty nes) <> "\n"
pure ConditionNotMet
where
-- | Handle all sync exceptions and log them into the log file. We don't want to fail the test just
-- because logging has failed.
handleException = handle $ \(e :: SomeException) -> do
liftIO $ appendFile outputFp $ "Ledger new epoch logging failed - caught exception:\n"
liftIO $ appendFile outputFpHandle $ "Ledger new epoch logging failed - caught exception:\n"
<> displayException e <> "\n"
pure ConditionMet
-- TODO: Not sure why this isn't terminating. Read up on resourcet and how it works.
-- See concurrency section: https://www.fpcomplete.com/blog/understanding-resourcet/
-- Probably need to use resourceForkWith but best to not use concurrency at all!


-- | Produce tuples that represent the change of the 'NewEpochState' after
-- a transition.
epochStateBeforeAfterValues
:: String
-> [(Text, Text)]
epochStateBeforeAfterValues logFileContents =
let allEpochStates = filter (/= "") . Text.splitOn "#### BLOCK ####" $ Text.pack logFileContents
in getAllTransitions allEpochStates

getAllTransitions :: [Text] -> [(Text, Text)]
getAllTransitions [] = []
getAllTransitions trans =
let (singleTransition, rest) = splitAt 2 trans
tupleSingleTransition = toTuple singleTransition
in case singleTransition of
[a] -> [(a,"")]
_ -> tupleSingleTransition ++ getAllTransitions (snd (head tupleSingleTransition) : rest)
where
toTuple [a,b] = [(a,b)]
toTuple [] = []
toTuple _ = error "toTuple: a single transition was not generated"

epochStateDiffs
:: [(Text,Text)]
-> Text
epochStateDiffs [] = "No epoch state values to compare"
epochStateDiffs states =
-- We first get the block number changes
labelAllEpochStateTransitions
[ Text.pack $ epochStateTransitionDiff (Text.unpack i) (Text.unpack n)
| (i, n) <- states
] & Text.intercalate "\n"
where
labelSingleEpochStateTransition :: Int -> Text
labelSingleEpochStateTransition transitionNumber =
"Epoch state transition: " <> Text.pack (show transitionNumber)

labelAllEpochStateTransitions [] = []
labelAllEpochStateTransitions trans =
go trans (1 :: Int) []
where
go [] _ acc = acc
go (x:xs) 1 _ = go xs 2 [labelSingleEpochStateTransition 1, x]
go (x:xs) n acc = go xs (n + 1) (acc ++ [labelSingleEpochStateTransition n, x])

epochStateTransitionDiff
:: String -- ^ Initial epoch state
-> String -- ^ Following epoch state
-> String
epochStateTransitionDiff initialState next =
let removeBlockNumberChangeInitial = lines initialState
removeBlockNumberChangeNext = lines next
diffResult = getGroupedDiff removeBlockNumberChangeInitial removeBlockNumberChangeNext
in if null diffResult
then "No changes in epoch state"
else ppDiff diffResult

instance (L.EraTxOut ledgerera, L.EraGov ledgerera) => ToJSON (L.NewEpochState ledgerera) where
toJSON (L.NewEpochState nesEL nesBprev nesBCur nesEs nesRu nesPd _stashedAvvm)=
object
[ "currentEpoch" .= nesEL
, "priorBlocks" .= nesBprev
, "currentEpochBlocks" .= nesBCur
, "currentEpochState" .= nesEs
, "rewardUpdate" .= nesRu
, "currentStakeDistribution" .= nesPd
]

Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ hprop_gov_no_confidence = integrationWorkspace "no-confidence" $ \tempAbsBasePat

epochStateView <- getEpochStateView configurationFile (File socketPath)

H.nothingFailM $ watchEpochStateUpdate epochStateView (EpochInterval 3) $ \(anyNewEpochState, _, _) ->
H.nothingFailM $ watchEpochStateUpdate epochStateView (EpochInterval 3) $ \anyNewEpochState->
pure $ committeeIsPresent True anyNewEpochState

-- Step 2. Propose motion of no confidence. DRep and SPO voting thresholds must be met.
Expand Down Expand Up @@ -223,13 +223,11 @@ hprop_gov_no_confidence = integrationWorkspace "no-confidence" $ \tempAbsBasePat

-- Step 4. We confirm the no confidence motion has been ratified by checking
-- for an empty constitutional committee.

H.nothingFailM $ watchEpochStateUpdate epochStateView (EpochInterval 10) $ \(anyNewEpochState, _, _) ->
pure $ committeeIsPresent False anyNewEpochState
H.nothingFailM $ watchEpochStateUpdate epochStateView (EpochInterval 10) (return . committeeIsPresent False)

-- | Checks if the committee is empty or not.
committeeIsPresent :: Bool -> AnyNewEpochState -> Maybe ()
committeeIsPresent committeeExists (AnyNewEpochState sbe newEpochState) =
committeeIsPresent :: Bool -> (AnyNewEpochState, SlotNo, BlockNo) -> Maybe ()
committeeIsPresent committeeExists (AnyNewEpochState sbe newEpochState, _, _) =
caseShelleyToBabbageOrConwayEraOnwards
(const $ error "Constitutional committee does not exist pre-Conway era")
(const $ let mCommittee = newEpochState
Expand Down

0 comments on commit 6a2eefe

Please sign in to comment.