merge logmonitor and startup logmonitor #1679

Merged · 6 commits · Oct 3, 2023
3 changes: 3 additions & 0 deletions scripts/cnode-helper-scripts/cncli.sh
@@ -81,6 +81,9 @@ createBlocklogDB() {
CREATE TABLE epochdata (id INTEGER PRIMARY KEY AUTOINCREMENT, epoch INTEGER NOT NULL, epoch_nonce TEXT NOT NULL, pool_id TEXT NOT NULL, sigma TEXT NOT NULL, d REAL NOT NULL, epoch_slots_ideal INTEGER NOT NULL, max_performance REAL NOT NULL, active_stake TEXT NOT NULL, total_active_stake TEXT NOT NULL, UNIQUE(epoch,pool_id));
CREATE INDEX idx_epochdata_epoch ON epochdata (epoch);
CREATE INDEX idx_epochdata_pool_id ON epochdata (pool_id);
CREATE TABLE validationlog (id INTEGER PRIMARY KEY AUTOINCREMENT, event TEXT NOT NULL, at TEXT NOT NULL UNIQUE, env TEXT NOT NULL, final_chunk INTEGER, initial_chunk INTEGER);
CREATE TABLE replaylog (id INTEGER PRIMARY KEY AUTOINCREMENT, event TEXT NOT NULL, at TEXT NOT NULL UNIQUE, env TEXT NOT NULL, slot INTEGER, tip INTEGER);
CREATE TABLE statistics (id INTEGER PRIMARY KEY AUTOINCREMENT, event TEXT NOT NULL, start INTEGER, end INTEGER, env TEXT NOT NULL);
PRAGMA user_version = 1;
EOF
echo "SQLite blocklog DB created: ${BLOCKLOG_DB}"
19 changes: 1 addition & 18 deletions scripts/cnode-helper-scripts/deploy-as-systemd.sh
@@ -290,7 +290,7 @@ After=${vname}.service
[Service]
Type=simple
Restart=on-failure
RestartSec=20
RestartSec=1
User=$USER
WorkingDirectory=${CNODE_HOME}/scripts
ExecStart=/bin/bash -l -c \"exec ${CNODE_HOME}/scripts/logMonitor.sh\"
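With the startup log monitor merged away, chain-validation and ledger-replay metrics come from this single unit. To follow it after deployment (illustrative commands; substitute your ${vname}, e.g. cnode):

sudo systemctl status "${vname}-logmonitor.service"
sudo journalctl -f -u "${vname}-logmonitor.service"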
@@ -400,27 +400,10 @@ else
fi
fi

echo
echo -e "\e[32m~~ Startup Log Monitor ~~\e[0m"
echo "Parses JSON log of cardano-node for traces of interest to provide metrics for chain validation and ledger replay startup events."
echo "Optional to use."
echo
echo "Deploy Startup Log Monitor as systemd services? [y|n]"
read -rsn1 yn
if [[ ${yn} = [Yy]* ]]; then
./startupLogMonitor.sh -d
else
if [[ -f /etc/systemd/system/${vname}-startup-logmonitor.service ]]; then
sudo systemctl disable ${vname}-startup-logmonitor.service
sudo rm -f /etc/systemd/system/${vname}-startup-logmonitor.service
fi
fi

echo
sudo systemctl daemon-reload
[[ -f /etc/systemd/system/${vname}.service ]] && sudo systemctl enable ${vname}.service
[[ -f /etc/systemd/system/${vname}-logmonitor.service ]] && sudo systemctl enable ${vname}-logmonitor.service
[[ -f /etc/systemd/system/${vname}-startup-logmonitor.service ]] && sudo systemctl enable ${vname}-startup-logmonitor.service
[[ -f /etc/systemd/system/${vname}-tu-fetch.service ]] && sudo systemctl enable ${vname}-tu-fetch.service
[[ -f /etc/systemd/system/${vname}-tu-restart.timer ]] && sudo systemctl enable ${vname}-tu-restart.timer
[[ -f /etc/systemd/system/${vname}-tu-push.timer ]] && sudo systemctl enable ${vname}-tu-push.timer
182 changes: 164 additions & 18 deletions scripts/cnode-helper-scripts/logMonitor.sh
@@ -2,18 +2,150 @@
#shellcheck disable=SC2086,SC2001,SC2154
#shellcheck source=/dev/null

######################################
# User Variables - Change as desired #
# Common variables set in env file #
######################################


######################################
# Do NOT modify code below #
######################################

# Process the log entry if it matches a monitored trace and that trace's category is enabled
processLogInfo() {
case "$1" in
TraceNodeIsLeader )
if [[ "$BLOCKLOG_ENABLED" = true ]]; then
if ! at="$(jq -er '.at' <<< ${logentry})"; then echo "ERROR[TraceNodeIsLeader]: invalid json schema, '.at' not found" && :; else at="$(sed 's/\.[0-9]\{2\}Z/+00:00/' <<< ${at})"; fi
if ! slot="$(jq -er '.data.val.slot' <<< ${logentry})"; then echo "ERROR[TraceNodeIsLeader]: invalid json schema, '.data.val.slot' not found" && :; fi
getNodeMetrics
[[ ${epochnum} -le 0 ]] && echo "ERROR[TraceNodeIsLeader]: failed to grab current epoch number from node metrics" && :
echo "LEADER: epoch[${epochnum}] slot[${slot}] at[${at}]"
sqlite3 "${BLOCKLOG_DB}" "INSERT OR IGNORE INTO blocklog (slot,at,epoch,status) values (${slot},'${at}',${epochnum},'leader');"
fi
;;
TraceAdoptedBlock )
if [[ "$BLOCKLOG_ENABLED" = true ]]; then
if ! slot="$(jq -er '.data.val.slot' <<< ${logentry})"; then echo "ERROR[TraceAdoptedBlock]: invalid json schema, '.data.val.slot' not found" && :; fi
if ! hash="$(jq -er '.data.val.blockHash' <<< ${logentry})"; then echo "ERROR[TraceAdoptedBlock]: invalid json schema, '.data.val.blockHash' not found" && :; fi
if ! size="$(jq -er '.data.val.blockSize' <<< ${logentry})"; then echo "ERROR[TraceAdoptedBlock]: invalid json schema, '.data.val.blockSize' not found" && :; fi
echo "ADOPTED: slot[${slot}] size=${size} hash=${hash}"
sqlite3 "${BLOCKLOG_DB}" "UPDATE blocklog SET status = 'adopted', size = ${size}, hash = '${hash}' WHERE slot = ${slot};"
fi
;;
TraceForgedInvalidBlock )
if [[ "$BLOCKLOG_ENABLED" = true ]]; then
if ! slot="$(jq -er '.data.val.slot' <<< ${logentry})"; then echo "ERROR[TraceForgedInvalidBlock]: invalid json schema, '.data.val.slot' not found" && :; fi
json_trace="$(jq -c -r '. | @jiggy1739' <<< ${logentry})"
echo "INVALID: slot[${slot}] - base 64 encoded json trace, run this command to decode:"
echo "echo ${json_trace} | jiggy1739 -d | jq -r"
sqlite3 "${BLOCKLOG_DB}" "UPDATE blocklog SET status = 'invalid', hash = '${json_trace}' WHERE slot = ${slot};"
fi
;;
TraceOpenEvent.StartedOpeningDB )
if [[ "$CHUNKVALIDATAIONLOG_ENABLED" = true ]]; then
if ! at="$(jq -er '.at' <<< ${logentry})"; then echo "ERROR[TraceOpenEvent]: invalid json schema, '.at' not found" && :; else at="$(sed 's/\.[0-9]\{2\}Z/+00:00/' <<< ${at})"; fi
if ! env="$(jq -er '.env' <<< ${logentry})"; then echo "ERROR[TraceOpenEvent]: invalid json schema, '.env' not found" && :; fi
if ! event="$(jq -er '.data.kind' <<< ${logentry})"; then echo "ERROR[TraceOpenEvent]: invalid json schema, '.data.kind' not found" && :; fi
sqlite3 "${BLOCKLOG_DB}" "INSERT OR IGNORE INTO statistics (event,start,env) values ('OpenDB','${at}','${env}');"
fi
;;
TraceOpenEvent.OpenedDB )
if [[ "$CHUNKVALIDATAIONLOG_ENABLED" = true ]]; then
if ! at="$(jq -er '.at' <<< ${logentry})"; then echo "ERROR[TraceOpenEvent]: invalid json schema, '.at' not found" && :; else at="$(sed 's/\.[0-9]\{2\}Z/+00:00/' <<< ${at})"; fi
if ! env="$(jq -er '.env' <<< ${logentry})"; then echo "ERROR[TraceOpenEvent]: invalid json schema, '.env' not found" && :; fi
if ! event="$(jq -er '.data.kind' <<< ${logentry})"; then echo "ERROR[TraceOpenEvent]: invalid json schema, '.data.kind' not found" && :; fi
sqlite3 "${BLOCKLOG_DB}" "UPDATE statistics SET end = '${at}' WHERE id IN (SELECT id FROM (SELECT * FROM statistics WHERE event = 'OpenDB' AND env = '${env}' AND end is Null ORDER BY event DESC) ORDER BY id DESC LIMIT 1) ;"
fi
;;
TraceOpenEvent.StartedOpeningImmutableDB )
if [[ "$CHUNKVALIDATAIONLOG_ENABLED" = true ]]; then
if ! at="$(jq -er '.at' <<< ${logentry})"; then echo "ERROR[TraceOpenEvent]: invalid json schema, '.at' not found" && :; else at="$(sed 's/\.[0-9]\{2\}Z/+00:00/' <<< ${at})"; fi
if ! env="$(jq -er '.env' <<< ${logentry})"; then echo "ERROR[TraceOpenEvent]: invalid json schema, '.env' not found" && :; fi
if ! event="$(jq -er '.data.kind' <<< ${logentry})"; then echo "ERROR[TraceOpenEvent]: invalid json schema, '.data.kind' not found" && :; fi
sqlite3 "${BLOCKLOG_DB}" "INSERT OR IGNORE INTO statistics (event,start,env) values ('OpenImmutableDB','${at}','${env}');"
fi
;;
TraceOpenEvent.OpenedImmutableDB )
if [[ "$CHUNKVALIDATAIONLOG_ENABLED" = true ]]; then
if ! at="$(jq -er '.at' <<< ${logentry})"; then echo "ERROR[TraceOpenEvent]: invalid json schema, '.at' not found" && :; else at="$(sed 's/\.[0-9]\{2\}Z/+00:00/' <<< ${at})"; fi
if ! env="$(jq -er '.env' <<< ${logentry})"; then echo "ERROR[TraceOpenEvent]: invalid json schema, '.env' not found" && :; fi
if ! event="$(jq -er '.data.kind' <<< ${logentry})"; then echo "ERROR[TraceOpenEvent]: invalid json schema, '.data.kind' not found" && :; fi
sqlite3 "${BLOCKLOG_DB}" "UPDATE statistics SET end = '${at}' WHERE id IN (SELECT id FROM (SELECT * FROM statistics WHERE event = 'OpenImmutableDB' AND env = '${env}' AND end is Null ORDER BY event DESC) ORDER BY id DESC LIMIT 1) ;"
fi
;;
TraceOpenEvent.StartedOpeningLgrDB )
if [[ "$LEDGERREPLAYLOG_ENABLED" = true ]]; then
if ! at="$(jq -er '.at' <<< ${logentry})"; then echo "ERROR[TraceOpenEvent]: invalid json schema, '.at' not found" && :; else at="$(sed 's/\.[0-9]\{2\}Z/+00:00/' <<< ${at})"; fi
if ! env="$(jq -er '.env' <<< ${logentry})"; then echo "ERROR[TraceOpenEvent]: invalid json schema, '.env' not found" && :; fi
if ! event="$(jq -er '.data.kind' <<< ${logentry})"; then echo "ERROR[TraceOpenEvent]: invalid json schema, '.data.kind' not found" && :; fi
sqlite3 "${BLOCKLOG_DB}" "INSERT OR IGNORE INTO statistics (event,start,env) values ('OpenLedgerDB','${at}','${env}');"
fi
;;
TraceOpenEvent.OpenedLgrDB )
if [[ "$LEDGERREPLAYLOG_ENABLED" = true ]]; then
if ! at="$(jq -er '.at' <<< ${logentry})"; then echo "ERROR[TraceOpenEvent]: invalid json schema, '.at' not found" && :; else at="$(sed 's/\.[0-9]\{2\}Z/+00:00/' <<< ${at})"; fi
if ! env="$(jq -er '.env' <<< ${logentry})"; then echo "ERROR[TraceOpenEvent]: invalid json schema, '.env' not found" && :; fi
if ! event="$(jq -er '.data.kind' <<< ${logentry})"; then echo "ERROR[TraceOpenEvent]: invalid json schema, '.data.kind' not found" && :; fi
sqlite3 "${BLOCKLOG_DB}" "UPDATE statistics SET end = '${at}' WHERE id IN (SELECT id FROM (SELECT * FROM statistics WHERE event = 'OpenLedgerDB' AND env = '${env}' AND end is Null ORDER BY event DESC) ORDER BY id DESC LIMIT 1) ;"
fi
;;
TraceImmutableDBEvent.StartedValidatingChunk )
if [[ "$LEDGERREPLAYLOG_ENABLED" = true ]]; then
if ! at="$(jq -er '.at' <<< ${logentry})"; then echo "ERROR[TraceImmutableDBEvent]: invalid json schema, '.at' not found" && :; else at="$(sed 's/\.[0-9]\{2\}Z/+00:00/' <<< ${at})"; fi
if ! env="$(jq -er '.env' <<< ${logentry})"; then echo "ERROR[TraceImmutableDBEvent]: invalid json schema, '.env' not found" && :; fi
if ! event="$(jq -er '.data.kind' <<< ${logentry})"; then echo "ERROR[TraceImmutableDBEvent]: invalid json schema, '.data.kind' not found" && :; fi
if ! initialChunk="$(jq -er '.data.initialChunk' <<< ${logentry})"; then echo "ERROR[TraceImmutableDBEvent]: invalid json schema, '.data.initialChunk' not found" && :; fi
if ! finalChunk="$(jq -er '.data.finalChunk' <<< ${logentry})"; then echo "ERROR[TraceImmutableDBEvent]: invalid json schema, '.data.finalChunk' not found" && :; fi
sqlite3 "${BLOCKLOG_DB}" "INSERT OR IGNORE INTO validationlog (event,at,env,final_chunk,initial_chunk) values ('${event}','${at}','${env}',${finalChunk},${initialChunk});"
fi
;;
TraceImmutableDBEvent.ValidatedLastLocation )
if [[ "$LEDGERREPLAYLOG_ENABLED" = true ]]; then
if ! at="$(jq -er '.at' <<< ${logentry})"; then echo "ERROR[TraceImmutableDBEvent]: invalid json schema, '.at' not found" && :; else at="$(sed 's/\.[0-9]\{2\}Z/+00:00/' <<< ${at})"; fi
if ! env="$(jq -er '.env' <<< ${logentry})"; then echo "ERROR[TraceImmutableDBEvent]: invalid json schema, '.env' not found" && :; fi
if ! event="$(jq -er '.data.kind' <<< ${logentry})"; then echo "ERROR[TraceImmutableDBEvent]: invalid json schema, '.data.kind' not found" && :; fi
if ! initialChunk="$(jq -er '.data.initialChunk' <<< ${logentry})"; then echo "ERROR[TraceImmutableDBEvent]: invalid json schema, '.data.initialChunk' not found" && :; fi
if ! finalChunk="$(jq -er '.data.finalChunk' <<< ${logentry})"; then echo "ERROR[TraceImmutableDBEvent]: invalid json schema, '.data.finalChunk' not found" && :; fi
sqlite3 "${BLOCKLOG_DB}" "INSERT OR IGNORE INTO validationlog (event,at,env,final_chunk,initial_chunk) values ('${event}','${at}','${env}',${finalChunk},${initialChunk});"
fi
;;
TraceLedgerReplayEvent.ReplayedBlock )
if [[ "$LEDGERREPLAYLOG_ENABLED" = true ]]; then
if ! at="$(jq -er '.at' <<< ${logentry})"; then echo "ERROR[TraceLedgerReplayEvent]: invalid json schema, '.at' not found" && :; else at="$(sed 's/\.[0-9]\{2\}Z/+00:00/' <<< ${at})"; fi
if ! env="$(jq -er '.env' <<< ${logentry})"; then echo "ERROR[TraceLedgerReplayEvent]: invalid json schema, '.env' not found" && :; fi
if ! event="$(jq -er '.data.kind' <<< ${logentry})"; then echo "ERROR[TraceLedgerReplayEvent]: invalid json schema, '.data.kind' not found" && :; fi
if ! slot="$(jq -er '.data.slot' <<< ${logentry})"; then echo "ERROR[TraceLedgerReplayEvent]: invalid json schema, '.data.slot' not found" && :; fi
if ! tip="$(jq -er '.data.tip' <<< ${logentry})"; then echo "ERROR[TraceLedgerReplayEvent]: invalid json schema, '.data.tip' not found" && :; fi
sqlite3 "${BLOCKLOG_DB}" "INSERT OR IGNORE INTO replaylog (event,at,env,slot,tip) values ('${event}','${at}','${env}',${slot},${tip});"
fi
;;
* ) : ;; # ignore
esac
}
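processLogInfo() dispatches on its first argument and reads the JSON line from the global ${logentry} set by the read loop below. A hand-rolled call for testing could look like this (the JSON is a made-up sample carrying only the fields this handler reads):

logentry='{"at":"2023-10-03T12:00:00.00Z","env":"mainnet","data":{"kind":"StartedOpeningDB"}}'
processLogInfo "TraceOpenEvent.StartedOpeningDB"   # inserts an OpenDB row into statistics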


if renice_cmd="$(command -v renice)"; then ${renice_cmd} -n 19 $$ >/dev/null; fi

PARENT="$(dirname $0)"
if [[ ! -f "${PARENT}"/env ]]; then
echo "ERROR: could not find common env file, please run guild-deploy.sh or manually download"
exit 1
fi
if ! . "${PARENT}"/env; then exit 1; fi

. "${PARENT}"/env offline
return_code=$?
if [[ $return_code -eq 0 || $return_code -eq 2 ]]; then
:
else
echo "ERROR: Received exit code: ${?} from ${PARENT}/env."
exit 1
fi

if [[ -z "$BLOCKLOG_ENABLED" ]]; then BLOCKLOG_ENABLED=true; fi
if [[ -z "$CHUNKVALIDATAIONLOG_ENABLED" ]]; then CHUNKVALIDATAIONLOG_ENABLED=true; fi
if [[ -z "$LEDGERREPLAYLOG_ENABLED" ]]; then LEDGERREPLAYLOG_ENABLED=true; fi

# get log file from config file specified in env
unset logfile
Expand All @@ -30,31 +162,45 @@ echo "~~~~~~~~~~~~~~~~~~~~~~~~~"
echo "~~ LOG MONITOR STARTED ~~"
echo "monitoring ${logfile} for traces"

# Continuously parse cardano-node json log file for traces
# Continuously parse cardano-node json log file for traces and call processLogInfo()
while read -r logentry; do
# Traces monitored: TraceNodeIsLeader, TraceAdoptedBlock, TraceForgedInvalidBlock, TraceOpenEvent.*, TraceImmutableDBEvent.*, TraceLedgerReplayEvent.ReplayedBlock
case "${logentry}" in
*TraceNodeIsLeader* )
if ! at="$(jq -er '.at' <<< ${logentry})"; then echo "ERROR[TraceNodeIsLeader]: invalid json schema, '.at' not found" && continue; else at="$(sed 's/\.[0-9]\{2\}Z/+00:00/' <<< ${at})"; fi
if ! slot="$(jq -er '.data.val.slot' <<< ${logentry})"; then echo "ERROR[TraceNodeIsLeader]: invalid json schema, '.data.val.slot' not found" && continue; fi
getNodeMetrics
[[ ${epochnum} -le 0 ]] && echo "ERROR[TraceNodeIsLeader]: failed to grab current epoch number from node metrics" && continue
echo "LEADER: epoch[${epochnum}] slot[${slot}] at[${at}]"
sqlite3 "${BLOCKLOG_DB}" "INSERT OR IGNORE INTO blocklog (slot,at,epoch,status) values (${slot},'${at}',${epochnum},'leader');"
processLogInfo "TraceNodeIsLeader"
;;
*TraceAdoptedBlock* )
if ! slot="$(jq -er '.data.val.slot' <<< ${logentry})"; then echo "ERROR[TraceAdoptedBlock]: invalid json schema, '.data.val.slot' not found" && continue; fi
if ! hash="$(jq -er '.data.val.blockHash' <<< ${logentry})"; then echo "ERROR[TraceAdoptedBlock]: invalid json schema, '.data.val.blockHash' not found" && continue; fi
if ! size="$(jq -er '.data.val.blockSize' <<< ${logentry})"; then echo "ERROR[TraceAdoptedBlock]: invalid json schema, '.data.val.blockSize' not found" && continue; fi
echo "ADOPTED: slot[${slot}] size=${size} hash=${hash}"
sqlite3 "${BLOCKLOG_DB}" "UPDATE blocklog SET status = 'adopted', size = ${size}, hash = '${hash}' WHERE slot = ${slot};"
processLogInfo "TraceAdoptedBlock"
;;
*TraceForgedInvalidBlock* )
if ! slot="$(jq -er '.data.val.slot' <<< ${logentry})"; then echo "ERROR[TraceForgedInvalidBlock]: invalid json schema, '.data.val.slot' not found" && continue; fi
json_trace="$(jq -c -r '. | @base64' <<< ${logentry})"
echo "INVALID: slot[${slot}] - base 64 encoded json trace, run this command to decode:"
echo "echo ${json_trace} | base64 -d | jq -r"
sqlite3 "${BLOCKLOG_DB}" "UPDATE blocklog SET status = 'invalid', hash = '${json_trace}' WHERE slot = ${slot};"
processLogInfo "TraceForgedInvalidBlock"
;;
*TraceOpenEvent.StartedOpeningDB* )
processLogInfo "TraceOpenEvent.StartedOpeningDB"
;;
*TraceOpenEvent.OpenedDB* )
processLogInfo "TraceOpenEvent.OpenedDB"
;;
*TraceOpenEvent.StartedOpeningImmutableDB* )
processLogInfo "TraceOpenEvent.StartedOpeningImmutableDB"
;;
*TraceOpenEvent.OpenedImmutableDB* )
processLogInfo "TraceOpenEvent.OpenedImmutableDB"
;;
*TraceOpenEvent.StartedOpeningLgrDB* )
processLogInfo "TraceOpenEvent.StartedOpeningLgrDB"
;;
*TraceOpenEvent.OpenedLgrDB* )
processLogInfo "TraceOpenEvent.OpenedLgrDB"
;;
*TraceImmutableDBEvent.StartedValidatingChunk* )
processLogInfo "TraceImmutableDBEvent.StartedValidatingChunk"
;;
*TraceImmutableDBEvent.ValidatedLastLocation* )
processLogInfo "TraceImmutableDBEvent.ValidatedLastLocation"
;;
*TraceLedgerReplayEvent.ReplayedBlock* )
processLogInfo "TraceLedgerReplayEvent.ReplayedBlock"
;;
* ) : ;; # ignore
esac