-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathelrond-exporter.sh
204 lines (179 loc) · 11.6 KB
/
elrond-exporter.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
#!/bin/bash
# Dependencies: curl, jq
# Remote metrics: as a rule of thumb, allow 1 second per node between runs, e.g. run the script every 10 seconds if you scrape metrics for 10 nodes.
# This is a safety margin; scraping 10 nodes should normally take under 5 seconds.
#LOCAL_METRICS=${E_LOCAL_METRICS:=0}
#LOCAL_NODES=$E_LOCAL_NODES
#REMOTE_METRICS=${E_REMOTE_METRICS:=1}
#IDENTITY=$E_IDENTITY
#OBSERVER_URL=${E_OBSERVER_URL:="https://api.elrond.com"}
# --- Configuration ------------------------------------------------------------
LOCAL_METRICS=0   # Set to 1 to scrape every node listed in LOCAL_NODES.
# LOCAL_NODES: array containing the nodes you want to generate local metrics from.
# Insert your own node URLs inside the parentheses.
LOCAL_NODES=(http://rpc-url:8080 http://rpc-url:8081)
REMOTE_METRICS=1  # Set to 1 to collect metrics about your nodes from OBSERVER_URL.
OBSERVER_URL="https://api.elrond.com" # The observer must be on meta chain in order to work.
IDENTITY="YOUR-KEYBASE-IDENTITY-HERE" # Edit this with your own identity

# --- Sanity checks ------------------------------------------------------------
# Nothing to scrape locally when no node is configured. The explicit [0] test
# keeps the previous behaviour for an array whose first element is empty.
if [[ ${#LOCAL_NODES[@]} -eq 0 || -z "${LOCAL_NODES[0]}" ]]; then
  LOCAL_METRICS=0
fi

if [[ -z "$IDENTITY" ]]; then
  # stdout carries the metric samples, so diagnostics must go to stderr.
  echo "Please set the IDENTITY variable." >&2
  exit 1
fi
#######################################
# Map a shard id to the value used for the shardID metric label.
# Usage:     setMetaLabel <shardId>
# Arguments: $1 - shard id as reported by the queried API
# Outputs:   "meta" when $1 is 4294967295 (the meta shard id), otherwise $1
#            unchanged; always followed by a newline.
#######################################
setMetaLabel() {
  # Compare as a string: a numeric test (-eq) would evaluate $1 as a shell
  # arithmetic expression, which is unsafe for values taken from API responses
  # and prints errors for non-numeric input.
  if [[ "$1" == "4294967295" ]]; then
    printf '%s\n' "meta"
  else
    # Quoted so the value is echoed verbatim (no globbing or word splitting).
    printf '%s\n' "$1"
  fi
}
if [[ $LOCAL_METRICS -eq 1 ]]
then
  # Local metrics: query <node>/node/status on every entry of LOCAL_NODES and
  # print the selected fields as Prometheus samples on stdout.

  # Defining the metrics used (HELP/TYPE metadata, printed once).
  printf '%s\n' \
    "# HELP elrond_node_type Elrond node type." \
    "# TYPE elrond_node_type gauge" \
    "# HELP elrond_node_sync_status 0=in sync 1=syncing." \
    "# TYPE elrond_node_sync_status gauge" \
    "# HELP elrond_node_chain_id The chain ID stripped by all characters but numbers." \
    "# TYPE elrond_node_chain_id gauge" \
    "# HELP elrond_node_epoch_number The epoch number." \
    "# TYPE elrond_node_epoch_number counter" \
    "# HELP elrond_node_shard_id The shard id. Meta shard is always 4294967295." \
    "# TYPE elrond_node_shard_id gauge" \
    "# HELP elrond_node_peers The number of peers the node is connected to." \
    "# TYPE elrond_node_peers gauge" \
    "# HELP elrond_node_validators The number of validators reported by the queried node." \
    "# TYPE elrond_node_validators gauge" \
    "# HELP elrond_node_nodes The total number of elrond nodes reported by the queried node." \
    "# TYPE elrond_node_nodes gauge" \
    "# HELP elrond_node_nonce ERD nonce" \
    "# TYPE elrond_node_nonce counter" \
    "# HELP elrond_node_shard_headers_in_pool ERD shard headers in pool" \
    "# TYPE elrond_node_shard_headers_in_pool gauge" \
    "# HELP elrond_node_shards_winthout_meta ERD number of shards without meta" \
    "# TYPE elrond_node_shards_winthout_meta gauge" \
    "# HELP elrond_node_live_validators The total number of live validators" \
    "# TYPE elrond_node_live_validators gauge" \
    "# HELP elrond_node_net_rx_bps Node incoming network traffic [bps]" \
    "# TYPE elrond_node_net_rx_bps gauge" \
    "# HELP elrond_node_net_rx_bps_peak Node incoming peak network traffic [bps]" \
    "# TYPE elrond_node_net_rx_bps_peak gauge" \
    "# HELP elrond_node_net_tx_bps Node outgoing network traffic [bps]" \
    "# TYPE elrond_node_net_tx_bps gauge" \
    "# HELP elrond_node_net_tx_bps_peak Node outgoing peak network traffic [bps]" \
    "# TYPE elrond_node_net_tx_bps_peak gauge" \
    "# HELP elrond_node_num_shards_wo_meta ERD number of shards without meta" \
    "# TYPE elrond_node_num_shards_wo_meta gauge"

  # Local metrics
  for node_url in "${LOCAL_NODES[@]}"
  do
    # If curl fails for whatever reason, emit a dedicated sample (value 2,
    # labelled with the node URL) to help identify the node, then move on.
    if ! status=$(curl -m 3 -s "$node_url/node/status")
    then
      echo "ERROR: Node $node_url cannot be parsed." >&2
      metricLabels="displayName=\"$node_url\""
      printf "%s\n" "elrond_node_sync_status{$metricLabels} 2"
      continue
    fi

    # jq prints one value per line and each line is read into the variable of
    # the same position. Reading whole lines (instead of word-splitting a single
    # joined line) keeps values that contain spaces, e.g. a display name, from
    # shifting every following field.
    # Fields with '// ""' have no fallback value: a missing one becomes an empty
    # line so the ':--9' defaults below apply instead of printing "null".
    # erdPeakTPS is not exported but must still be consumed to keep the order.
    for field in displayName nodeType syncStatus chainID appVersion epochNumber \
      shardID validatorPubkey peers validators nodes nonce shardHeadersInPool \
      shards liveValidators netRxBps netRxBpsPeak netTxBps NetTxBpsPeak \
      erdPeakTPS erdNumShards
    do
      # At end of input read leaves the variable empty, so values from a
      # previous node are never reused.
      IFS= read -r "$field"
    done < <(jq -r '.data.metrics | (
        .erd_node_display_name,
        .erd_node_type,
        .erd_is_syncing // "",
        .erd_chain_id,
        .erd_app_version,
        .erd_epoch_number // 0,
        .erd_shard_id // 0,
        .erd_public_key_block_sign,
        .erd_num_connected_peers // 0,
        .erd_num_validators // 0,
        .erd_connected_nodes // 0,
        .erd_nonce // 0,
        .erd_num_shard_headers_from_pool // 0,
        .erd_num_shards_without_meta // "",
        .erd_live_validator_nodes // 0,
        .erd_network_recv_bps // 0,
        .erd_network_recv_bps_peak // 0,
        .erd_network_sent_bps // 0,
        .erd_network_sent_bps_peak // 0,
        .erd_peak_tps // 0,
        .erd_num_shards_without_meta // 0
      )' <<< "$status")

    # Set a friendly name for the Meta shard to be used in metricLabels
    shardIDlabel=$(setMetaLabel "$shardID")

    # Set the label sets: the node's own type plus one per known node type, so
    # elrond_node_type can report 1 for the actual type and 0 for the other.
    metricLabels="displayName=\"$displayName\",nodeType=\"$nodeType\",shardID=\"$shardIDlabel\",validatorPubkey=\"$validatorPubkey\""
    metricsLabels_observer="displayName=\"$displayName\",nodeType=\"observer\",shardID=\"$shardIDlabel\",validatorPubkey=\"$validatorPubkey\""
    metricsLabels_validator="displayName=\"$displayName\",nodeType=\"validator\",shardID=\"$shardIDlabel\",validatorPubkey=\"$validatorPubkey\""

    # Make the chain id Prometheus compliant: keep digits only, as the HELP text
    # states. A chain id without any digit becomes empty and falls back to -9.
    p_chainID=${chainID//[!0-9]/}

    # Prometheus metrics
    case "$nodeType" in
      "validator")
        printf "%s\n" "elrond_node_type{$metricLabels} 1" \
          "elrond_node_type{$metricsLabels_observer} 0"
        ;;
      "observer")
        printf "%s\n" "elrond_node_type{$metricLabels} 1" \
          "elrond_node_type{$metricsLabels_validator} 0"
        ;;
    esac

    # -9 marks a value that could not be read from the node.
    printf "%s\n" "elrond_node_sync_status{$metricLabels} ${syncStatus:--9}" \
      "elrond_node_chain_id{$metricLabels} ${p_chainID:--9}" \
      "elrond_node_epoch_number{$metricLabels} ${epochNumber:--9}" \
      "elrond_node_shard_id{$metricLabels} ${shardID:--9}" \
      "elrond_node_peers{$metricLabels} ${peers:--9}" \
      "elrond_node_validators{$metricLabels} ${validators:--9}" \
      "elrond_node_nodes{$metricLabels} ${nodes:--9}" \
      "elrond_node_nonce{$metricLabels} ${nonce:--9}" \
      "elrond_node_shard_headers_in_pool{$metricLabels} ${shardHeadersInPool:--9}" \
      "elrond_node_shards_winthout_meta{$metricLabels} ${shards:--9}" \
      "elrond_node_live_validators{$metricLabels} ${liveValidators:--9}" \
      "elrond_node_net_rx_bps{$metricLabels} ${netRxBps:--9}" \
      "elrond_node_net_rx_bps_peak{$metricLabels} ${netRxBpsPeak:--9}" \
      "elrond_node_net_tx_bps{$metricLabels} ${netTxBps:--9}" \
      "elrond_node_net_tx_bps_peak{$metricLabels} ${NetTxBpsPeak:--9}" \
      "elrond_node_num_shards_wo_meta{$metricLabels} ${erdNumShards:--9}"
  done
fi
if [[ $REMOTE_METRICS -eq 1 ]]
then
  # Remote metrics - stats about our nodes as seen from a different node (the
  # observer at OBSERVER_URL) rather than from the nodes themselves.
  # Collect all elrond data once and keep it in buffers, avoiding unnecessary calls.
  if ! bufHB=$(curl -m 5 -s "${OBSERVER_URL}/node/heartbeatstatus")
  then
    echo "ERROR: cannot fetch ${OBSERVER_URL}/node/heartbeatstatus" >&2
  fi
  if ! allBufStats=$(curl -m 5 -s "${OBSERVER_URL}/validator/statistics") # Should curl only once
  then
    echo "ERROR: cannot fetch ${OBSERVER_URL}/validator/statistics" >&2
  fi

  # One compact JSON object per line, restricted to heartbeats whose identity
  # equals IDENTITY. The identity is passed with --arg so it is always treated
  # as data, never as part of the jq program.
  myNodesHB=$(jq -c --arg identity "$IDENTITY" \
    '.data.heartbeats[] | select(.identity == $identity)' <<< "$bufHB")

  # Iterate line by line: word-splitting the buffer would cut any heartbeat that
  # contains a space (e.g. in the display name) into invalid JSON fragments.
  while IFS= read -r heartbeat
  do
    # An empty buffer still yields one empty line; skip it.
    [[ -n "$heartbeat" ]] || continue

    # One jq output line per variable, read in order (values may contain spaces).
    # The shard ids use '// ""' so a missing value falls back to -9 below
    # instead of being printed as "null".
    for field in r_displayName r_identity r_validatorPubkey r_isActive \
      r_totalUpTimeSec r_totalDownTimeSec r_peerType r_receivedShardID \
      r_computedShardID
    do
      IFS= read -r "$field"
    done < <(jq -r '
        .nodeDisplayName,
        .identity,
        .publicKey,
        .isActive,
        .totalUpTimeSec,
        .totalDownTimeSec,
        .peerType,
        .receivedShardID // "",
        .computedShardID // ""
      ' <<< "$heartbeat")

    # isActive is reported as true or false; convert it to 1/0.
    # (case-insensitive substring match, like the former `grep -i true`)
    if [[ "$r_isActive" =~ [Tt][Rr][Uu][Ee] ]]
    then
      int_r_isActive=1
    else
      int_r_isActive=0
    fi

    # r_maxInactiveTime=$(jq '.maxInactiveTime' -r <<< "$heartbeat") # The actual format is not prometheus friendly
    # r_peerType         # Possible values are: "eligible", "observer", "waiting"
    # r_receivedShardID  # What the current observer node received from this peer
    # r_computedShardID  # What the current observer node knows about this peer
    # Hint: r_receivedShardID must EQUAL r_computedShardID when r_peerType=eligible.
    # A specific metric can be added here reflecting this. (elrond_node_shardID)

    # Prepare the metricLabels
    shardIDlabel=$(setMetaLabel "$r_receivedShardID")
    metricLabels="displayName=\"$r_displayName\",nodeType=\"$r_peerType\",shardID=\"$shardIDlabel\",validatorPubkey=\"$r_validatorPubkey\",identity=\"$r_identity\""

    # Exporting generic prometheus metrics
    # NOTE(review): uptime/downtime are exported as a constant 0 although
    # r_totalUpTimeSec / r_totalDownTimeSec are read above - confirm this is intended.
    printf "%s\n" "elrond_node_r_is_active{$metricLabels} ${int_r_isActive:--9}" \
      "elrond_node_r_total_uptime_sec{$metricLabels} 0" \
      "elrond_node_r_total_downtime_sec{$metricLabels} 0" \
      "elrond_node_r_received_shard_id{$metricLabels} ${r_receivedShardID:--9}" \
      "elrond_node_r_computed_shard_id{$metricLabels} ${r_computedShardID:--9}"
    #echo "elrond_node_r_max_inactive_time{$metricLabels} $r_maxInactiveTime" # Must be converted to seconds. The actual format is not prometheus friendly

    # Exporting validator prometheus metrics. Do not calculate for observers.
    if [[ "$r_peerType" != "observer" ]]
    then
      # Using the discovered r_validatorPubkey, look up the validator performance
      # statistics in the buffered response. The key is passed with --arg because
      # it originates from a remote response. Missing values become empty lines
      # so the ':--9' defaults below apply.
      for field in r_ratingModifier r_shardId r_tempRating r_numLeaderSuccess \
        r_numLeaderFailure r_numValidatorSuccess r_numValidatorFailure \
        r_numValidatorIgnoredSignatures r_rating r_totalNumLeaderSuccess \
        r_totalNumLeaderFailure r_totalNumValidatorSuccess \
        r_totalNumValidatorFailure r_totalNumValidatorIgnoredSignatures
      do
        IFS= read -r "$field"
      done < <(jq -r --arg pubkey "$r_validatorPubkey" '
          .data.statistics[$pubkey]
          | [ .ratingModifier, .shardId, .tempRating,
              .numLeaderSuccess, .numLeaderFailure,
              .numValidatorSuccess, .numValidatorFailure,
              .numValidatorIgnoredSignatures, .rating,
              .totalNumLeaderSuccess, .totalNumLeaderFailure,
              .totalNumValidatorSuccess, .totalNumValidatorFailure,
              .totalNumValidatorIgnoredSignatures ]
          | map(. // "")
          | .[]
        ' <<< "$allBufStats")

      printf "%s\n" "elrond_node_r_rating_modifier{$metricLabels} ${r_ratingModifier:--9}" \
        "elrond_node_r_shard_id{$metricLabels} ${r_shardId:--9}" \
        "elrond_node_r_epoch_rating{$metricLabels} ${r_tempRating:--9}" \
        "elrond_node_r_epoch_leader_success{$metricLabels} ${r_numLeaderSuccess:--9}" \
        "elrond_node_r_epoch_leader_failure{$metricLabels} ${r_numLeaderFailure:--9}" \
        "elrond_node_r_epoch_validator_success{$metricLabels} ${r_numValidatorSuccess:--9}" \
        "elrond_node_r_epoch_validator_failure{$metricLabels} ${r_numValidatorFailure:--9}" \
        "elrond_node_r_epoch_validator_ignored_signatures{$metricLabels} ${r_numValidatorIgnoredSignatures:--9}" \
        "elrond_node_r_total_rating{$metricLabels} ${r_rating:--9}" \
        "elrond_node_r_total_leader_success{$metricLabels} ${r_totalNumLeaderSuccess:--9}" \
        "elrond_node_r_total_leader_failure{$metricLabels} ${r_totalNumLeaderFailure:--9}" \
        "elrond_node_r_total_validator_success{$metricLabels} ${r_totalNumValidatorSuccess:--9}" \
        "elrond_node_r_total_validator_failure{$metricLabels} ${r_totalNumValidatorFailure:--9}" \
        "elrond_node_r_total_validator_ignored_signatures{$metricLabels} ${r_totalNumValidatorIgnoredSignatures:--9}"
    fi
  done <<< "$myNodesHB"
fi
#Other metrics to consider
#erd_cpu_load_percent
#erd_mem_load_percent
#erd_mem_total
#erd_num_metachain_nodes
#erd_num_nodes_in_shard
#erd_num_transactions_processed
#erd_peak_tps
#erd_current_block_size
#erd_app_version