Skip to content

Commit

Permalink
Http debug page (BFT-259, BFT-451) (#124)
Browse files Browse the repository at this point in the history
## What ❔
Added an HTTP endpoint with basic debug information:
- It is disabled by default. To enable it, add a line defining its address
to the config:
`  "debugAddr": "<IP Address>:<port>"`
- Basic authentication is implemented and is also optional. It is configured by
`  "debugCredentials": "<user>:<password>"`
in the config file. No authentication is required if that line is not present.
- TLS encryption:
Certificate and key files are required when authentication is
enabled. Default names are `cert.pem` and `key.pem`, but file paths can be
specified in config.json:
```
  "debugCertPath": "<cert path>",
  "debugKeyPath": "<key path>",
```
- Current look:
<img width="573" alt="image"
src="https://github.com/matter-labs/era-consensus/assets/6774709/99ba639a-dd3a-41a0-a3f3-03f2509b8e11">
 

## Why ❔

Requested at:
- [BFT-259 - implement an http debug page for a node (with network
data)](https://linear.app/matterlabs/issue/BFT-259/implement-an-http-debug-page-for-a-node-with-network-data)
- [BFT-451 - Do not expose HTTP servers for metrics and debugging if
possible](https://linear.app/matterlabs/issue/BFT-451/do-not-expose-http-servers-for-metrics-and-debugging-if-possible)
  • Loading branch information
ElFantasma authored Jun 25, 2024
1 parent 2838e24 commit 2dbb9a6
Show file tree
Hide file tree
Showing 27 changed files with 1,064 additions and 244 deletions.
605 changes: 404 additions & 201 deletions node/Cargo.lock

Large diffs are not rendered by default.

15 changes: 12 additions & 3 deletions node/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,10 @@ ff_ce = "0.14.3"
heck = "0.5.0"
hex = "0.4.3"
im = "15.1.0"
jsonrpsee = { version = "0.21.0", features = ["server", "http-client"] }
jsonrpsee = { version = "0.23.0", features = ["server", "http-client"] }
k256 = { version = "0.13", features = ["ecdsa"] }
k8s-openapi = { version = "0.21.0", features = ["latest"] }
kube = { version = "0.88.1", features = ["runtime", "derive"] }
k8s-openapi = { version = "0.22.0", features = ["latest"] }
kube = { version = "0.91.0", features = ["runtime", "derive"] }
num-bigint = "0.4.4"
num-traits = "0.2.18"
once_cell = "1.17.1"
Expand Down Expand Up @@ -87,10 +87,19 @@ test-casing = "0.1.0"
thiserror = "1.0.40"
time = "0.3.23"
tokio = { version = "1.34.0", features = ["full"] }
tokio-rustls = "0.25.0"
tower = { version = "0.4.13" }
tracing = { version = "0.1.37", features = ["attributes"] }
tracing-subscriber = { version = "0.3.16", features = ["env-filter", "fmt"] }
zeroize = { version = "1.7.0", features = ["zeroize_derive"] }
hyper = { version = "1", features = ["full"] }
http-body-util = "0.1"
hyper-util = { version = "0.1", features = ["full"] }
tls-listener = { version = "0.10.0", features = ["rustls"]}
rustls-pemfile = "2"
base64 = "0.22.1"
build_html = "2.4.0"
bytesize = "1.3.0"

# Note that "bench" profile inherits from "release" profile and
# "test" profile inherits from "dev" profile.
Expand Down
16 changes: 15 additions & 1 deletion node/actors/executor/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
//! Library files for the executor. We have it separate from the binary so that we can use these files in the tools crate.
use crate::io::Dispatcher;
use anyhow::Context as _;
use network::http;
use std::{
collections::{HashMap, HashSet},
sync::Arc,
Expand Down Expand Up @@ -29,7 +30,7 @@ pub struct Validator {
}

/// Config of the node executor.
#[derive(Clone, Debug)]
#[derive(Debug)]
pub struct Config {
/// IP:port to listen on, for incoming TCP connections.
/// Use `0.0.0.0:<port>` to listen on all network interfaces (i.e. on all IPs exposed by this VM).
Expand All @@ -50,6 +51,9 @@ pub struct Config {
/// Outbound connections that the node should actively try to
/// establish and maintain.
pub gossip_static_outbound: HashMap<node::PublicKey, net::Host>,
/// Http debug page configuration.
/// If None, debug page is disabled
pub debug_page: Option<http::DebugPageConfig>,
}

impl Config {
Expand Down Expand Up @@ -107,11 +111,21 @@ impl Executor {
tracing::debug!("Starting actors in separate threads.");
scope::run!(ctx, |ctx, s| async {
s.spawn(async { dispatcher.run(ctx).await.context("IO Dispatcher stopped") });

let (net, runner) =
network::Network::new(network_config, self.block_store.clone(), network_actor_pipe);
net.register_metrics();
s.spawn(async { runner.run(ctx).await.context("Network stopped") });

if let Some(debug_config) = self.config.debug_page {
s.spawn(async {
http::DebugPageServer::new(debug_config, net)
.run(ctx)
.await
.context("Http Server stopped")
});
}

// Run the bft actor iff this node is an active validator.
let Some(validator) = self.validator else {
tracing::info!("Running the node in non-validator mode.");
Expand Down
1 change: 1 addition & 0 deletions node/actors/executor/src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ fn config(cfg: &network::Config) -> Config {
gossip_dynamic_inbound_limit: cfg.gossip.dynamic_inbound_limit,
gossip_static_inbound: cfg.gossip.static_inbound.clone(),
gossip_static_outbound: cfg.gossip.static_outbound.clone(),
debug_page: None,
}
}

Expand Down
10 changes: 9 additions & 1 deletion node/actors/network/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,20 @@ snow.workspace = true
thiserror.workspace = true
tracing.workspace = true
vise.workspace = true
tokio.workspace = true
tokio-rustls.workspace = true
hyper.workspace = true
http-body-util.workspace = true
hyper-util.workspace = true
tls-listener.workspace = true
base64.workspace = true
build_html.workspace = true
bytesize.workspace = true

[dev-dependencies]
assert_matches.workspace = true
pretty_assertions.workspace = true
test-casing.workspace = true
tokio.workspace = true

[build-dependencies]
zksync_protobuf_build.workspace = true
Expand Down
14 changes: 9 additions & 5 deletions node/actors/network/src/consensus/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
//! Consensus network is a full graph of connections between all validators.
//! BFT consensus messages are exchanged over this network.
use crate::{config, gossip, io, noise, pool::PoolWatch, preface, rpc};
use crate::{config, gossip, io, noise, pool::PoolWatch, preface, rpc, MeteredStreamStats};
use anyhow::Context as _;
use rand::seq::SliceRandom;
use std::{
Expand Down Expand Up @@ -114,9 +114,9 @@ pub(crate) struct Network {
/// This validator's secret key.
pub(crate) key: validator::SecretKey,
/// Set of the currently open inbound connections.
pub(crate) inbound: PoolWatch<validator::PublicKey, ()>,
pub(crate) inbound: PoolWatch<validator::PublicKey, Arc<MeteredStreamStats>>,
/// Set of the currently open outbound connections.
pub(crate) outbound: PoolWatch<validator::PublicKey, ()>,
pub(crate) outbound: PoolWatch<validator::PublicKey, Arc<MeteredStreamStats>>,
/// Messages to be sent to validators.
pub(crate) msg_pool: MsgPool,
}
Expand Down Expand Up @@ -171,7 +171,9 @@ impl Network {
) -> anyhow::Result<()> {
let peer =
handshake::inbound(ctx, &self.key, self.gossip.genesis().hash(), &mut stream).await?;
self.inbound.insert(peer.clone(), ()).await?;
self.inbound
.insert(peer.clone(), stream.get_values())
.await?;
tracing::info!("peer = {peer:?}");
let res = scope::run!(ctx, |ctx, s| async {
let mut service = rpc::Service::new()
Expand Down Expand Up @@ -209,7 +211,9 @@ impl Network {
peer,
)
.await?;
self.outbound.insert(peer.clone(), ()).await?;
self.outbound
.insert(peer.clone(), stream.get_values())
.await?;
tracing::info!("peer = {peer:?}");
let consensus_cli =
rpc::Client::<rpc::consensus::Rpc>::new(ctx, self.gossip.cfg.rpc.consensus_rate);
Expand Down
6 changes: 3 additions & 3 deletions node/actors/network/src/gossip/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
//! eclipse attack. Dynamic connections are supposed to improve the properties of the gossip
//! network graph (minimize its diameter, increase connectedness).
use self::batch_votes::BatchVotesWatch;
use crate::{gossip::ValidatorAddrsWatch, io, pool::PoolWatch, Config};
use crate::{gossip::ValidatorAddrsWatch, io, pool::PoolWatch, Config, MeteredStreamStats};
use anyhow::Context as _;
use im::HashMap;
use std::sync::{atomic::AtomicUsize, Arc};
Expand All @@ -37,9 +37,9 @@ pub(crate) struct Network {
/// Gossip network configuration.
pub(crate) cfg: Config,
/// Currently open inbound connections.
pub(crate) inbound: PoolWatch<node::PublicKey, ()>,
pub(crate) inbound: PoolWatch<node::PublicKey, Arc<MeteredStreamStats>>,
/// Currently open outbound connections.
pub(crate) outbound: PoolWatch<node::PublicKey, ()>,
pub(crate) outbound: PoolWatch<node::PublicKey, Arc<MeteredStreamStats>>,
/// Current state of knowledge about validators' endpoints.
pub(crate) validator_addrs: ValidatorAddrsWatch,
/// Current state of knowledge about batch votes.
Expand Down
8 changes: 6 additions & 2 deletions node/actors/network/src/gossip/runner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,9 @@ impl Network {
let peer =
handshake::inbound(ctx, &self.cfg.gossip, self.genesis().hash(), &mut stream).await?;
tracing::info!("peer = {peer:?}");
self.inbound.insert(peer.clone(), ()).await?;
self.inbound
.insert(peer.clone(), stream.get_values())
.await?;
let res = self.run_stream(ctx, stream).await;
self.inbound.remove(&peer).await;
res
Expand Down Expand Up @@ -296,7 +298,9 @@ impl Network {
)
.await?;
tracing::info!("peer = {peer:?}");
self.outbound.insert(peer.clone(), ()).await?;
self.outbound
.insert(peer.clone(), stream.get_values())
.await?;
let res = self.run_stream(ctx, stream).await;
self.outbound.remove(peer).await;
res
Expand Down
Loading

0 comments on commit 2dbb9a6

Please sign in to comment.