Skip to content

Commit

Permalink
feat(iroh-net): Implement the https probe (#2903)
Browse files Browse the repository at this point in the history
## Description

This implements the https probe.

There does not need to be separate IPv4 or IPv6 HTTPS probes, because we
just go with whatever the HTTPS connection works over. This is what we
need to establish a relay connection after all, this also does not care.
The only reason for this is to get some kind of latency to be able to
chose a relay server.

Luckily the https probes have been integrated in the probe plan a long
time so nothing to do there.

## Breaking Changes

The URLs served by the relay changed:
- `/relay/probe` has moved to `/ping`
- `/derp/probe` has been removed.

Unless you were manually using those URLs you will not notice these
changes, nothing in the iroh codebase ever used the changed URLs.

## Notes & open questions


Currently this uses reqwest with it's default DNS resolution. This is
not great as it is not the same DNS resolution used everywhere else. It
is however the same as done in the captive portal check. This is worth
at least looking at before merging this.

## Change checklist

- [x] Use common DNS resolver.
- [x] Update URL
- [x] Write docs for URLs of relay server.
- [x] Self-review.
- [x] Documentation updates following the [style
guide](https://rust-lang.github.io/rfcs/1574-more-api-documentation-conventions.html#appendix-a-full-conventions-text),
if relevant.
- [x] Tests if relevant.
- [x] All breaking changes documented.
  • Loading branch information
flub authored Nov 7, 2024
1 parent d6a32f4 commit 91d44dc
Show file tree
Hide file tree
Showing 4 changed files with 154 additions and 53 deletions.
156 changes: 122 additions & 34 deletions iroh-net/src/netcheck/reportgen.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,14 +35,15 @@ use tokio::{
};
use tokio_util::task::AbortOnDropHandle;
use tracing::{debug, debug_span, error, info_span, trace, warn, Instrument, Span};
use url::Host;

use super::NetcheckMetrics;
use crate::{
defaults::DEFAULT_STUN_PORT,
dns::{DnsResolver, ResolverExt},
netcheck::{self, Report},
ping::{PingError, Pinger},
relay::{RelayMap, RelayNode, RelayUrl},
relay::{http::RELAY_PROBE_PATH, RelayMap, RelayNode, RelayUrl},
stun,
util::MaybeFuture,
};
Expand Down Expand Up @@ -466,6 +467,7 @@ impl Actor {
.as_ref()
.and_then(|l| l.preferred_relay.clone());

let dns_resolver = self.dns_resolver.clone();
let dm = self.relay_map.clone();
self.outstanding_tasks.captive_task = true;
MaybeFuture {
Expand All @@ -474,7 +476,7 @@ impl Actor {
debug!("Captive portal check started after {CAPTIVE_PORTAL_DELAY:?}");
let captive_portal_check = tokio::time::timeout(
CAPTIVE_PORTAL_TIMEOUT,
check_captive_portal(&dm, preferred_relay)
check_captive_portal(&dns_resolver, &dm, preferred_relay)
.instrument(debug_span!("captive-portal")),
);
match captive_portal_check.await {
Expand Down Expand Up @@ -743,7 +745,7 @@ async fn run_probe(
}
Probe::Https { ref node, .. } => {
debug!("sending probe HTTPS");
match measure_https_latency(node).await {
match measure_https_latency(&dns_resolver, node, None).await {
Ok((latency, ip)) => {
result.latency = Some(latency);
// We set these IPv4 and IPv6 but they're not really used
Expand Down Expand Up @@ -853,7 +855,11 @@ async fn run_stun_probe(
/// return a "204 No Content" response and checking if that's what we get.
///
/// The boolean return is whether we think we have a captive portal.
async fn check_captive_portal(dm: &RelayMap, preferred_relay: Option<RelayUrl>) -> Result<bool> {
async fn check_captive_portal(
dns_resolver: &DnsResolver,
dm: &RelayMap,
preferred_relay: Option<RelayUrl>,
) -> Result<bool> {
// If we have a preferred relay node and we can use it for non-STUN requests, try that;
// otherwise, pick a random one suitable for non-STUN requests.
let preferred_relay = preferred_relay.and_then(|url| match dm.get_node(&url) {
Expand Down Expand Up @@ -881,9 +887,23 @@ async fn check_captive_portal(dm: &RelayMap, preferred_relay: Option<RelayUrl>)
}
};

let client = reqwest::ClientBuilder::new()
.redirect(reqwest::redirect::Policy::none())
.build()?;
let mut builder = reqwest::ClientBuilder::new().redirect(reqwest::redirect::Policy::none());
if let Some(Host::Domain(domain)) = url.host() {
// Use our own resolver rather than getaddrinfo
//
// For some reason reqwest wants SocketAddr rather than IpAddr but then proceeds to
// ignore the port, extracting it from the URL instead. We supply a dummy port.
//
// Ideally we would try to resolve **both** IPv4 and IPv6 rather than purely race
// them. But our resolver doesn't support that yet.
let addrs: Vec<_> = dns_resolver
.lookup_ipv4_ipv6_staggered(domain, DNS_TIMEOUT, DNS_STAGGERING_MS)
.await?
.map(|ipaddr| SocketAddr::new(ipaddr, 80))
.collect();
builder = builder.resolve_to_addrs(domain, &addrs);
}
let client = builder.build()?;

// Note: the set of valid characters in a challenge and the total
// length is limited; see is_challenge_char in bin/iroh-relay for more
Expand Down Expand Up @@ -1023,33 +1043,72 @@ async fn run_icmp_probe(
Ok(report)
}

/// Executes an HTTPS probe.
///
/// If `certs` is provided they will be added to the trusted root certificates, allowing the
/// use of self-signed certificates for servers. Currently this is used for testing.
#[allow(clippy::unused_async)]
async fn measure_https_latency(_node: &RelayNode) -> Result<(Duration, IpAddr)> {
bail!("not implemented");
// TODO:
// - needs relayhttp::Client
// - measurement hooks to measure server processing time

// metricHTTPSend.Add(1)
// let ctx, cancel := context.WithTimeout(httpstat.WithHTTPStat(ctx, &result), overallProbeTimeout);
// let dc := relayhttp.NewNetcheckClient(c.logf);
// let tlsConn, tcpConn, node := dc.DialRegionTLS(ctx, reg)?;
// if ta, ok := tlsConn.RemoteAddr().(*net.TCPAddr);
// req, err := http.NewRequestWithContext(ctx, "GET", "https://"+node.HostName+"/relay/latency-check", nil);
// resp, err := hc.Do(req);

// // relays should give us a nominal status code, so anything else is probably
// // an access denied by a MITM proxy (or at the very least a signal not to
// // trust this latency check).
// if resp.StatusCode > 299 {
// return 0, ip, fmt.Errorf("unexpected status code: %d (%s)", resp.StatusCode, resp.Status)
// }
// _, err = io.Copy(io.Discard, io.LimitReader(resp.Body, 8<<10));
// result.End(c.timeNow())

// // TODO: decide best timing heuristic here.
// // Maybe the server should return the tcpinfo_rtt?
// return result.ServerProcessing, ip, nil
async fn measure_https_latency(
dns_resolver: &DnsResolver,
node: &RelayNode,
certs: Option<Vec<rustls::pki_types::CertificateDer<'static>>>,
) -> Result<(Duration, IpAddr)> {
let url = node.url.join(RELAY_PROBE_PATH)?;

// This should also use same connection establishment as relay client itself, which
// needs to be more configurable so users can do more crazy things:
// https://github.com/n0-computer/iroh/issues/2901
let mut builder = reqwest::ClientBuilder::new().redirect(reqwest::redirect::Policy::none());
if let Some(Host::Domain(domain)) = url.host() {
// Use our own resolver rather than getaddrinfo
//
// For some reason reqwest wants SocketAddr rather than IpAddr but then proceeds to
// ignore the port, extracting it from the URL instead. We supply a dummy port.
//
// The relay Client uses `.lookup_ipv4_ipv6` to connect, so use the same function
// but staggered for reliability. Ideally this tries to resolve **both** IPv4 and
// IPv6 though. But our resolver does not have a function for that yet.
let addrs: Vec<_> = dns_resolver
.lookup_ipv4_ipv6_staggered(domain, DNS_TIMEOUT, DNS_STAGGERING_MS)
.await?
.map(|ipaddr| SocketAddr::new(ipaddr, 80))
.collect();
builder = builder.resolve_to_addrs(domain, &addrs);
}
if let Some(certs) = certs {
for cert in certs {
let cert = reqwest::Certificate::from_der(&cert)?;
builder = builder.add_root_certificate(cert);
}
}
let client = builder.build()?;

let start = Instant::now();
let mut response = client.request(reqwest::Method::GET, url).send().await?;
let latency = start.elapsed();
if response.status().is_success() {
// Drain the response body to be nice to the server, up to a limit.
const MAX_BODY_SIZE: usize = 8 << 10; // 8 KiB
let mut body_size = 0;
while let Some(chunk) = response.chunk().await? {
body_size += chunk.len();
if body_size >= MAX_BODY_SIZE {
break;
}
}

// Only `None` if a different hyper HttpConnector in the request.
let remote_ip = response
.remote_addr()
.context("missing HttpInfo from HttpConnector")?
.ip();
Ok((latency, remote_ip))
} else {
Err(anyhow!(
"Error response from server: '{}'",
response.status().canonical_reason().unwrap_or_default()
))
}
}

/// Updates a netcheck [`Report`] with a new [`ProbeReport`].
Expand Down Expand Up @@ -1118,8 +1177,13 @@ fn update_report(report: &mut Report, probe_report: ProbeReport) {
mod tests {
use std::net::{Ipv4Addr, Ipv6Addr};

use testresult::TestResult;

use super::*;
use crate::defaults::staging::{default_eu_relay_node, default_na_relay_node};
use crate::{
defaults::staging::{default_eu_relay_node, default_na_relay_node},
test_utils,
};

#[test]
fn test_update_report_stun_working() {
Expand Down Expand Up @@ -1368,4 +1432,28 @@ mod tests {
panic!("Ping error: {err:#}");
}
}

#[tokio::test]
async fn test_measure_https_latency() -> TestResult {
let _logging_guard = iroh_test::logging::setup();
let (_relay_map, relay_url, server) = test_utils::run_relay_server().await?;
let dns_resolver = crate::dns::resolver();
warn!(?relay_url, "RELAY_URL");
let node = RelayNode {
stun_only: false,
stun_port: 0,
url: relay_url.clone(),
};
let (latency, ip) =
measure_https_latency(dns_resolver, &node, server.certificates()).await?;

assert!(latency > Duration::ZERO);

let relay_url_ip = relay_url
.host_str()
.context("host")?
.parse::<std::net::IpAddr>()?;
assert_eq!(ip, relay_url_ip);
Ok(())
}
}
7 changes: 1 addition & 6 deletions iroh-net/src/relay/http.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,12 @@ pub(crate) const SUPPORTED_WEBSOCKET_VERSION: &str = "13";
/// (over websockets and a custom upgrade protocol).
pub const RELAY_PATH: &str = "/relay";
/// The HTTP path under which the relay allows doing latency queries for testing.
pub const RELAY_PROBE_PATH: &str = "/relay/probe";
pub const RELAY_PROBE_PATH: &str = "/ping";
/// The legacy HTTP path under which the relay used to accept relaying connections.
/// We keep this for backwards compatibility.
#[cfg(feature = "iroh-relay")] // legacy paths only used on server-side for backwards compat
#[cfg_attr(iroh_docsrs, doc(cfg(feature = "iroh-relay")))]
pub(crate) const LEGACY_RELAY_PATH: &str = "/derp";
/// The legacy HTTP path under which the relay used to allow latency queries.
/// We keep this for backwards compatibility.
#[cfg(feature = "iroh-relay")] // legacy paths only used on server-side for backwards compat
#[cfg_attr(iroh_docsrs, doc(cfg(feature = "iroh-relay")))]
pub(crate) const LEGACY_RELAY_PROBE_PATH: &str = "/derp/probe";

/// The HTTP upgrade protocol used for relaying.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
Expand Down
38 changes: 27 additions & 11 deletions iroh-net/src/relay/server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,13 @@
//! always attached to a handle and when the handle is dropped the tasks abort. So tasks
//! can not outlive their handle. It is also always possible to await for completion of a
//! task. Some tasks additionally have a method to do graceful shutdown.
//!
//! The relay server hosts the following services:
//!
//! - HTTPS `/relay`: The main URL endpoint to which clients connect and sends traffic over.
//! - HTTPS `/ping`: Used for netcheck probes.
//! - HTTPS `/generate_204`: Used for netcheck probes.
//! - STUN: UDP port for STUN requests/responses.
use std::{fmt, future::Future, net::SocketAddr, pin::Pin, sync::Arc};

Expand All @@ -27,10 +34,7 @@ use tokio::{
use tokio_util::task::AbortOnDropHandle;
use tracing::{debug, error, info, info_span, instrument, trace, warn, Instrument};

use crate::{
relay::http::{LEGACY_RELAY_PROBE_PATH, RELAY_PROBE_PATH},
stun,
};
use crate::{relay::http::RELAY_PROBE_PATH, stun};

pub(crate) mod actor;
pub(crate) mod client_conn;
Expand Down Expand Up @@ -182,6 +186,11 @@ pub struct Server {
relay_handle: Option<http_server::ServerHandle>,
/// The main task running the server.
supervisor: AbortOnDropHandle<Result<()>>,
/// The certificate for the server.
///
/// If the server has manual certificates configured the certificate chain will be
/// available here, this can be used by a client to authenticate the server.
certificates: Option<Vec<rustls::pki_types::CertificateDer<'static>>>,
}

impl Server {
Expand Down Expand Up @@ -227,7 +236,13 @@ impl Server {
None => None,
};

// Start the Relay server.
// Start the Relay server, but first clone the certs out.
let certificates = config.relay.as_ref().and_then(|relay| {
relay.tls.as_ref().and_then(|tls| match tls.cert {
CertConfig::LetsEncrypt { .. } => None,
CertConfig::Manual { ref certs, .. } => Some(certs.clone()),
})
});
let (relay_server, http_addr) = match config.relay {
Some(relay_config) => {
debug!("Starting Relay server");
Expand All @@ -243,11 +258,6 @@ impl Server {
.headers(headers)
.request_handler(Method::GET, "/", Box::new(root_handler))
.request_handler(Method::GET, "/index.html", Box::new(root_handler))
.request_handler(
Method::GET,
LEGACY_RELAY_PROBE_PATH,
Box::new(probe_handler),
) // backwards compat
.request_handler(Method::GET, RELAY_PROBE_PATH, Box::new(probe_handler))
.request_handler(Method::GET, "/robots.txt", Box::new(robots_handler));
let http_addr = match relay_config.tls {
Expand Down Expand Up @@ -284,7 +294,7 @@ impl Server {
}
CertConfig::Manual { private_key, certs } => {
let server_config =
server_config.with_single_cert(certs.clone(), private_key)?;
server_config.with_single_cert(certs, private_key)?;
let server_config = Arc::new(server_config);
let acceptor =
tokio_rustls::TlsAcceptor::from(server_config.clone());
Expand Down Expand Up @@ -336,6 +346,7 @@ impl Server {
https_addr: http_addr.and(relay_addr),
relay_handle,
supervisor: AbortOnDropHandle::new(task),
certificates,
})
}

Expand Down Expand Up @@ -373,6 +384,11 @@ impl Server {
pub fn stun_addr(&self) -> Option<SocketAddr> {
self.stun_addr
}

/// The certificates chain if configured with manual TLS certificates.
pub fn certificates(&self) -> Option<Vec<rustls::pki_types::CertificateDer<'static>>> {
self.certificates.clone()
}
}

/// Supervisor for the relay server tasks.
Expand Down
6 changes: 4 additions & 2 deletions iroh-net/src/test_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,9 @@ pub async fn run_relay_server() -> Result<(RelayMap, RelayUrl, Server)> {
pub async fn run_relay_server_with(
stun: Option<StunConfig>,
) -> Result<(RelayMap, RelayUrl, Server)> {
let cert = rcgen::generate_simple_self_signed(vec!["localhost".to_string()]).unwrap();
let cert =
rcgen::generate_simple_self_signed(vec!["localhost".to_string(), "127.0.0.1".to_string()])
.expect("valid");
let rustls_cert = rustls::pki_types::CertificateDer::from(cert.serialize_der().unwrap());
let private_key =
rustls::pki_types::PrivatePkcs8KeyDer::from(cert.get_key_pair().serialize_der());
Expand All @@ -67,7 +69,7 @@ pub async fn run_relay_server_with(
metrics_addr: None,
};
let server = Server::spawn(config).await.unwrap();
let url: RelayUrl = format!("https://localhost:{}", server.https_addr().unwrap().port())
let url: RelayUrl = format!("https://{}", server.https_addr().expect("configured"))
.parse()
.unwrap();
let m = RelayMap::from_nodes([RelayNode {
Expand Down

0 comments on commit 91d44dc

Please sign in to comment.