Skip to content

Commit

Permalink
Reintroduce health liveliness check (#1859)
Browse files Browse the repository at this point in the history
closes #1861
fixes #1852


### Reintroduce health liveliness check ([Issue
#1861](#1861))

Depending on their environments and cloud settings, users may or may not
be able to craft health probes that are able to make CSRF compatible
GraphQL queries.
This is one of the reasons why we reintroduced a health check in the
router.

The health liveness check endpoint is exposed on
`127.0.0.1:8088/health`, and its listen address can be changed in the
yaml configuration:

```yaml
health-check:
  listen: 127.0.0.1:8088 # default
  enabled: true # default
```

Co-authored-by: bryn <[email protected]>
  • Loading branch information
o0Ignition0o and bryn authored Sep 22, 2022
1 parent 516f28e commit adea33b
Show file tree
Hide file tree
Showing 11 changed files with 352 additions and 135 deletions.
16 changes: 16 additions & 0 deletions NEXT_CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,22 @@ We're happy to consider re-introducing this in the future (it even has a matchin
By [@abernix](https://github.com/abernix) in https://github.com/apollographql/router/pull/1858

## 🚀 Features

### Reintroduce health liveness check ([Issue #1861](https://github.com/apollographql/router/issues/1861))

Depending on their environments and cloud settings, users may or may not be able to craft health probes that can make CSRF-compatible GraphQL queries.
This is one of the reasons why we reintroduced a health check in the router.

The health liveness check endpoint is exposed on `127.0.0.1:8088/health`, and its listen address can be changed in the yaml configuration:

```yaml
health-check:
  listen: 127.0.0.1:8088 # default
  enabled: true # default
```
By [@o0Ignition0o](https://github.com/o0Ignition0o) in https://github.com/apollographql/router/pull/1859
## 🐛 Fixes
### update and validate configuration files ([Issue #1854](https://github.com/apollographql/router/issues/1854))
Expand Down
5 changes: 5 additions & 0 deletions RELEASE_CHECKLIST.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,11 @@ in lieu of an official changelog.
- update the version and the appVersion to the release version. e.g.: `appVersion: "v0.9.0"`
8. Update `helm/chart/router/README.md` by running this from the repo root: `(cd helm/chart && helm-docs router)`.
(If not installed, you should [install `helm-docs`](https://github.com/norwoodj/helm-docs))
9. Update the Kubernetes section of the docs:
- go to the `helm/chart/router` folder
- run
```helm template --set router.configuration.telemetry.metrics.prometheus.enabled=true --set managedFederation.apiKey="REDACTED" --set managedFederation.graphRef="REDACTED" --debug .```
- Paste the output in the `Kubernetes Configuration` example of the `docs/sources/containerization/kubernetes.mdx` file
9. Update `federation-version-support.mdx` with the latest version info. Use https://github.com/apollographql/version_matrix to generate the version matrix.
10. Update the `image` of the Docker image within `docker-compose*.yml` files inside the `dockerfiles` directory.
11. Update the license list with `cargo about generate --workspace -o licenses.html about.hbs`.
Expand Down
211 changes: 192 additions & 19 deletions apollo-router/src/axum_http_server_factory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ use multimap::MultiMap;
use opentelemetry::global;
use opentelemetry::trace::SpanKind;
use opentelemetry::trace::TraceContextExt;
use serde::Serialize;
use tokio::io::AsyncWriteExt;
use tokio::net::TcpListener;
#[cfg(unix)]
Expand Down Expand Up @@ -103,6 +104,19 @@ pub(crate) struct ListenersAndRouters {
pub(crate) extra: MultiMap<ListenAddr, Router>,
}

/// Liveness state reported in the body of the health check response.
///
/// Serialized through serde with upper-cased variant names, i.e. `"UP"` and
/// `"DOWN"`.
#[derive(Serialize)]
#[serde(rename_all = "UPPERCASE")]
// NOTE(review): presumably required because not every variant is constructed
// in the visible code (only `Up` is) — confirm before removing.
#[allow(dead_code)]
enum HealthStatus {
/// The status the `/health` handler reports when it answers.
Up,
/// NOTE(review): no visible code path constructs this variant.
Down,
}

/// Payload of the health check response.
///
/// Serialized to JSON by the `/health` handler; the tests expect a body equal
/// to `{"status": "UP"}`.
#[derive(Serialize)]
struct Health {
/// Liveness status exposed under the `status` key.
status: HealthStatus,
}

pub(crate) fn make_axum_router<RF>(
service_factory: RF,
configuration: &Configuration,
Expand All @@ -113,24 +127,29 @@ where
{
ensure_listenaddrs_consistency(configuration, &endpoints)?;

endpoints.insert(
configuration.supergraph.listen.clone(),
Endpoint::new(
"/.well-known/apollo/server-health".to_string(),
service_fn(|_req: transport::Request| async move {
Ok::<_, BoxError>(
http::Response::builder()
.status(StatusCode::NOT_FOUND)
.body(
Bytes::from_static(b"The health check is no longer at this endpoint")
.into(),
)
.unwrap(),
)
})
.boxed(),
),
);
if configuration.health_check.enabled {
tracing::info!(
"healthcheck endpoint exposed at {}/health",
configuration.health_check.listen
);
endpoints.insert(
configuration.health_check.listen.clone(),
Endpoint::new(
"/health".to_string(),
service_fn(move |_req: transport::Request| {
let health = Health {
status: HealthStatus::Up,
};

async move {
Ok(http::Response::builder()
.body(serde_json::to_vec(&health).map_err(BoxError::from)?.into())?)
}
})
.boxed(),
),
);
}

ensure_endpoints_consistency(configuration, &endpoints)?;

Expand Down Expand Up @@ -176,6 +195,20 @@ fn ensure_listenaddrs_consistency(
all_ports.insert(main_port, main_ip);
}

if configuration.health_check.enabled {
if let Some((ip, port)) = configuration.health_check.listen.ip_and_port() {
if let Some(previous_ip) = all_ports.insert(port, ip) {
if ip != previous_ip {
return Err(ApolloRouterError::DifferentListenAddrsOnSamePort(
previous_ip,
ip,
port,
));
}
}
}
}

for addr in endpoints.keys() {
if let Some((ip, port)) = addr.ip_and_port() {
if let Some(previous_ip) = all_ports.insert(port, ip) {
Expand Down Expand Up @@ -1075,6 +1108,7 @@ mod tests {

use super::*;
use crate::configuration::Cors;
use crate::configuration::HealthCheck;
use crate::configuration::Homepage;
use crate::configuration::Sandbox;
use crate::configuration::Supergraph;
Expand Down Expand Up @@ -1198,6 +1232,7 @@ mod tests {
.enabled(false)
.build(),
)
.health_check(crate::configuration::HealthCheck::fake_builder().build())
.build()
.unwrap(),
),
Expand Down Expand Up @@ -2173,7 +2208,7 @@ Content-Type: application/json\r
}

#[tokio::test]
async fn test_health_check_returns_four_oh_four() {
async fn test_previous_health_check_returns_four_oh_four() {
let expectations = MockSupergraphService::new();
let (server, client) = init(expectations).await;
let url = format!(
Expand All @@ -2185,6 +2220,144 @@ Content-Type: application/json\r
assert_eq!(response.status(), StatusCode::NOT_FOUND);
}

/// Queries `/health` on the address returned by `graphql_listen_address()` and
/// expects `200 OK` with a JSON body equal to `{"status": "UP"}`.
#[tokio::test]
async fn test_health_check() {
    // The mock is set up to expect exactly one service call, answered with a
    // minimal `__typename` GraphQL response.
    let mut supergraph_mock = MockSupergraphService::new();
    supergraph_mock
        .expect_service_call()
        .once()
        .returning(|_| {
            let graphql_payload = graphql::Response::builder()
                .data(json!({ "__typename": "Query"}))
                .build();
            let http_response = http::Response::builder()
                .status(200)
                .body(graphql_payload)
                .unwrap();
            Ok(http_ext::from_response_to_stream(http_response))
        });

    let (server, client) = init(supergraph_mock).await;
    let health_url = format!(
        "{}/health",
        server.graphql_listen_address().as_ref().unwrap()
    );

    let response = client.get(health_url).send().await.unwrap();
    assert_eq!(response.status(), StatusCode::OK);

    let body = response.json::<serde_json::Value>().await.unwrap();
    assert_eq!(json!({"status": "UP" }), body);
}

/// Configures the health check on its own `127.0.0.1:4012` listener and
/// expects `GET /health` on port 4012 to return `200 OK` with a JSON body
/// equal to `{"status": "UP"}`.
#[tokio::test]
async fn test_health_check_custom_listener() {
    let health_check = HealthCheck::fake_builder()
        .listen(ListenAddr::SocketAddr("127.0.0.1:4012".parse().unwrap()))
        .enabled(true)
        .build();
    let conf = Configuration::fake_builder()
        .health_check(health_check)
        .build()
        .unwrap();

    // The mock is set up to expect exactly one service call, answered with a
    // minimal `__typename` GraphQL response.
    let mut supergraph_mock = MockSupergraphService::new();
    supergraph_mock
        .expect_service_call()
        .once()
        .returning(|_| {
            let graphql_payload = graphql::Response::builder()
                .data(json!({ "__typename": "Query"}))
                .build();
            let http_response = http::Response::builder()
                .status(200)
                .body(graphql_payload)
                .unwrap();
            Ok(http_ext::from_response_to_stream(http_response))
        });

    // Hold on to the server handle for the whole test: if it were dropped
    // here the server would shut down immediately.
    let (_server, client) = init_with_config(supergraph_mock, conf, MultiMap::new())
        .await
        .unwrap();
    let url = "http://localhost:4012/health";

    let response = client.get(url).send().await.unwrap();
    assert_eq!(response.status(), StatusCode::OK);

    let body = response.json::<serde_json::Value>().await.unwrap();
    assert_eq!(json!({"status": "UP" }), body);
}

/// With the health check enabled, setting the supergraph path to `/health`
/// must make server initialisation fail with a "two endpoints" error.
#[tokio::test]
async fn test_sneaky_supergraph_and_health_check_configuration() {
    let health_check = HealthCheck::fake_builder()
        .listen(ListenAddr::SocketAddr("127.0.0.1:0".parse().unwrap()))
        .enabled(true)
        .build();
    // The supergraph path deliberately collides with the health check route.
    let supergraph = Supergraph::fake_builder().path("/health").build();
    let conf = Configuration::fake_builder()
        .health_check(health_check)
        .supergraph(supergraph)
        .build()
        .unwrap();

    let supergraph_mock = MockSupergraphService::new();
    let startup_result = init_with_config(supergraph_mock, conf, MultiMap::new()).await;
    let error = startup_result.unwrap_err();

    assert_eq!(
        "tried to register two endpoints on `127.0.0.1:0/health`",
        error.to_string()
    );
}

/// With the health check disabled, a supergraph served on `/health` is
/// accepted: initialisation is expected to succeed.
#[tokio::test]
async fn test_sneaky_supergraph_and_disabled_health_check_configuration() {
    let health_check = HealthCheck::fake_builder()
        .listen(ListenAddr::SocketAddr("127.0.0.1:0".parse().unwrap()))
        .enabled(false)
        .build();
    let supergraph = Supergraph::fake_builder().path("/health").build();
    let conf = Configuration::fake_builder()
        .health_check(health_check)
        .supergraph(supergraph)
        .build()
        .unwrap();

    let supergraph_mock = MockSupergraphService::new();
    // Only the success of the initialisation matters; the result is discarded.
    let _ = init_with_config(supergraph_mock, conf, MultiMap::new())
        .await
        .unwrap();
}

/// Binding the health check to `127.0.0.1:4013` and the supergraph to
/// `0.0.0.0:4013` (same port, different IPs) must be rejected at start-up.
#[tokio::test]
async fn test_supergraph_and_health_check_same_port_different_listener() {
    let health_check = HealthCheck::fake_builder()
        .listen(ListenAddr::SocketAddr("127.0.0.1:4013".parse().unwrap()))
        .enabled(true)
        .build();
    let supergraph = Supergraph::fake_builder()
        .listen(ListenAddr::SocketAddr("0.0.0.0:4013".parse().unwrap()))
        .build();
    let conf = Configuration::fake_builder()
        .health_check(health_check)
        .supergraph(supergraph)
        .build()
        .unwrap();

    let supergraph_mock = MockSupergraphService::new();
    let startup_result = init_with_config(supergraph_mock, conf, MultiMap::new()).await;
    let error = startup_result.unwrap_err();

    assert_eq!(
        "tried to bind 0.0.0.0 and 127.0.0.1 on port 4013",
        error.to_string()
    );
}

#[test(tokio::test)]
async fn it_send_bad_content_type() -> Result<(), ApolloRouterError> {
let query = "query";
Expand Down
Loading

0 comments on commit adea33b

Please sign in to comment.