Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[s3inbox] run healthchecks in main process #1025

Merged
merged 11 commits into from
Sep 12, 2024
33 changes: 33 additions & 0 deletions .github/integration/tests/sda/09_healthchecks.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#!/bin/sh
set -e

# Install curl and jq if they are missing. Installing requires root, so the
# script aborts when a tool is absent and it is not running as uid 0.
for t in curl jq; do
    # Use the exit status of `command -v` (with the name quoted) rather than
    # testing its captured output.
    if ! command -v "$t" >/dev/null 2>&1; then
        if [ "$(id -u)" != 0 ]; then
            echo "$t is missing, unable to install it"
            exit 1
        fi

        apt-get -o DPkg::Lock::Timeout=60 update >/dev/null
        apt-get -o DPkg::Lock::Timeout=60 install -y "$t" >/dev/null
    fi
done


# Test the s3inbox's healthcheck endpoints, / and /health.
# The bearer token is the first entry of the list served by oidc's /tokens.
token="$(curl --silent http://oidc:8080/tokens | jq --raw-output '.[0]')"

# HEAD / (--head): discard the output and keep only the HTTP status code.
response="$(
    curl --silent --insecure --location --head \
        --output /dev/null \
        --write-out "%{http_code}\n" \
        --header "Authorization: Bearer $token" \
        "http://s3inbox:8000"
)"
jbygdell marked this conversation as resolved.
Show resolved Hide resolved
# Any status other than 200 from HEAD / fails the test.
case "$response" in
    200) ;;
    *)
        echo "Bad health response from HEAD /, expected 200 got: $response"
        exit 1
        ;;
esac

# GET /health: no -I here, since that flag would turn the request into a HEAD
# and the GET path of the healthcheck would never be exercised.
response="$(curl -s -k -L "http://s3inbox:8000/health" -o /dev/null -w "%{http_code}\n" -H "Authorization: Bearer $token")"
jbygdell marked this conversation as resolved.
Show resolved Hide resolved
# Any status other than 200 from /health fails the test.
case "$response" in
    200) ;;
    *)
        echo "Bad health response from /health, expected 200 got: $response"
        exit 1
        ;;
esac

echo "Healthcheck tests completed successfully"
157 changes: 67 additions & 90 deletions sda/cmd/s3inbox/healthchecks.go
Original file line number Diff line number Diff line change
@@ -1,114 +1,91 @@
package main

import (
"crypto/tls"
"database/sql"
"fmt"
"net/http"
"strconv"
"time"

"github.com/heptiolabs/healthcheck"
"github.com/neicnordic/sensitive-data-archive/internal/config"
"github.com/neicnordic/sensitive-data-archive/internal/broker"
log "github.com/sirupsen/logrus"
)

// HealthCheck registers an endpoint for healthchecking the service.
// NewHealthCheck fills the fields with a positional composite literal, so the
// field order must not be changed.
type HealthCheck struct {
// port is the TCP port the healthcheck HTTP server listens on.
port int
// DB is the database handle pinged by the "database" readiness check.
DB *sql.DB
// s3URL is the URL requested by the "S3-backend-http" readiness check.
s3URL string
// brokerURL is the host:port dialed by the "broker-tcp" readiness check.
brokerURL string
// tlsConfig supplies the RootCAs used by the HTTP client of the S3 check.
tlsConfig *tls.Config
// serverCert and serverKey are passed to ListenAndServeTLS when both are
// non-empty; otherwise the server is started without TLS.
serverCert string
serverKey string
}
// CheckHealth checks and tries to repair the connections to MQ, DB and S3
func (p *Proxy) CheckHealth(w http.ResponseWriter, _ *http.Request) {

// try to connect to mq, check connection and channel
var err error
if p.messenger == nil {
w.WriteHeader(http.StatusServiceUnavailable)

// NewHealthCheck creates a new healthchecker. It needs to know where to find
// the backend S3 storage and the Message Broker so it can report readiness.
func NewHealthCheck(port int, db *sql.DB, conf *config.Config, tlsConfig *tls.Config) *HealthCheck {
s3URL := conf.Inbox.S3.URL
if conf.Inbox.S3.Port != 0 {
s3URL = fmt.Sprintf("%s:%d", s3URL, conf.Inbox.S3.Port)
return
}
if conf.Inbox.S3.Readypath != "" {
s3URL += conf.Inbox.S3.Readypath
if p.messenger.IsConnClosed() {
log.Warning("connection is closed, reconnecting...")
p.messenger, err = broker.NewMQ(p.messenger.Conf)
if err != nil {
log.Warning(err)
w.WriteHeader(http.StatusServiceUnavailable)

return
}
}

brokerURL := fmt.Sprintf("%s:%d", conf.Broker.Host, conf.Broker.Port)

serverCert := conf.Server.Cert
serverKey := conf.Server.Key

return &HealthCheck{port, db, s3URL, brokerURL, tlsConfig, serverCert, serverKey}
}

// RunHealthChecks should be run as a goroutine in the main app. It registers
// the healthcheck handler on the port specified when creating a new
// healthcheck.
func (h *HealthCheck) RunHealthChecks() {
health := healthcheck.NewHandler()

health.AddLivenessCheck("goroutine-threshold", healthcheck.GoroutineCountCheck(100))

health.AddReadinessCheck("S3-backend-http", h.httpsGetCheck(h.s3URL, 5000*time.Millisecond))

health.AddReadinessCheck("broker-tcp", healthcheck.TCPDialCheck(h.brokerURL, 5000*time.Millisecond))

health.AddReadinessCheck("database", healthcheck.DatabasePingCheck(h.DB, 1*time.Second))
if p.messenger.Channel.IsClosed() {
log.Warning("channel is closed, recreating...")
err := p.messenger.CreateNewChannel()
if err != nil {
log.Warning(err)
w.WriteHeader(http.StatusServiceUnavailable)

mux := http.NewServeMux()
mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
if r.Method == http.MethodHead {
// readyEndpoint does not accept method head
r.Method = http.MethodGet
health.ReadyEndpoint(w, r)
return
}
})
mux.HandleFunc("/health", health.ReadyEndpoint)

addr := ":" + strconv.Itoa(h.port)
server := &http.Server{
Addr: addr,
Handler: mux,
ReadTimeout: 5 * time.Second,
WriteTimeout: 5 * time.Second,
IdleTimeout: 30 * time.Second,
ReadHeaderTimeout: 3 * time.Second,
}
if h.serverCert != "" && h.serverKey != "" {
if err := server.ListenAndServeTLS(h.serverCert, h.serverKey); err != nil {
panic(err)
}
} else {
if err := server.ListenAndServe(); err != nil {
panic(err)
// Ping database, reconnect if there was a connection problem
err = p.database.DB.Ping()
if err != nil {
log.Errorf("Database connection problem: %v", err)
err = p.database.Connect()
if err != nil {
w.WriteHeader(http.StatusServiceUnavailable)

return
}
}

// Check that s3 backend responds
s3URL := p.getS3ReadyPath()
err = p.httpsGetCheck(s3URL)
jbygdell marked this conversation as resolved.
Show resolved Hide resolved
if err != nil {
log.Error(err)
w.WriteHeader(http.StatusServiceUnavailable)

return
}
w.WriteHeader(http.StatusOK)
}

func (h *HealthCheck) httpsGetCheck(url string, timeout time.Duration) healthcheck.Check {
cfg := &tls.Config{MinVersion: tls.VersionTLS12}
cfg.RootCAs = h.tlsConfig.RootCAs
tr := &http.Transport{TLSClientConfig: cfg}
client := http.Client{
Transport: tr,
Timeout: timeout,
// never follow redirects
CheckRedirect: func(*http.Request, []*http.Request) error {
return http.ErrUseLastResponse
jbygdell marked this conversation as resolved.
Show resolved Hide resolved
},
// httpsGetCheck sends a request to the S3 backend and makes sure it is healthy
func (p *Proxy) httpsGetCheck(url string) error {

jbygdell marked this conversation as resolved.
Show resolved Hide resolved
resp, e := p.client.Get(url)
if e != nil {
return e
}
_ = resp.Body.Close() // ignoring error
if resp.StatusCode != 200 {
return fmt.Errorf("returned status %d", resp.StatusCode)
}

return func() error {
resp, e := client.Get(url)
if e != nil {
return e
}
_ = resp.Body.Close() // ignoring error
if resp.StatusCode != 200 {
return fmt.Errorf("returned status %d", resp.StatusCode)
}
return nil
}

return nil
func (p *Proxy) getS3ReadyPath() string {
s3URL := p.s3.URL
if p.s3.Port != 0 {
s3URL = fmt.Sprintf("%s:%d", s3URL, p.s3.Port)
}
if p.s3.Readypath != "" {
s3URL += p.s3.Readypath
}
jbygdell marked this conversation as resolved.
Show resolved Hide resolved

return s3URL
}
Loading
Loading