Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[s3inbox] run healthchecks in main process #1025

Merged
merged 11 commits into from
Sep 12, 2024
31 changes: 31 additions & 0 deletions .github/integration/tests/sda/09_healthchecks.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#!/bin/sh
set -e

# Ensure the command line tools used by this test are available.
# Missing tools are installed with apt-get, which is only attempted as root.
for t in curl jq; do
    # Tool already on PATH, nothing to install.
    if [ -n "$(command -v "$t")" ]; then
        continue
    fi

    # Installing packages requires root; give up when running unprivileged.
    if [ "$(id -u)" != 0 ]; then
        echo "$t is missing, unable to install it"
        exit 1
    fi

    # DPkg::Lock::Timeout lets apt wait for the dpkg lock instead of failing at once.
    apt-get -o DPkg::Lock::Timeout=60 update >/dev/null
    apt-get -o DPkg::Lock::Timeout=60 install -y "$t" >/dev/null
done


# Test the s3inbox's healthchecks, GET /health and HEAD /
#
# HEAD / : -I makes curl send a HEAD request, -o /dev/null discards the
# headers and -w prints only the HTTP status code of the final response.
# The trailing "|| true" stops "set -e" from aborting the script when curl
# itself fails (e.g. connection refused). In that case curl reports the status
# as 000, so the check below prints a diagnostic instead of exiting silently.
response="$(curl -s -k -LI "http://s3inbox:8000" -o /dev/null -w "%{http_code}\n")" || true
if [ "$response" != "200" ]; then
    echo "Bad health response from HEAD /, expected 200 got: $response"
    exit 1
fi

# GET /health : only the HTTP status code of the final response is captured.
# "|| true" keeps "set -e" from aborting on a curl transport failure, so the
# status check that follows can report the problem (curl prints 000 then).
response="$(curl -s -k -L "http://s3inbox:8000/health" -o /dev/null -w "%{http_code}\n")" || true
MalinAhlberg marked this conversation as resolved.
Show resolved Hide resolved
# Anything other than an exact 200 from /health fails the test.
case "$response" in
    200) ;;
    *)
        echo "Bad health response from /health, expected 200 got: $response"
        exit 1
        ;;
esac

echo "Healthcheck tests completed successfully"
4 changes: 0 additions & 4 deletions charts/sda-svc/templates/inbox-service.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,6 @@ spec:
port: {{ template "inboxServicePort" . }}
targetPort: inbox
protocol: TCP
- name: inbox-liveness
port: 8001
targetPort: liveness-port
protocol: TCP
selector:
app: {{ template "sda.fullname" . }}-inbox
{{- end }}
Expand Down
11 changes: 4 additions & 7 deletions charts/sda-svc/templates/s3-inbox-deploy.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -210,20 +210,17 @@ spec:
- name: inbox
containerPort: 8000
protocol: TCP
- name: liveness-port
containerPort: 8001
protocol: TCP
livenessProbe:
httpGet:
path: /live
port: liveness-port
path: /health
port: inbox
scheme: {{ ternary "HTTPS" "HTTP" ( .Values.global.tls.enabled ) }}
failureThreshold: 1
periodSeconds: 10
readinessProbe:
httpGet:
path: /ready
port: liveness-port
path: /health
port: inbox
scheme: {{ ternary "HTTPS" "HTTP" ( .Values.global.tls.enabled ) }}
failureThreshold: 1
periodSeconds: 5
Expand Down
7 changes: 0 additions & 7 deletions charts/sda-svc/templates/s3-inbox-ingress.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,6 @@ spec:
name: {{ template "sda.fullname" . }}-inbox
port:
number: {{ ternary 443 80 .Values.global.tls.enabled }}
- pathType: Exact
path: "/healthz"
backend:
service:
name: {{ template "sda.fullname" . }}-inbox
port:
number: 8001
{{- if .Values.global.tls.enabled }}
tls:
- hosts:
Expand Down
166 changes: 78 additions & 88 deletions sda/cmd/s3inbox/healthchecks.go
Original file line number Diff line number Diff line change
@@ -1,114 +1,104 @@
package main

import (
"crypto/tls"
"database/sql"
"fmt"
"net"
"net/http"
"net/url"
"path"
"strconv"
"time"

"github.com/heptiolabs/healthcheck"
"github.com/neicnordic/sensitive-data-archive/internal/config"
"github.com/neicnordic/sensitive-data-archive/internal/broker"
log "github.com/sirupsen/logrus"
)

// HealthCheck registers an endpoint for healthchecking the service.
// It holds everything the healthcheck server needs: the port to listen on,
// the backends to probe and the optional TLS material for serving.
type HealthCheck struct {
// port is the TCP port the healthcheck HTTP server listens on.
port int
// DB is the database handle pinged by the "database" readiness check.
DB *sql.DB
// s3URL is the URL requested by the "S3-backend-http" readiness check; it is
// assembled from the inbox S3 URL plus the optional port and ready path.
s3URL string
// brokerURL is the "host:port" of the message broker dialed by the
// "broker-tcp" readiness check.
brokerURL string
// tlsConfig supplies the root CAs used by the HTTP client that probes S3.
tlsConfig *tls.Config
// serverCert and serverKey are passed to ListenAndServeTLS; when either is
// empty the healthcheck server is started without TLS.
serverCert string
serverKey string
}
// CheckHealth checks and tries to repair the connections to MQ, DB and S3
func (p *Proxy) CheckHealth(w http.ResponseWriter, _ *http.Request) {

// try to connect to mq, check connection and channel
var err error
if p.messenger == nil {
w.WriteHeader(http.StatusServiceUnavailable)

// NewHealthCheck creates a new healthchecker. It needs to know where to find
// the backend S3 storage and the Message Broker so it can report readiness.
func NewHealthCheck(port int, db *sql.DB, conf *config.Config, tlsConfig *tls.Config) *HealthCheck {
s3URL := conf.Inbox.S3.URL
if conf.Inbox.S3.Port != 0 {
s3URL = fmt.Sprintf("%s:%d", s3URL, conf.Inbox.S3.Port)
return
}
if conf.Inbox.S3.Readypath != "" {
s3URL += conf.Inbox.S3.Readypath
if p.messenger.IsConnClosed() {
log.Warning("connection is closed, reconnecting...")
p.messenger, err = broker.NewMQ(p.messenger.Conf)
if err != nil {
log.Warning(err)
w.WriteHeader(http.StatusServiceUnavailable)

return
}
}

brokerURL := fmt.Sprintf("%s:%d", conf.Broker.Host, conf.Broker.Port)
if p.messenger.Channel.IsClosed() {
log.Warning("channel is closed, recreating...")
err := p.messenger.CreateNewChannel()
if err != nil {
log.Warning(err)
w.WriteHeader(http.StatusServiceUnavailable)

serverCert := conf.Server.Cert
serverKey := conf.Server.Key
return
}
}
// Ping database, reconnect if there was a connection problem
err = p.database.DB.Ping()
if err != nil {
log.Errorf("Database connection problem: %v", err)
err = p.database.Connect()
if err != nil {
w.WriteHeader(http.StatusServiceUnavailable)

return
}
}

return &HealthCheck{port, db, s3URL, brokerURL, tlsConfig, serverCert, serverKey}
}
// Check that s3 backend responds
s3url, err := p.getS3ReadyPath()
if err != nil {
log.Errorf("Incorrect S3 health url: %v", err)
w.WriteHeader(http.StatusServiceUnavailable)

// RunHealthChecks should be run as a goroutine in the main app. It registers
// the healthcheck handler on the port specified when creating a new
// healthcheck.
func (h *HealthCheck) RunHealthChecks() {
health := healthcheck.NewHandler()
return
}
err = p.httpsGetCheck(s3url)
if err != nil {
log.Error(err)
w.WriteHeader(http.StatusServiceUnavailable)

health.AddLivenessCheck("goroutine-threshold", healthcheck.GoroutineCountCheck(100))
return
}
w.WriteHeader(http.StatusOK)
}

health.AddReadinessCheck("S3-backend-http", h.httpsGetCheck(h.s3URL, 5000*time.Millisecond))
// httpsGetCheck sends a request to the S3 backend and makes sure it is healthy
func (p *Proxy) httpsGetCheck(url string) error {
resp, e := p.client.Get(url)
if e != nil {
return e
}
_ = resp.Body.Close() // ignoring error
if resp.StatusCode != 200 {
return fmt.Errorf("returned status %d", resp.StatusCode)
}

health.AddReadinessCheck("broker-tcp", healthcheck.TCPDialCheck(h.brokerURL, 5000*time.Millisecond))
return nil
}

health.AddReadinessCheck("database", healthcheck.DatabasePingCheck(h.DB, 1*time.Second))
func (p *Proxy) getS3ReadyPath() (string, error) {

mux := http.NewServeMux()
mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
if r.Method == http.MethodHead {
// readyEndpoint does not accept method head
r.Method = http.MethodGet
health.ReadyEndpoint(w, r)
}
})
mux.HandleFunc("/health", health.ReadyEndpoint)

addr := ":" + strconv.Itoa(h.port)
server := &http.Server{
Addr: addr,
Handler: mux,
ReadTimeout: 5 * time.Second,
WriteTimeout: 5 * time.Second,
IdleTimeout: 30 * time.Second,
ReadHeaderTimeout: 3 * time.Second,
s3URL, err := url.Parse(p.s3.URL)
if err != nil {
return "", err
}
if h.serverCert != "" && h.serverKey != "" {
if err := server.ListenAndServeTLS(h.serverCert, h.serverKey); err != nil {
panic(err)
}
} else {
if err := server.ListenAndServe(); err != nil {
panic(err)
}
if p.s3.Port != 0 {
s3URL.Host = net.JoinHostPort(s3URL.Hostname(), strconv.Itoa(p.s3.Port))
}
}

func (h *HealthCheck) httpsGetCheck(url string, timeout time.Duration) healthcheck.Check {
cfg := &tls.Config{MinVersion: tls.VersionTLS12}
cfg.RootCAs = h.tlsConfig.RootCAs
tr := &http.Transport{TLSClientConfig: cfg}
client := http.Client{
Transport: tr,
Timeout: timeout,
// never follow redirects
CheckRedirect: func(*http.Request, []*http.Request) error {
return http.ErrUseLastResponse
jbygdell marked this conversation as resolved.
Show resolved Hide resolved
},
if p.s3.Readypath != "" {
s3URL.Path = path.Join(s3URL.Path, p.s3.Readypath)
}

return func() error {
resp, e := client.Get(url)
if e != nil {
return e
}
_ = resp.Body.Close() // ignoring error
if resp.StatusCode != 200 {
return fmt.Errorf("returned status %d", resp.StatusCode)
}

return nil
}
return s3URL.String(), nil
}
Loading
Loading