Skip to content

Commit

Permalink
gh-144 create health probe CR from DNSRecord reconciler
Browse files Browse the repository at this point in the history
Signed-off-by: Maskym Vavilov <[email protected]>
  • Loading branch information
maksymvavilov committed Oct 24, 2024
1 parent 0ed8ff6 commit 766495a
Show file tree
Hide file tree
Showing 9 changed files with 431 additions and 327 deletions.
2 changes: 0 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -241,14 +241,12 @@ run: DIRTY=$(shell hack/check-git-dirty.sh || echo "unknown")
run: manifests generate fmt vet ## Run a controller from your host.
go run -ldflags "-X main.gitSHA=${GIT_SHA} -X main.dirty=${DIRTY}" ./cmd/main.go --zap-devel --provider inmemory,aws,google,azure


.PHONY: run-with-probes
run-with-probes: GIT_SHA=$(shell git rev-parse HEAD || echo "unknown")
run-with-probes: DIRTY=$(shell hack/check-git-dirty.sh || echo "unknown")
run-with-probes: manifests generate fmt vet ## Run a controller from your host.
go run -ldflags "-X main.gitSHA=${GIT_SHA} -X main.dirty=${DIRTY}" ./cmd/main.go --zap-devel --provider inmemory,aws,google,azure


# If you wish to build the manager image targeting other platforms you can use the --platform flag.
# (i.e. docker build --platform linux/arm64 ). However, you must enable docker buildKit for it.
# More info: https://docs.docker.com/develop/develop-images/build_enhancements/
Expand Down
4 changes: 3 additions & 1 deletion cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,10 @@ func main() {
var maxRequeueTime time.Duration
var providers stringSliceFlags
var dnsProbesEnabled bool
var allowInsecureCerts bool

flag.BoolVar(&dnsProbesEnabled, "enable-probes", true, "Enable DNSHealthProbes controller.")
flag.BoolVar(&allowInsecureCerts, "insecure-health-checks", true, "Allow DNSHealthProbes to use insecure certificates")

flag.StringVar(&metricsAddr, "metrics-bind-address", ":8080", "The address the metric endpoint binds to.")
flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.")
Expand Down Expand Up @@ -153,7 +155,7 @@ func main() {
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),
ProviderFactory: providerFactory,
}).SetupWithManager(mgr, maxRequeueTime, validFor, minRequeueTime); err != nil {
}).SetupWithManager(mgr, maxRequeueTime, validFor, minRequeueTime, dnsProbesEnabled, allowInsecureCerts); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "DNSRecord")
os.Exit(1)
}
Expand Down
1 change: 1 addition & 0 deletions internal/controller/dnshealthcheckprobe_reconciler.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
)

const (
ProbeOwnerLabel = "kuadrant.io/health-probes-owner"
DNSHealthCheckFinalizer = "kuadrant.io/dns-health-check-probe"
)

Expand Down
19 changes: 14 additions & 5 deletions internal/controller/dnsrecord_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,9 @@ var (
randomizedValidationRequeue time.Duration
validFor time.Duration
reconcileStart metav1.Time

probesEnabled bool
allowInsecureCert bool
)

// DNSRecordReconciler reconciles a DNSRecord object
Expand Down Expand Up @@ -125,8 +128,10 @@ func (r *DNSRecordReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
return r.updateStatus(ctx, previous, dnsRecord, false, err)
}

if err = r.ReconcileHealthChecks(ctx, dnsRecord); client.IgnoreNotFound(err) != nil {
return ctrl.Result{}, err
if probesEnabled {
if err = r.DeleteHealthChecks(ctx, dnsRecord); client.IgnoreNotFound(err) != nil {
return ctrl.Result{}, err
}
}
hadChanges, err := r.deleteRecord(ctx, dnsRecord, dnsProvider)
if err != nil {
Expand Down Expand Up @@ -227,8 +232,10 @@ func (r *DNSRecordReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
return r.updateStatus(ctx, previous, dnsRecord, hadChanges, err)
}

if err = r.ReconcileHealthChecks(ctx, dnsRecord); err != nil {
return ctrl.Result{}, err
if probesEnabled {
if err = r.ReconcileHealthChecks(ctx, dnsRecord, allowInsecureCert); err != nil {
return ctrl.Result{}, err
}
}

return r.updateStatus(ctx, previous, dnsRecord, hadChanges, nil)
Expand Down Expand Up @@ -318,10 +325,12 @@ func (r *DNSRecordReconciler) updateStatus(ctx context.Context, previous, curren
}

// SetupWithManager sets up the controller with the Manager.
func (r *DNSRecordReconciler) SetupWithManager(mgr ctrl.Manager, maxRequeue, validForDuration, minRequeue time.Duration) error {
func (r *DNSRecordReconciler) SetupWithManager(mgr ctrl.Manager, maxRequeue, validForDuration, minRequeue time.Duration, healthProbesEnabled, allowInsecureHealthCert bool) error {
defaultRequeueTime = maxRequeue
validFor = validForDuration
defaultValidationRequeue = minRequeue
probesEnabled = healthProbesEnabled
allowInsecureCert = allowInsecureHealthCert

return ctrl.NewControllerManagedBy(mgr).
For(&v1alpha1.DNSRecord{}).
Expand Down
261 changes: 124 additions & 137 deletions internal/controller/dnsrecord_healthchecks.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,180 +2,167 @@ package controller

import (
"context"
"crypto/md5"
"fmt"
"io"
"reflect"
"strings"

"k8s.io/apimachinery/pkg/api/meta"
"github.com/go-logr/logr"
"github.com/hashicorp/go-multierror"

"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
externaldns "sigs.k8s.io/external-dns/endpoint"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/utils/ptr"
controllerruntime "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/log"

"github.com/kuadrant/dns-operator/api/v1alpha1"
"github.com/kuadrant/dns-operator/internal/provider"
"github.com/kuadrant/dns-operator/internal/common"
)

// healthChecksConfig represents the user configuration for the health checks
type healthChecksConfig struct {
Endpoint string
Port *int64
FailureThreshold *int64
Protocol *provider.HealthCheckProtocol
}

func (r *DNSRecordReconciler) ReconcileHealthChecks(ctx context.Context, dnsRecord *v1alpha1.DNSRecord) error {
var results []provider.HealthCheckResult
var err error
func (r *DNSRecordReconciler) ReconcileHealthChecks(ctx context.Context, dnsRecord *v1alpha1.DNSRecord, allowInsecureCerts bool) error {
logger := log.FromContext(ctx).WithName("healthchecks")
logger.Info("Reconciling healthchecks")

dnsProvider, err := r.getDNSProvider(ctx, dnsRecord)
if err != nil {
return err
// Probes are enabled but the record has no health check spec yet; nothing to do.
if dnsRecord.Spec.HealthCheck == nil {
return nil
}

healthCheckReconciler := dnsProvider.HealthCheckReconciler()

// Get the configuration for the health checks. If no configuration is
// set, ensure that the health checks are deleted
config := getHealthChecksConfig(dnsRecord)
desiredProbes := buildDesiredProbes(dnsRecord, common.GetLeafsTargets(common.MakeTreeFromDNSRecord(dnsRecord), ptr.To([]string{})), allowInsecureCerts)

for _, dnsEndpoint := range dnsRecord.Spec.Endpoints {
addresses := provider.GetExternalAddresses(dnsEndpoint, dnsRecord)
for _, address := range addresses {
probeStatus := r.getProbeStatus(address, dnsRecord)

// no config means delete the health checks
if config == nil {
result, err := healthCheckReconciler.Delete(ctx, dnsEndpoint, probeStatus)
if err != nil {
return err
}

results = append(results, result)
continue
}

// creating / updating health checks
endpointId, err := idForEndpoint(dnsRecord, dnsEndpoint, address)
if err != nil {
return err
}

spec := provider.HealthCheckSpec{
Id: endpointId,
Name: fmt.Sprintf("%s-%s-%s", dnsRecord.Spec.RootHost, dnsEndpoint.DNSName, address),
Host: &dnsRecord.Spec.RootHost,
Path: config.Endpoint,
Port: config.Port,
Protocol: config.Protocol,
FailureThreshold: config.FailureThreshold,
}

result := healthCheckReconciler.Reconcile(ctx, spec, dnsEndpoint, probeStatus, address)
results = append(results, result)
for _, probe := range desiredProbes {
// If any probe fails, the health checks for this record are invalid anyway, so there is no point in continuing.
if err := controllerruntime.SetControllerReference(dnsRecord, probe, r.Scheme); err != nil {
return err
}
}

result := r.reconcileHealthCheckStatus(results, dnsRecord)
return result
}

func (r *DNSRecordReconciler) getProbeStatus(address string, dnsRecord *v1alpha1.DNSRecord) *v1alpha1.HealthCheckStatusProbe {
if dnsRecord.Status.HealthCheck == nil || dnsRecord.Status.HealthCheck.Probes == nil {
return nil
}
for _, probeStatus := range dnsRecord.Status.HealthCheck.Probes {
if probeStatus.IPAddress == address {
return &probeStatus
if err := r.ensureProbe(ctx, probe, logger); err != nil {
return err
}
}

logger.Info("Healthecks reconciled")
return nil
}

func (r *DNSRecordReconciler) reconcileHealthCheckStatus(results []provider.HealthCheckResult, dnsRecord *v1alpha1.DNSRecord) error {
var previousCondition *metav1.Condition
probesCondition := &metav1.Condition{
Reason: "AllProbesSynced",
Type: "healthProbesSynced",
}
// DeleteHealthChecks deletes every v1alpha1.DNSHealthCheckProbe in the record's namespace whose ProbeOwnerLabel value matches the passed-in DNSRecord.
func (r *DNSRecordReconciler) DeleteHealthChecks(ctx context.Context, dnsRecord *v1alpha1.DNSRecord) error {
logger := log.FromContext(ctx).WithName("healthchecks")
logger.Info("Deleting healthchecks")

var allSynced = metav1.ConditionTrue
healthProbes := v1alpha1.DNSHealthCheckProbeList{}

if dnsRecord.Status.HealthCheck == nil {
dnsRecord.Status.HealthCheck = &v1alpha1.HealthCheckStatus{
Conditions: []metav1.Condition{},
Probes: []v1alpha1.HealthCheckStatusProbe{},
}
if err := r.List(ctx, &healthProbes, &client.ListOptions{
LabelSelector: labels.SelectorFromSet(map[string]string{
ProbeOwnerLabel: BuildOwnerLabelValue(dnsRecord),
}),
Namespace: dnsRecord.Namespace,
}); err != nil {
return err
}

previousCondition = meta.FindStatusCondition(dnsRecord.Status.HealthCheck.Conditions, "HealthProbesSynced")
if previousCondition != nil {
probesCondition = previousCondition
var deleteErrors error
for _, probe := range healthProbes.Items {
logger.V(1).Info(fmt.Sprintf("Deleting probe: %s", probe.Name))
if err := r.Delete(ctx, &probe); err != nil {
deleteErrors = multierror.Append(deleteErrors, err)
}
}
return deleteErrors
}

dnsRecord.Status.HealthCheck.Probes = []v1alpha1.HealthCheckStatusProbe{}
func (r *DNSRecordReconciler) ensureProbe(ctx context.Context, generated *v1alpha1.DNSHealthCheckProbe, logger logr.Logger) error {
current := &v1alpha1.DNSHealthCheckProbe{}

for _, result := range results {
if result.Host == "" {
continue
if err := r.Get(ctx, client.ObjectKeyFromObject(generated), current); err != nil {
if errors.IsNotFound(err) {
logger.V(1).Info(fmt.Sprintf("Creating probe: %s", generated.Name))
return r.Create(ctx, generated)
}
status := true
if result.Result == provider.HealthCheckFailed {
status = false
allSynced = metav1.ConditionFalse
}

dnsRecord.Status.HealthCheck.Probes = append(dnsRecord.Status.HealthCheck.Probes, v1alpha1.HealthCheckStatusProbe{
ID: result.ID,
IPAddress: result.IPAddress,
Host: result.Host,
Synced: status,
Conditions: []metav1.Condition{result.Condition},
})
return err
}

probesCondition.ObservedGeneration = dnsRecord.Generation
probesCondition.Status = allSynced

if allSynced == metav1.ConditionTrue {
probesCondition.Message = fmt.Sprintf("all %v probes synced successfully", len(dnsRecord.Status.HealthCheck.Probes))
probesCondition.Reason = "AllProbesSynced"
} else {
probesCondition.Reason = "UnsyncedProbes"
probesCondition.Message = "some probes have not yet successfully synced to the DNS Provider"
}
desired := current.DeepCopy()
desired.Spec = generated.Spec

//probe condition changed? - update transition time
if !reflect.DeepEqual(previousCondition, probesCondition) {
probesCondition.LastTransitionTime = metav1.Now()
if !reflect.DeepEqual(current, desired) {
logger.V(1).Info(fmt.Sprintf("Updating probe: %s", desired.Name))
if err := r.Update(ctx, desired); err != nil {
return err
}
}

dnsRecord.Status.HealthCheck.Conditions = []metav1.Condition{*probesCondition}

logger.V(1).Info(fmt.Sprintf("No updates needed for probe: %s", desired.Name))
return nil
}

func getHealthChecksConfig(dnsRecord *v1alpha1.DNSRecord) *healthChecksConfig {
if dnsRecord.Spec.HealthCheck == nil || dnsRecord.DeletionTimestamp != nil {
return nil
}
func buildDesiredProbes(dnsRecord *v1alpha1.DNSRecord, leafs *[]string, allowInsecureCerts bool) []*v1alpha1.DNSHealthCheckProbe {
var probes []*v1alpha1.DNSHealthCheckProbe

port := int64(dnsRecord.Spec.HealthCheck.Port)
failureThreshold := int64(dnsRecord.Spec.HealthCheck.FailureThreshold)
if leafs == nil {
return probes
}

return &healthChecksConfig{
Endpoint: dnsRecord.Spec.HealthCheck.Path,
Port: &port,
FailureThreshold: &failureThreshold,
Protocol: (*provider.HealthCheckProtocol)(&dnsRecord.Spec.HealthCheck.Protocol),
for _, leaf := range *leafs {
probes = append(probes, &v1alpha1.DNSHealthCheckProbe{
ObjectMeta: metav1.ObjectMeta{
Name: fmt.Sprintf("%s-%s", dnsRecord.Name, leaf),
Namespace: dnsRecord.Namespace,
Labels: map[string]string{ProbeOwnerLabel: BuildOwnerLabelValue(dnsRecord)},
},
Spec: v1alpha1.DNSHealthCheckProbeSpec{
Port: dnsRecord.Spec.HealthCheck.Port,
Hostname: dnsRecord.Spec.RootHost,
Address: leaf,
Path: dnsRecord.Spec.HealthCheck.Path,
Protocol: dnsRecord.Spec.HealthCheck.Protocol,
Interval: dnsRecord.Spec.HealthCheck.Interval,
AdditionalHeadersRef: dnsRecord.Spec.HealthCheck.AdditionalHeadersRef,
FailureThreshold: dnsRecord.Spec.HealthCheck.FailureThreshold,
AllowInsecureCertificate: allowInsecureCerts,
},
})
}
return probes
}

// idForEndpoint returns a unique identifier for an endpoint
func idForEndpoint(dnsRecord *v1alpha1.DNSRecord, endpoint *externaldns.Endpoint, address string) (string, error) {
hash := md5.New()
if _, err := io.WriteString(hash, fmt.Sprintf("%s/%s@%s:%s-%v", dnsRecord.Name, endpoint.SetIdentifier, endpoint.DNSName, address, dnsRecord.Generation)); err != nil {
return "", fmt.Errorf("unexpected error creating ID for endpoint %s", endpoint.SetIdentifier)
// BuildOwnerLabelValue builds the ProbeOwnerLabel value for a record while
// keeping it within the 63-character limit for label values.
//
// The preferred value is "<namespace>_<name>". If that is longer than 63
// characters, the following are attempted in order and the first that applies
// is used:
//
//  1. Trim the tail of the namespace so the value is exactly 63 characters.
//     Only used when at least one namespace character would remain.
//     Result: "short-namespace_hostname"
//
//  2. Keep the namespace and replace the name with its first three
//     dot-separated labels (e.g. "pat.the.cat" from "pat.the.cat.com").
//     Only used when this shortens the value by more than the overshoot.
//     Result: "original-namespace_pat.the.cat"
//
//  3. Fall back to record.GetUIDHash().
//     Result: "UIDHash"
func BuildOwnerLabelValue(record *v1alpha1.DNSRecord) string {
	const maxLabelValueLen = 63

	value := fmt.Sprintf("%s_%s", record.Namespace, record.Name)

	// Using the name of the DNS record (a hostname) plus the namespace is
	// likely to exceed the limit, but the common case fits as-is.
	if len(value) <= maxLabelValueLen {
		return value
	}

	// Number of characters that have to be removed.
	overshoot := len(value) - maxLabelValueLen

	// Strictly greater: the trimmed namespace must keep at least one character,
	// otherwise the value would start with the "_" separator.
	if len(record.Namespace) > overshoot {
		return fmt.Sprintf("%s_%s", record.Namespace[:len(record.Namespace)-overshoot], record.Name)
	}

	// Trimming the namespace is not an option - the hostname is too long.
	// (The probe's own name is fine; object names have a much larger limit.)
	// Guard the slice: a name with fewer than three labels must not panic.
	hostLabels := strings.Split(record.Name, ".")
	if len(hostLabels) > 3 {
		hostLabels = hostLabels[:3]
	}
	shortHost := strings.Join(hostLabels, ".")

	// Shortening the host alone is enough to get under the limit.
	if len(record.Name)-len(shortHost) > overshoot {
		return fmt.Sprintf("%s_%s", record.Namespace, shortHost)
	}

	// Neither part can be shortened enough on its own; use the record's UID hash.
	return record.GetUIDHash()
}
Loading

0 comments on commit 766495a

Please sign in to comment.