Skip to content

Commit

Permalink
report healtchecks into DNSRecord status
Browse files Browse the repository at this point in the history
Signed-off-by: Maskym Vavilov <[email protected]>
  • Loading branch information
maksymvavilov committed Oct 24, 2024
1 parent 3053750 commit bf50c50
Show file tree
Hide file tree
Showing 4 changed files with 104 additions and 37 deletions.
7 changes: 7 additions & 0 deletions api/v1alpha1/conditions.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,10 @@ type ConditionType string
type ConditionReason string

const ConditionTypeReady ConditionType = "Ready"
const ConditionReasonProviderSuccess ConditionReason = "ProviderSuccess"
const ConditionReasonAwaitingValidation ConditionReason = "AwaitingValidation"

const ConditionTypeHealthy ConditionType = "Healthy"
const ConditionReasonHealthy ConditionReason = "AllChecksPassed"
const ConditionReasonPartiallyHealthy ConditionReason = "SomeChecksPassed"
const ConditionReasonUnhealthy ConditionReason = "HealthChecksFailed"
88 changes: 59 additions & 29 deletions internal/controller/dnsrecord_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ func (r *DNSRecordReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
reason := "DNSProviderError"
message := fmt.Sprintf("The dns provider could not be loaded: %v", err)
setDNSRecordCondition(dnsRecord, string(v1alpha1.ConditionTypeReady), metav1.ConditionFalse, reason, message)
return r.updateStatus(ctx, previous, dnsRecord, false, err)
return r.updateStatus(ctx, previous, dnsRecord, false, false, err)
}

if probesEnabled {
Expand Down Expand Up @@ -174,7 +174,7 @@ func (r *DNSRecordReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
logger.Error(err, "Failed to validate record")
setDNSRecordCondition(dnsRecord, string(v1alpha1.ConditionTypeReady), metav1.ConditionFalse,
"ValidationError", fmt.Sprintf("validation of DNSRecord failed: %v", err))
return r.updateStatus(ctx, previous, dnsRecord, false, err)
return r.updateStatus(ctx, previous, dnsRecord, false, false, err)
}

//Ensure an Owner ID has been assigned to the record (OwnerID set in the status)
Expand All @@ -197,14 +197,14 @@ func (r *DNSRecordReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
if err != nil {
setDNSRecordCondition(dnsRecord, string(v1alpha1.ConditionTypeReady), metav1.ConditionFalse,
"DNSProviderError", fmt.Sprintf("The dns provider could not be loaded: %v", err))
return r.updateStatus(ctx, previous, dnsRecord, false, err)
return r.updateStatus(ctx, previous, dnsRecord, false, false, err)
}

z, err := p.DNSZoneForHost(ctx, dnsRecord.Spec.RootHost)
if err != nil {
setDNSRecordCondition(dnsRecord, string(v1alpha1.ConditionTypeReady), metav1.ConditionFalse,
"DNSProviderError", fmt.Sprintf("Unable to find suitable zone in provider: %v", provider.SanitizeError(err)))
return r.updateStatus(ctx, previous, dnsRecord, false, err)
return r.updateStatus(ctx, previous, dnsRecord, false, false, err)
}

//Add zone id/domainName to status
Expand All @@ -220,16 +220,16 @@ func (r *DNSRecordReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
if err != nil {
setDNSRecordCondition(dnsRecord, string(v1alpha1.ConditionTypeReady), metav1.ConditionFalse,
"DNSProviderError", fmt.Sprintf("The dns provider could not be loaded: %v", err))
return r.updateStatus(ctx, previous, dnsRecord, false, err)
return r.updateStatus(ctx, previous, dnsRecord, false, false, err)
}

// Publish the record
hadChanges, err := r.publishRecord(ctx, dnsRecord, dnsProvider)
hadChanges, hasHealthyProbes, err := r.publishRecord(ctx, dnsRecord, dnsProvider)
if err != nil {
logger.Error(err, "Failed to publish record")
setDNSRecordCondition(dnsRecord, string(v1alpha1.ConditionTypeReady), metav1.ConditionFalse,
"ProviderError", fmt.Sprintf("The DNS provider failed to ensure the record: %v", provider.SanitizeError(err)))
return r.updateStatus(ctx, previous, dnsRecord, hadChanges, err)
return r.updateStatus(ctx, previous, dnsRecord, hadChanges, hasHealthyProbes, err)
}

if probesEnabled {
Expand All @@ -238,7 +238,7 @@ func (r *DNSRecordReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
}
}

return r.updateStatus(ctx, previous, dnsRecord, hadChanges, nil)
return r.updateStatus(ctx, previous, dnsRecord, hadChanges, hasHealthyProbes, nil)
}

// setLogger Updates the given Logger with record/zone metadata from the given DNSRecord.
Expand All @@ -252,7 +252,7 @@ func (r *DNSRecordReconciler) setLogger(ctx context.Context, logger logr.Logger,
return log.IntoContext(ctx, logger), logger
}

func (r *DNSRecordReconciler) updateStatus(ctx context.Context, previous, current *v1alpha1.DNSRecord, hadChanges bool, specErr error) (reconcile.Result, error) {
func (r *DNSRecordReconciler) updateStatus(ctx context.Context, previous, current *v1alpha1.DNSRecord, hadChanges, hasHealthyProbes bool, specErr error) (reconcile.Result, error) {
var requeueTime time.Duration
logger := log.FromContext(ctx)

Expand Down Expand Up @@ -283,7 +283,6 @@ func (r *DNSRecordReconciler) updateStatus(ctx context.Context, previous, curren
logger.V(1).Info("Changes needed on the same generation of record")
}
requeueTime = randomizedValidationRequeue
setDNSRecordCondition(current, string(v1alpha1.ConditionTypeReady), metav1.ConditionFalse, "AwaitingValidation", "Awaiting validation")
} else {
logger.Info("All records are already up to date")
// reset the valid for from randomized value to a fixed value once validation succeeds
Expand All @@ -293,9 +292,10 @@ func (r *DNSRecordReconciler) updateStatus(ctx context.Context, previous, curren
// uses current.Status.ValidFor as the last requeue duration. Double it.
requeueTime = exponentialRequeueTime(current.Status.ValidFor)
}
setDNSRecordCondition(current, string(v1alpha1.ConditionTypeReady), metav1.ConditionTrue, "ProviderSuccess", "Provider ensured the dns record")
}

setStatusConditions(current, hadChanges, hasHealthyProbes)

// valid for is always a requeue time
current.Status.ValidFor = requeueTime.String()

Expand Down Expand Up @@ -365,7 +365,7 @@ func (r *DNSRecordReconciler) SetupWithManager(mgr ctrl.Manager, maxRequeue, val
func (r *DNSRecordReconciler) deleteRecord(ctx context.Context, dnsRecord *v1alpha1.DNSRecord, dnsProvider provider.Provider) (bool, error) {
logger := log.FromContext(ctx)

hadChanges, err := r.applyChanges(ctx, dnsRecord, dnsProvider, true)
hadChanges, _, err := r.applyChanges(ctx, dnsRecord, dnsProvider, true)
if err != nil {
if strings.Contains(err.Error(), "was not found") || strings.Contains(err.Error(), "notFound") {
logger.Info("Record not found in zone, continuing")
Expand All @@ -382,21 +382,22 @@ func (r *DNSRecordReconciler) deleteRecord(ctx context.Context, dnsRecord *v1alp
}

// publishRecord publishes record(s) to the DNSPRovider(i.e. route53) zone (dnsRecord.Status.ZoneID).
func (r *DNSRecordReconciler) publishRecord(ctx context.Context, dnsRecord *v1alpha1.DNSRecord, dnsProvider provider.Provider) (bool, error) {
// returns if it had changes, if record is healthy and an error. If had no changes - the healthy bool can be ignored
func (r *DNSRecordReconciler) publishRecord(ctx context.Context, dnsRecord *v1alpha1.DNSRecord, dnsProvider provider.Provider) (bool, bool, error) {
logger := log.FromContext(ctx)

if prematurely, _ := recordReceivedPrematurely(dnsRecord); prematurely {
logger.V(1).Info("Skipping DNSRecord - is still valid")
return false, nil
return false, true, nil
}

hadChanges, err := r.applyChanges(ctx, dnsRecord, dnsProvider, false)
hadChanges, isHealthy, err := r.applyChanges(ctx, dnsRecord, dnsProvider, false)
if err != nil {
return hadChanges, err
return hadChanges, isHealthy, err
}
logger.Info("Published DNSRecord to zone")

return hadChanges, nil
return hadChanges, isHealthy, nil
}

// recordReceivedPrematurely returns true if current reconciliation loop started before
Expand Down Expand Up @@ -432,6 +433,35 @@ func exponentialRequeueTime(lastRequeueTime string) time.Duration {
return newRequeue
}

// setStatusConditions sets healthy and ready condition on given DNSRecord
func setStatusConditions(record *v1alpha1.DNSRecord, hadChanges, hasHealthyProbes bool) {
// we get here only when spec err is nil - can trust hadChanges bool

// give precedence to AwaitingValidation condition
if hadChanges {
setDNSRecordCondition(record, string(v1alpha1.ConditionTypeReady), metav1.ConditionFalse, string(v1alpha1.ConditionReasonAwaitingValidation), "Awaiting validation")
return
}

// at least one of the probes is healthy
if hasHealthyProbes {
if len(record.Spec.Endpoints) == len(record.Status.Endpoints) {
// all probes are healthy
setDNSRecordCondition(record, string(v1alpha1.ConditionTypeHealthy), metav1.ConditionTrue, string(v1alpha1.ConditionReasonHealthy), "All healthchecks succeeded")
} else {
// at least one of the probes is healthy
setDNSRecordCondition(record, string(v1alpha1.ConditionTypeHealthy), metav1.ConditionFalse, string(v1alpha1.ConditionReasonPartiallyHealthy), "At least one healthcheck succeeded")
}
// in any case - hostname will resolve so the record is ready
setDNSRecordCondition(record, string(v1alpha1.ConditionTypeReady), metav1.ConditionTrue, string(v1alpha1.ConditionReasonProviderSuccess), "Provider ensured the dns record")
return
}
// none of the probes is healthy - mirror the healthy condition
setDNSRecordCondition(record, string(v1alpha1.ConditionTypeHealthy), metav1.ConditionFalse, string(v1alpha1.ConditionReasonUnhealthy), "None of the healthchecks succeeded")
setDNSRecordCondition(record, string(v1alpha1.ConditionTypeReady), metav1.ConditionFalse, string(v1alpha1.ConditionReasonUnhealthy), "None of the healthchecks succeeded")

}

// setDNSRecordCondition adds or updates a given condition in the DNSRecord status.
func setDNSRecordCondition(dnsRecord *v1alpha1.DNSRecord, conditionType string, status metav1.ConditionStatus, reason, message string) {
cond := metav1.Condition{
Expand Down Expand Up @@ -469,7 +499,7 @@ func (r *DNSRecordReconciler) getDNSProvider(ctx context.Context, dnsRecord *v1a

// applyChanges creates the Plan and applies it to the registry. Returns true only if the Plan had no errors and there were changes to apply.
// The error is nil only if the changes were successfully applied or there were no changes to be made.
func (r *DNSRecordReconciler) applyChanges(ctx context.Context, dnsRecord *v1alpha1.DNSRecord, dnsProvider provider.Provider, isDelete bool) (bool, error) {
func (r *DNSRecordReconciler) applyChanges(ctx context.Context, dnsRecord *v1alpha1.DNSRecord, dnsProvider provider.Provider, isDelete bool) (bool, bool, error) {
logger := log.FromContext(ctx)
rootDomainName := dnsRecord.Spec.RootHost
zoneDomainFilter := externaldnsendpoint.NewDomainFilter([]string{dnsRecord.Status.ZoneDomainName})
Expand All @@ -480,13 +510,13 @@ func (r *DNSRecordReconciler) applyChanges(ctx context.Context, dnsRecord *v1alp
dnsRecord.Status.OwnerID, txtRegistryCacheInterval, txtRegistryWildcardReplacement, managedDNSRecordTypes,
excludeDNSRecordTypes, txtRegistryEncryptEnabled, []byte(txtRegistryEncryptAESKey))
if err != nil {
return false, err
return false, false, err
}

policyID := "sync"
policy, exists := externaldnsplan.Policies[policyID]
if !exists {
return false, fmt.Errorf("unknown policy: %s", policyID)
return false, false, fmt.Errorf("unknown policy: %s", policyID)
}

//If we are deleting set the expected endpoints to an empty array
Expand All @@ -497,25 +527,25 @@ func (r *DNSRecordReconciler) applyChanges(ctx context.Context, dnsRecord *v1alp
//zoneEndpoints = Records in the current dns provider zone
zoneEndpoints, err := registry.Records(ctx)
if err != nil {
return false, err
return false, false, err
}

//specEndpoints = Records that this DNSRecord expects to exist
specEndpoints, err := registry.AdjustEndpoints(dnsRecord.Spec.Endpoints)
if err != nil {
return false, fmt.Errorf("adjusting specEndpoints: %w", err)
return false, false, fmt.Errorf("adjusting specEndpoints: %w", err)
}

//healthySpecEndpoints = Records that this DNSRecord expects to exist, that do not have matching unhealthy probes
healthySpecEndpoints, _, err := r.removeUnhealthyEndpoints(ctx, specEndpoints, dnsRecord)
// healthySpecEndpoints = Records that this DNSRecord expects to exist, that do not have matching unhealthy probes
healthySpecEndpoints, hasHealthyEPs, err := r.removeUnhealthyEndpoints(ctx, specEndpoints, dnsRecord)
if err != nil {
return false, fmt.Errorf("removing unhealthy specEndpoints: %w", err)
return false, false, fmt.Errorf("removing unhealthy specEndpoints: %w", err)
}

//statusEndpoints = Records that were created/updated by this DNSRecord last
statusEndpoints, err := registry.AdjustEndpoints(dnsRecord.Status.Endpoints)
if err != nil {
return false, fmt.Errorf("adjusting statusEndpoints: %w", err)
return false, false, fmt.Errorf("adjusting statusEndpoints: %w", err)
}

// add related endpoints to the record
Expand All @@ -534,16 +564,16 @@ func (r *DNSRecordReconciler) applyChanges(ctx context.Context, dnsRecord *v1alp

plan = plan.Calculate()
if err = plan.Error(); err != nil {
return false, err
return false, hasHealthyEPs, err
}
dnsRecord.Status.DomainOwners = plan.Owners
dnsRecord.Status.Endpoints = healthySpecEndpoints
if plan.Changes.HasChanges() {
logger.Info("Applying changes")
err = registry.ApplyChanges(ctx, plan.Changes)
return true, err
return true, hasHealthyEPs, err
}
return false, nil
return false, hasHealthyEPs, nil
}

// filterEndpoints takes a list of zoneEndpoints and removes from it all endpoints
Expand Down
41 changes: 38 additions & 3 deletions internal/controller/dnsrecord_healthchecks_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ var _ = Describe("DNSRecordReconciler_HealthChecks", func() {
ContainElement(MatchFields(IgnoreExtras, Fields{
"Type": Equal(string(v1alpha1.ConditionTypeReady)),
"Status": Equal(metav1.ConditionTrue),
"Reason": Equal("ProviderSuccess"),
"Reason": Equal(string(v1alpha1.ConditionReasonProviderSuccess)),
"Message": Equal("Provider ensured the dns record"),
})),
)
Expand Down Expand Up @@ -183,6 +183,18 @@ var _ = Describe("DNSRecordReconciler_HealthChecks", func() {
It("Should remove unhealthy endpoints", func() {
//Create default test dnsrecord
Expect(k8sClient.Create(ctx, dnsRecord)).To(Succeed())
Eventually(func(g Gomega) {
err := k8sClient.Get(ctx, client.ObjectKeyFromObject(dnsRecord), dnsRecord)
g.Expect(err).NotTo(HaveOccurred())
g.Expect(dnsRecord.Status.Conditions).To(
ContainElement(MatchFields(IgnoreExtras, Fields{
"Type": Equal(string(v1alpha1.ConditionTypeHealthy)),
"Status": Equal(metav1.ConditionTrue),
"Reason": Equal(string(v1alpha1.ConditionReasonHealthy)),
"Message": Equal("All healthchecks succeeded"),
})),
)
}, TestTimeoutMedium, time.Second).Should(Succeed())

By("Making one of the probes unhealthy")
Eventually(func(g Gomega) {
Expand Down Expand Up @@ -210,7 +222,7 @@ var _ = Describe("DNSRecordReconciler_HealthChecks", func() {
g.Expect(updated).To(BeTrue())
}, TestTimeoutMedium, time.Second).Should(Succeed())

By("Ensure unhealthy endpoints are gone")
By("Ensure unhealthy endpoints are gone and status is updated")
Eventually(func(g Gomega) {
Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(dnsRecord), dnsRecord)).To(Succeed())

Expand All @@ -233,6 +245,14 @@ var _ = Describe("DNSRecordReconciler_HealthChecks", func() {
"ProviderSpecific": Equal(externaldnsendpoint.ProviderSpecific{{Name: "geo-code", Value: "GEO-EU"}}),
})),
))
g.Expect(dnsRecord.Status.Conditions).To(
ContainElement(MatchFields(IgnoreExtras, Fields{
"Type": Equal(string(v1alpha1.ConditionTypeHealthy)),
"Status": Equal(metav1.ConditionFalse),
"Reason": Equal(string(v1alpha1.ConditionReasonPartiallyHealthy)),
"Message": Equal("At least one healthcheck succeeded"),
})),
)

}, TestTimeoutMedium, time.Second).Should(Succeed())

Expand Down Expand Up @@ -261,11 +281,26 @@ var _ = Describe("DNSRecordReconciler_HealthChecks", func() {
}, TestTimeoutMedium, time.Second).Should(Succeed())

// we don't remove EPs if this leads to empty EPs
By("Ensure endpoints are not changed ")
By("Ensure endpoints are not changed and status is updated")
Eventually(func(g Gomega) {
record := &v1alpha1.DNSRecord{}
Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(dnsRecord), record)).To(Succeed())
g.Expect(dnsRecord.Status.Endpoints).To(BeEquivalentTo(record.Status.Endpoints))

g.Expect(record.Status.Conditions).To(ContainElements(
MatchFields(IgnoreExtras, Fields{
"Type": Equal(string(v1alpha1.ConditionTypeHealthy)),
"Status": Equal(metav1.ConditionFalse),
"Reason": Equal(string(v1alpha1.ConditionReasonUnhealthy)),
"Message": Equal("None of the healthchecks succeeded"),
}),
MatchFields(IgnoreExtras, Fields{
"Type": Equal(string(v1alpha1.ConditionTypeReady)),
"Status": Equal(metav1.ConditionFalse),
"Reason": Equal(string(v1alpha1.ConditionReasonUnhealthy)),
"Message": Equal("None of the healthchecks succeeded"),
}),
))
}, TestTimeoutMedium, time.Second).Should(Succeed())
})

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,6 @@ metadata:
name: ${testID}
namespace: ${testNamespace}
spec:
healthCheck:
endpoint: "/health"
port: 80
protocol: "HTTPS"
failureThreshold: 3
endpoints:
- dnsName: 14byhk-2k52h1.klb.${testHostname}
recordTTL: 60
Expand Down

0 comments on commit bf50c50

Please sign in to comment.