From 0c4acf9fba59668463c200c31690ee309c4347b0 Mon Sep 17 00:00:00 2001 From: Maskym Vavilov Date: Wed, 27 Mar 2024 12:28:41 +0000 Subject: [PATCH] GH-36 dns record lifecycle --- api/v1alpha1/dnsrecord_types.go | 6 ++ cmd/main.go | 16 ++- internal/common/conditions/conditions.go | 37 +++++++ internal/controller/dnsrecord_controller.go | 109 +++++++++++++------- 4 files changed, 130 insertions(+), 38 deletions(-) diff --git a/api/v1alpha1/dnsrecord_types.go b/api/v1alpha1/dnsrecord_types.go index 63228984..118c8d72 100644 --- a/api/v1alpha1/dnsrecord_types.go +++ b/api/v1alpha1/dnsrecord_types.go @@ -63,6 +63,12 @@ type DNSRecordStatus struct { // +optional ObservedGeneration int64 `json:"observedGeneration,omitempty"` + // + QueuedAt time.Time `json:"queuedAt,omitempty"` + QueuedFor time.Time `json:"queuedFor,omitempty"` + ValidFor time.Duration `json:"validFor,omitempty"` + WriteCounter int64 `json:"failCounter,omitempty"` + // endpoints are the last endpoints that were successfully published by the provider // // Provides a simple mechanism to store the current provider records in order to diff --git a/cmd/main.go b/cmd/main.go index 208ed3bf..175a33c9 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -43,6 +43,11 @@ var ( setupLog = ctrl.Log.WithName("setup") ) +const ( + RequeueDuration = "15m" + ValidityDuration = "14m" +) + func init() { utilruntime.Must(clientgoscheme.AddToScheme(scheme)) @@ -54,11 +59,20 @@ func main() { var metricsAddr string var enableLeaderElection bool var probeAddr string + var requeueTime string + var validFor string + // TODO use duration type vars flag.StringVar(&metricsAddr, "metrics-bind-address", ":8080", "The address the metric endpoint binds to.") flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.") flag.BoolVar(&enableLeaderElection, "leader-elect", false, "Enable leader election for controller manager. "+ "Enabling this will ensure there is only one active controller manager.") + flag.StringVar(&requeueTime, "requeue-time", RequeueDuration, + "Duration for the validation reconciliation loop. "+ + "Controls how ofter record is reconciled") + flag.StringVar(&validFor, "valid-for", ValidityDuration, + "Duration when the record is considered to hold valid information"+ + "Controls if we commit to the full reconcile loop") opts := zap.Options{ Development: true, } @@ -94,7 +108,7 @@ func main() { Client: mgr.GetClient(), Scheme: mgr.GetScheme(), ProviderFactory: providerFactory, - }).SetupWithManager(mgr); err != nil { + }).SetupWithManager(mgr, requeueTime, validFor); err != nil { setupLog.Error(err, "unable to create controller", "controller", "DNSRecord") os.Exit(1) } diff --git a/internal/common/conditions/conditions.go b/internal/common/conditions/conditions.go index 2a0b11d2..58107ac0 100644 --- a/internal/common/conditions/conditions.go +++ b/internal/common/conditions/conditions.go @@ -3,4 +3,41 @@ package conditions type ConditionType string type ConditionReason string +// TODO move to the API const ConditionTypeReady ConditionType = "Ready" + +/* +when queued for - time of the next full reconcile / repeat reconcile +fail counter +time when queued for the reconcile +expiration time for validity of data + +11:00 i've queued +11:15 when to reconcile + + +scenarios: +full reconcile +hit when now > queued + valid +generation changes + +short one +when now < queued + valid + + + +fail counter +if gen has not changed and i need to write it is a fail + + + + + +reset counter when gen changes +when validation loop succeeds + + + + + +*/ diff --git a/internal/controller/dnsrecord_controller.go b/internal/controller/dnsrecord_controller.go index edc00af6..8a614032 100644 --- a/internal/controller/dnsrecord_controller.go +++ b/internal/controller/dnsrecord_controller.go @@ -18,8 +18,10 @@ package controller import ( "context" + "errors" "fmt" "strings" + "time" "k8s.io/apimachinery/pkg/api/equality" apierrors "k8s.io/apimachinery/pkg/api/errors" @@ -45,7 +47,13 @@ const ( DNSRecordFinalizer = "kuadrant.io/dns-record" ) -var Clock clock.Clock = clock.RealClock{} +var ( + defaultRequeueTime time.Duration + validationRequeueTime = time.Second * 5 + validFor time.Duration + reconcileStart = time.Time{} + Clock clock.Clock = clock.RealClock{} +) // DNSRecordReconciler reconciles a DNSRecord object type DNSRecordReconciler struct { @@ -61,19 +69,21 @@ type DNSRecordReconciler struct { func (r *DNSRecordReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { logger := log.FromContext(ctx) + reconcileStart = time.Now() previous := &v1alpha1.DNSRecord{} err := r.Client.Get(ctx, client.ObjectKey{Namespace: req.Namespace, Name: req.Name}, previous) if err != nil { - if err := client.IgnoreNotFound(err); err == nil { + if err = client.IgnoreNotFound(err); err == nil { return ctrl.Result{}, nil } else { return ctrl.Result{}, err } } dnsRecord := previous.DeepCopy() + dnsRecord.Status.ValidFor = validFor if dnsRecord.DeletionTimestamp != nil && !dnsRecord.DeletionTimestamp.IsZero() { - if err := r.deleteRecord(ctx, dnsRecord); err != nil { + if err = r.deleteRecord(ctx, dnsRecord); err != nil { logger.Error(err, "Failed to delete DNSRecord") return ctrl.Result{}, err } @@ -89,44 +99,43 @@ func (r *DNSRecordReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( } if !controllerutil.ContainsFinalizer(dnsRecord, DNSRecordFinalizer) { + dnsRecord.Status.QueuedAt = reconcileStart + dnsRecord.Status.QueuedFor = reconcileStart.Add(validationRequeueTime) logger.Info("Adding Finalizer", "name", DNSRecordFinalizer) controllerutil.AddFinalizer(dnsRecord, DNSRecordFinalizer) err = r.Update(ctx, dnsRecord) if err != nil { return ctrl.Result{}, err } - return ctrl.Result{Requeue: true}, nil + return ctrl.Result{RequeueAfter: validationRequeueTime}, nil } var reason, message string - status := metav1.ConditionTrue - reason = "ProviderSuccess" - message = "Provider ensured the dns record" err = dnsRecord.Validate() if err != nil { - status = metav1.ConditionFalse reason = "ValidationError" message = fmt.Sprintf("validation of DNSRecord failed: %v", err) - setDNSRecordCondition(dnsRecord, string(conditions.ConditionTypeReady), status, reason, message) - return r.updateStatus(ctx, previous, dnsRecord) + setDNSRecordCondition(dnsRecord, string(conditions.ConditionTypeReady), metav1.ConditionFalse, reason, message) + return r.updateStatus(ctx, previous, dnsRecord, 0) } + // Publish the record - err = r.publishRecord(ctx, dnsRecord) + requeueAfter, err := r.publishRecord(ctx, dnsRecord) if err != nil { - status = metav1.ConditionFalse reason = "ProviderError" message = fmt.Sprintf("The DNS provider failed to ensure the record: %v", provider.SanitizeError(err)) - setDNSRecordCondition(dnsRecord, string(conditions.ConditionTypeReady), status, reason, message) - return r.updateStatus(ctx, previous, dnsRecord) + setDNSRecordCondition(dnsRecord, string(conditions.ConditionTypeReady), metav1.ConditionFalse, reason, message) + return r.updateStatus(ctx, previous, dnsRecord, 0) } // success - setDNSRecordCondition(dnsRecord, string(conditions.ConditionTypeReady), status, reason, message) dnsRecord.Status.ObservedGeneration = dnsRecord.Generation dnsRecord.Status.Endpoints = dnsRecord.Spec.Endpoints - return r.updateStatus(ctx, previous, dnsRecord) + return r.updateStatus(ctx, previous, dnsRecord, requeueAfter) } -func (r *DNSRecordReconciler) updateStatus(ctx context.Context, previous, current *v1alpha1.DNSRecord) (reconcile.Result, error) { +func (r *DNSRecordReconciler) updateStatus(ctx context.Context, previous, current *v1alpha1.DNSRecord, requeueTime time.Duration) (reconcile.Result, error) { + current.Status.QueuedFor = reconcileStart.Add(requeueTime) + if !equality.Semantic.DeepEqual(previous.Status, current.Status) { updateError := r.Status().Update(ctx, current) if apierrors.IsConflict(updateError) { @@ -134,11 +143,19 @@ func (r *DNSRecordReconciler) updateStatus(ctx context.Context, previous, curren } return ctrl.Result{}, updateError } - return ctrl.Result{}, nil + return ctrl.Result{RequeueAfter: requeueTime}, nil } // SetupWithManager sets up the controller with the Manager. -func (r *DNSRecordReconciler) SetupWithManager(mgr ctrl.Manager) error { +func (r *DNSRecordReconciler) SetupWithManager(mgr ctrl.Manager, requeueIn, valid string) error { + var err error + if defaultRequeueTime, err = time.ParseDuration(requeueIn); err != nil { + return errors.New("error parsing requeue-time time flag. Consider using appropriate format i.e. \"10m\" or \"600s\"") + } + if validFor, err = time.ParseDuration(valid); err != nil { + return errors.New("error parsing valid-for time flag. Consider using appropriate format i.e. \"10m\" or \"600s\"") + } + return ctrl.NewControllerManagedBy(mgr). For(&v1alpha1.DNSRecord{}). Complete(r) @@ -166,7 +183,7 @@ func (r *DNSRecordReconciler) deleteRecord(ctx context.Context, dnsRecord *v1alp return fmt.Errorf("the managed zone is not in a ready state : %s", managedZone.Name) } - err = r.applyChanges(ctx, dnsRecord, managedZone, true) + _, err = r.applyChanges(ctx, dnsRecord, managedZone, true) if err != nil { if strings.Contains(err.Error(), "was not found") || strings.Contains(err.Error(), "notFound") { logger.Info("Record not found in managed zone, continuing", "dnsRecord", dnsRecord.Name, "managedZone", managedZone.Name) @@ -184,7 +201,7 @@ func (r *DNSRecordReconciler) deleteRecord(ctx context.Context, dnsRecord *v1alp // publishRecord publishes record(s) to the DNSPRovider(i.e. route53) configured by the ManagedZone assigned to this // DNSRecord (dnsRecord.Status.ParentManagedZone). -func (r *DNSRecordReconciler) publishRecord(ctx context.Context, dnsRecord *v1alpha1.DNSRecord) error { +func (r *DNSRecordReconciler) publishRecord(ctx context.Context, dnsRecord *v1alpha1.DNSRecord) (time.Duration, error) { logger := log.FromContext(ctx) managedZone := &v1alpha1.ManagedZone{ ObjectMeta: metav1.ObjectMeta{ @@ -194,26 +211,34 @@ func (r *DNSRecordReconciler) publishRecord(ctx context.Context, dnsRecord *v1al } err := r.Get(ctx, client.ObjectKeyFromObject(managedZone), managedZone, &client.GetOptions{}) if err != nil { - return err + return 0, err } managedZoneReady := meta.IsStatusConditionTrue(managedZone.Status.Conditions, "Ready") if !managedZoneReady { - return fmt.Errorf("the managed zone is not in a ready state : %s", managedZone.Name) + return 0, fmt.Errorf("the managed zone is not in a ready state : %s", managedZone.Name) } - if dnsRecord.Generation == dnsRecord.Status.ObservedGeneration { + // cut off here for the short reconcile loop + if !generationChanged(dnsRecord) && reconcileStart.Before(dnsRecord.Status.QueuedAt.Add(validFor)) { logger.V(3).Info("Skipping managed zone to which the DNS dnsRecord is already published", "dnsRecord", dnsRecord.Name, "managedZone", managedZone.Name) - return nil + return validFor, nil + } + if generationChanged(dnsRecord) { + dnsRecord.Status.WriteCounter = 0 } - err = r.applyChanges(ctx, dnsRecord, managedZone, false) + requeueAfter, err := r.applyChanges(ctx, dnsRecord, managedZone, false) if err != nil { - return err + return 0, err } logger.Info("Published DNSRecord to manage zone", "dnsRecord", dnsRecord.Name, "managedZone", managedZone.Name) - return nil + return requeueAfter, nil +} + +func generationChanged(record *v1alpha1.DNSRecord) bool { + return record.Generation != record.Status.ObservedGeneration } // setDNSRecordCondition adds or updates a given condition in the DNSRecord status.. @@ -228,7 +253,7 @@ func setDNSRecordCondition(dnsRecord *v1alpha1.DNSRecord, conditionType string, meta.SetStatusCondition(&dnsRecord.Status.Conditions, cond) } -func (r *DNSRecordReconciler) applyChanges(ctx context.Context, dnsRecord *v1alpha1.DNSRecord, managedZone *v1alpha1.ManagedZone, isDelete bool) error { +func (r *DNSRecordReconciler) applyChanges(ctx context.Context, dnsRecord *v1alpha1.DNSRecord, managedZone *v1alpha1.ManagedZone, isDelete bool) (time.Duration, error) { logger := log.FromContext(ctx) filterDomain, _ := strings.CutPrefix(managedZone.Spec.DomainName, v1alpha1.WildcardPrefix) if dnsRecord.Spec.RootHost != nil { @@ -244,7 +269,7 @@ func (r *DNSRecordReconciler) applyChanges(ctx context.Context, dnsRecord *v1alp logger.V(3).Info("applyChanges", "zone", managedZone.Spec.DomainName, "rootDomainFilter", rootDomainFilter, "providerConfig", providerConfig) dnsProvider, err := r.ProviderFactory.ProviderFor(ctx, managedZone, providerConfig) if err != nil { - return err + return 0, err } managedDNSRecordTypes := []string{externaldnsendpoint.RecordTypeA, externaldnsendpoint.RecordTypeAAAA, externaldnsendpoint.RecordTypeCNAME} @@ -252,13 +277,13 @@ func (r *DNSRecordReconciler) applyChanges(ctx context.Context, dnsRecord *v1alp registry, err := dnsRecord.GetRegistry(dnsProvider, managedDNSRecordTypes, excludeDNSRecordTypes) if err != nil { - return err + return 0, err } policyID := "sync" policy, exists := externaldnsplan.Policies[policyID] if !exists { - return fmt.Errorf("unknown policy: %s", policyID) + return 0, fmt.Errorf("unknown policy: %s", policyID) } //If we are deleting set the expected endpoints to an empty array @@ -269,19 +294,19 @@ func (r *DNSRecordReconciler) applyChanges(ctx context.Context, dnsRecord *v1alp //zoneEndpoints = Records in the current dns provider zone zoneEndpoints, err := registry.Records(ctx) if err != nil { - return err + return 0, err } //specEndpoints = Records that this DNSRecord expects to exist specEndpoints, err := registry.AdjustEndpoints(dnsRecord.Spec.Endpoints) if err != nil { - return fmt.Errorf("adjusting specEndpoints: %w", err) + return 0, fmt.Errorf("adjusting specEndpoints: %w", err) } //statusEndpoints = Records that were created/updated by this DNSRecord last statusEndpoints, err := registry.AdjustEndpoints(dnsRecord.Status.Endpoints) if err != nil { - return fmt.Errorf("adjusting statusEndpoints: %w", err) + return 0, fmt.Errorf("adjusting statusEndpoints: %w", err) } //Note: All endpoint lists should be in the same provider specific format at this point @@ -302,15 +327,25 @@ func (r *DNSRecordReconciler) applyChanges(ctx context.Context, dnsRecord *v1alp plan = plan.Calculate() + dnsRecord.Status.ValidFor = defaultRequeueTime if plan.Changes.HasChanges() { + // generation has not changed but there are changes. + // implies that they were overridden - bump write counter + if !generationChanged(dnsRecord) { + dnsRecord.Status.WriteCounter++ + } + dnsRecord.Status.ValidFor = validationRequeueTime + setDNSRecordCondition(dnsRecord, string(conditions.ConditionTypeReady), metav1.ConditionFalse, "Awaiting validation", "Awaiting validation") logger.Info("Applying changes") err = registry.ApplyChanges(ctx, plan.Changes) if err != nil { - return err + return dnsRecord.Status.ValidFor, err } } else { logger.Info("All records are already up to date") + dnsRecord.Status.WriteCounter = 0 + setDNSRecordCondition(dnsRecord, string(conditions.ConditionTypeReady), metav1.ConditionTrue, "ProviderSuccess", "Provider ensured the dns record") } - return nil + return dnsRecord.Status.ValidFor, nil }