Skip to content

Commit

Permalink
Merge pull request #8 from adrianhernandez-stratio/0.1/EOS-5172
Browse files Browse the repository at this point in the history
[EOS-5172] Capsule error 500 with more than one replica
  • Loading branch information
Adrián Hernández authored Sep 15, 2021
2 parents 0a4915f + e81c86f commit 2280556
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 4 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

## 0.1.0 (August 19, 2021)

* [EOS-5172] Capsule error 500 when deployed with more than one replica
* Adapt to Stratio CICD flow
* Add system-user-group as exception of capsule-user-group
* Using v0.0.5 tag from upstream as base
40 changes: 36 additions & 4 deletions controllers/secret/tls.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,14 @@ import (
"context"
"crypto/x509"
"encoding/pem"
"syscall"
"fmt"
"os"
"time"

"github.com/go-logr/logr"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
Expand Down Expand Up @@ -82,7 +84,7 @@ func (r TLSReconciler) Reconcile(ctx context.Context, request ctrl.Request) (ctr
r.Log.Info("Missing Capsule TLS certificate")
rq = 6 * 30 * 24 * time.Hour

opts := cert.NewCertOpts(time.Now().Add(rq), "capsule-webhook-service.capsule-system.svc")
opts := cert.NewCertOpts(time.Now().Add(rq), fmt.Sprintf("capsule-webhook-service.%s.svc", r.Namespace))
var crt, key *bytes.Buffer
crt, key, err = ca.GenerateCertificate(opts)
if err != nil {
Expand Down Expand Up @@ -124,8 +126,38 @@ func (r TLSReconciler) Reconcile(ctx context.Context, request ctrl.Request) (ctr
}

if instance.Name == tlsSecretName && res == controllerutil.OperationResultUpdated {
r.Log.Info("Capsule TLS certificates has been updated, we need to restart the Controller")
_ = syscall.Kill(syscall.Getpid(), syscall.SIGINT)
r.Log.Info("Capsule TLS certificates has been updated, Controller pods must be restarted to load new certificate")

hostname, _ := os.Hostname()
leaderPod := &corev1.Pod{}
if err = r.Client.Get(ctx, types.NamespacedName{Namespace: os.Getenv("NAMESPACE"), Name: hostname}, leaderPod); err != nil {
r.Log.Error(err, "cannot retrieve the leader Pod, probably running in out of the cluster mode")

return reconcile.Result{}, nil
}

podList := &corev1.PodList{}
if err = r.Client.List(ctx, podList, client.MatchingLabels(leaderPod.ObjectMeta.Labels)); err != nil {
r.Log.Error(err, "cannot retrieve list of Capsule pods requiring restart upon TLS update")

return reconcile.Result{}, nil
}

for _, p := range podList.Items {
nonLeaderPod := p
// Skip the leader Pod inside the loop: it is deleted last, after the other Pods
if nonLeaderPod.GetName() == leaderPod.GetName() {
continue
}

if err = r.Client.Delete(ctx, &nonLeaderPod); err != nil {
r.Log.Error(err, "cannot delete the non-leader Pod due to TLS update")
}
}

if err = r.Client.Delete(ctx, leaderPod); err != nil {
r.Log.Error(err, "cannot delete the leader Pod due to TLS update")
}
}

r.Log.Info("Reconciliation completed, processing back in " + rq.String())
Expand Down

0 comments on commit 2280556

Please sign in to comment.