From ebbe15ff96e8abfbec180c96bb13a9877cb68814 Mon Sep 17 00:00:00 2001
From: Robert Vasek
Date: Thu, 28 Sep 2023 17:51:57 +0200
Subject: [PATCH] automount: shut down automount daemon with SIGKILL

The automount daemon unmounts the autofs root in /cvmfs upon receiving
SIGTERM. This makes it impossible to reconnect the daemon to the mount
later, so all consumer Pods would lose their CVMFS mounts, without the
possibility of restoring them (unless these Pods were restarted too).
This is a problem when the nodeplugin is only being restarted and the
mount will be needed again.

SIGKILL is handled differently in automount, as it forces the daemon to
skip the cleanup at exit, leaving the autofs mount behind and making it
possible to reconnect to it later. We make use of this: unless the admin
explicitly asks for cleanup with the AUTOFS_TRY_CLEAN_AT_EXIT env var,
no cleanup is done.

Cherry-pick f1d7ee247cd6c46eb34ba406c6a8e680fd210467 (#122)
---
 cmd/automount-runner/main.go          |  2 +
 docs/uninstalling.md                  | 28 ++++++++++++
 internal/cvmfs/automount/automount.go | 69 ++++++++++++++++++++++++---
 internal/cvmfs/env/env.go             | 47 ++++++++++++++++++
 4 files changed, 140 insertions(+), 6 deletions(-)
 create mode 100644 docs/uninstalling.md
 create mode 100644 internal/cvmfs/env/env.go

diff --git a/cmd/automount-runner/main.go b/cmd/automount-runner/main.go
index 57e98b57..ae13122c 100644
--- a/cmd/automount-runner/main.go
+++ b/cmd/automount-runner/main.go
@@ -22,6 +22,7 @@ import (
 	"os"
 
 	"github.com/cvmfs-contrib/cvmfs-csi/internal/cvmfs/automount"
+	"github.com/cvmfs-contrib/cvmfs-csi/internal/cvmfs/env"
 	"github.com/cvmfs-contrib/cvmfs-csi/internal/log"
 	cvmfsversion "github.com/cvmfs-contrib/cvmfs-csi/internal/version"
 
@@ -54,6 +55,7 @@ func main() {
 
 	log.Infof("automount-runner for CVMFS CSI plugin version %s", cvmfsversion.FullVersion())
 	log.Infof("Command line arguments %v", os.Args)
+	log.Infof("Environment variables %s", env.StringAutofsTryCleanAtExit())
 
 	err := automount.Init(&automount.Opts{
 		UnmountTimeoutSeconds: *unmountTimeoutSeconds,
diff --git a/docs/uninstalling.md b/docs/uninstalling.md
new file mode 100644
index 00000000..4f8af4d3
--- /dev/null
+++ b/docs/uninstalling.md
@@ -0,0 +1,28 @@
+# Uninstalling cvmfs-csi driver
+
+The nodeplugin Pods store various resources on the node hosts they are running on:
+* the autofs mount and the respective inner CVMFS mounts,
+* the CVMFS client cache.
+
+By default, the nodeplugin Pod leaves autofs and its respective inner mounts on the node
+in `/cvmfs`. They may need to be unmounted recursively. To do that, you can set the
+`AUTOFS_TRY_CLEAN_AT_EXIT` environment variable to `true` in the nodeplugin DaemonSet and
+restart the Pods. On the next exit, the mounts will be unmounted.
+
+ ```
+ kubectl set env daemonset -l app=cvmfs-csi,component=nodeplugin AUTOFS_TRY_CLEAN_AT_EXIT=true
+ # Restarting nodeplugin Pods needs attention, as this may break existing mounts.
+ # They will be restored once the Pods come back up.
+ kubectl delete pods -l app=cvmfs-csi,component=nodeplugin
+ ```
+
+The CVMFS client cache is stored by default in `/var/lib/cvmfs.csi.cern.ch/cache`.
+This directory is not deleted automatically, and manual intervention is currently needed.
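+
+For example (assuming the default cache location above and shell access to the nodes), the
+cache could be removed on each node once the driver is uninstalled:
+
+ ```
+ # Run on every node after no nodeplugin Pod is using the cache anymore.
+ rm -rf /var/lib/cvmfs.csi.cern.ch/cache
+ ```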
diff --git a/internal/cvmfs/automount/automount.go b/internal/cvmfs/automount/automount.go
index 6c428c30..a838885f 100644
--- a/internal/cvmfs/automount/automount.go
+++ b/internal/cvmfs/automount/automount.go
@@ -24,8 +24,10 @@ import (
 	goexec "os/exec"
 	"os/signal"
 	"path"
+	"sync/atomic"
 	"syscall"
 
+	"github.com/cvmfs-contrib/cvmfs-csi/internal/cvmfs/env"
 	"github.com/cvmfs-contrib/cvmfs-csi/internal/exec"
 	"github.com/cvmfs-contrib/cvmfs-csi/internal/log"
 )
@@ -245,6 +247,19 @@ func RunBlocking() error {
 
 	if log.LevelEnabled(log.LevelDebug) {
 		args = append(args, "--verbose")
+
+		// Log info about the autofs mount in /cvmfs.
+
+		isAutofs, err := IsAutofs("/cvmfs")
+		if err != nil {
+			log.Fatalf("Failed to stat /cvmfs: %v", err)
+		}
+
+		if isAutofs {
+			log.Debugf("autofs already mounted in /cvmfs, automount daemon will reconnect...")
+		} else {
+			log.Debugf("autofs not mounted in /cvmfs, automount daemon will mount it now...")
+		}
 	}
 
 	if log.LevelEnabled(log.LevelTrace) {
@@ -276,20 +291,62 @@ func RunBlocking() error {
 
 	// Catch shutdown signals and forward them to the automount process.
 
-	sigCh := make(chan os.Signal, 1)
+	autofsTryCleanAtExit := env.GetAutofsTryCleanAtExit()
+
+	sigCh := make(chan os.Signal, 2)
 	defer close(sigCh)
 
+	var exitedWithSigTerm atomic.Bool
+
 	go func() {
 		for {
-			if sig, more := <-sigCh; more {
-				cmd.Process.Signal(sig)
-			} else {
+			sig, more := <-sigCh
+			if !more {
 				break
 			}
+
+			if !autofsTryCleanAtExit && sig == syscall.SIGTERM {
+				// The automount daemon unmounts the autofs root in /cvmfs upon
+				// receiving SIGTERM. This makes it impossible to reconnect
+				// the daemon to the mount later, so all consumer Pods would
+				// lose their CVMFS mounts, without the possibility of restoring
+				// them (unless these Pods were restarted too). This is a problem
+				// when the nodeplugin is only being restarted and the mount
+				// will be needed again.
+				//
+				// SIGKILL is handled differently in automount, as it forces
+				// the daemon to skip the cleanup at exit, leaving the autofs
+				// mount behind and making it possible to reconnect to it later.
+				// We make use of this: unless the admin explicitly asks for
+				// cleanup with the AUTOFS_TRY_CLEAN_AT_EXIT env var, no cleanup
+				// is done.
+				//
+				// Also, we intentionally don't unmount the existing autofs-managed
+				// mounts inside /cvmfs, so that any existing consumers receive
+				// ENOTCONN (due to broken FUSE mounts) and an accidental
+				// `mkdir -p` won't succeed. These mounts are cleaned up by the
+				// daemon on startup.
+				//
+				// TODO: remove this once the automount daemon supports skipping
+				// cleanup (via a command line flag).
+
+				log.Debugf("Sending SIGKILL to automount daemon")
+
+				exitedWithSigTerm.Store(true)
+				cmd.Process.Signal(syscall.SIGKILL)
+				break
+			}
+
+			cmd.Process.Signal(sig)
 		}
 	}()
 
-	signal.Notify(sigCh, syscall.SIGTERM, syscall.SIGKILL)
+	shutdownSignals := []os.Signal{
+		syscall.SIGINT,
+		syscall.SIGTERM,
+		syscall.SIGKILL,
+	}
+
+	signal.Notify(sigCh, shutdownSignals...)
 
 	// Start automount daemon.
 
@@ -303,7 +360,7 @@ func RunBlocking() error {
 
 	cmd.Wait()
 
-	if cmd.ProcessState.ExitCode() != 0 {
+	if !exitedWithSigTerm.Load() && cmd.ProcessState.ExitCode() != 0 {
 		log.Fatalf(fmt.Sprintf("automount[%d] has exited unexpectedly: %s", cmd.Process.Pid, cmd.ProcessState))
 	}
diff --git a/internal/cvmfs/env/env.go b/internal/cvmfs/env/env.go
new file mode 100644
index 00000000..70aebf84
--- /dev/null
+++ b/internal/cvmfs/env/env.go
@@ -0,0 +1,47 @@
+// Copyright CERN.
+//
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+package env
+
+import (
+	"fmt"
+	"os"
+	"strconv"
+)
+
+const (
+	// Boolean value. By default, when exiting, the automount daemon is sent
+	// the SIGKILL signal, forcing it to skip its cleanup procedure and leave
+	// the autofs mount behind. This is needed for the daemon to be able
+	// to reconnect to the autofs mount when the nodeplugin Pod is being
+	// restarted.
+	//
+	// Setting the value of this environment variable to true overrides this,
+	// and allows the daemon to do the cleanup. This is useful when
+	// e.g. uninstalling the cvmfs-csi driver.
+	AutofsTryCleanAtExit = "AUTOFS_TRY_CLEAN_AT_EXIT"
+)
+
+func GetAutofsTryCleanAtExit() bool {
+	strVal := os.Getenv(AutofsTryCleanAtExit)
+	boolVal, _ := strconv.ParseBool(strVal)
+
+	return boolVal
+}
+
+func StringAutofsTryCleanAtExit() string {
+	return fmt.Sprintf("%s=\"%v\"", AutofsTryCleanAtExit, GetAutofsTryCleanAtExit())
+}
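
A note on the AUTOFS_TRY_CLEAN_AT_EXIT parsing above: GetAutofsTryCleanAtExit
discards the error returned by strconv.ParseBool, so an unset or malformed
value silently reads as false (cleanup skipped). A minimal standalone sketch
of the accepted values follows; it reuses only names from the patch and is
illustrative, not part of the change:

package main

import (
	"fmt"
	"strconv"
)

func main() {
	// strconv.ParseBool accepts exactly: "1", "t", "T", "TRUE", "true", "True",
	// "0", "f", "F", "FALSE", "false", "False". Anything else returns an error,
	// which GetAutofsTryCleanAtExit ignores, defaulting to false.
	for _, v := range []string{"true", "TRUE", "1", "yes", ""} {
		b, err := strconv.ParseBool(v)
		fmt.Printf("AUTOFS_TRY_CLEAN_AT_EXIT=%q -> %v (parse error: %v)\n", v, b, err)
	}
}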