diff --git a/cmd/automount-runner/main.go b/cmd/automount-runner/main.go index 57e98b57..ae13122c 100644 --- a/cmd/automount-runner/main.go +++ b/cmd/automount-runner/main.go @@ -22,6 +22,7 @@ import ( "os" "github.com/cvmfs-contrib/cvmfs-csi/internal/cvmfs/automount" + "github.com/cvmfs-contrib/cvmfs-csi/internal/cvmfs/env" "github.com/cvmfs-contrib/cvmfs-csi/internal/log" cvmfsversion "github.com/cvmfs-contrib/cvmfs-csi/internal/version" @@ -54,6 +55,7 @@ func main() { log.Infof("automount-runner for CVMFS CSI plugin version %s", cvmfsversion.FullVersion()) log.Infof("Command line arguments %v", os.Args) + log.Infof("Environment variables %s", env.StringAutofsTryCleanAtExit()) err := automount.Init(&automount.Opts{ UnmountTimeoutSeconds: *unmountTimeoutSeconds, diff --git a/docs/uninstalling.md b/docs/uninstalling.md new file mode 100644 index 00000000..4f8af4d3 --- /dev/null +++ b/docs/uninstalling.md @@ -0,0 +1,20 @@ +# Uninstalling the cvmfs-csi driver + +The nodeplugin Pods store various resources on the node hosts they are running on: +* autofs mount and the respective inner CVMFS mounts, +* CVMFS client cache. + +By default, the nodeplugin Pod leaves autofs and its respective inner mounts on the node +in `/var/cvmfs`. They may need to be unmounted recursively. To do that, you can set the +`AUTOFS_TRY_CLEAN_AT_EXIT` environment variable to `true` in nodeplugin's DaemonSet and restart +the Pods. On the next exit, they will be unmounted. + + ``` + kubectl set env daemonset -l app=cvmfs-csi,component=nodeplugin AUTOFS_TRY_CLEAN_AT_EXIT=true + # Restarting nodeplugin Pods needs attention, as this may break existing mounts. + # They will be restored once the Pods come back up. + kubectl delete pods -l app=cvmfs-csi,component=nodeplugin + ``` + +The CVMFS client cache is stored by default in `/var/lib/cvmfs.csi.cern.ch/cache`. +This directory is not deleted automatically, and manual intervention is currently needed. 
diff --git a/internal/cvmfs/automount/automount.go b/internal/cvmfs/automount/automount.go index 6c428c30..a838885f 100644 --- a/internal/cvmfs/automount/automount.go +++ b/internal/cvmfs/automount/automount.go @@ -24,8 +24,10 @@ import ( goexec "os/exec" "os/signal" "path" + "sync/atomic" "syscall" + "github.com/cvmfs-contrib/cvmfs-csi/internal/cvmfs/env" "github.com/cvmfs-contrib/cvmfs-csi/internal/exec" "github.com/cvmfs-contrib/cvmfs-csi/internal/log" ) @@ -245,6 +247,19 @@ func RunBlocking() error { if log.LevelEnabled(log.LevelDebug) { args = append(args, "--verbose") + + // Log info about autofs mount in /cvmfs. + + isAutofs, err := IsAutofs("/cvmfs") + if err != nil { + log.Fatalf("Failed to stat /cvmfs: %v", err) + } + + if isAutofs { + log.Debugf("autofs already mounted in /cvmfs, automount daemon will reconnect...") + } else { + log.Debugf("autofs not mounted in /cvmfs, automount daemon will mount it now...") + } } if log.LevelEnabled(log.LevelTrace) { @@ -276,20 +291,62 @@ func RunBlocking() error { // Catch SIGTERM and SIGKILL and forward it to the automount process. - sigCh := make(chan os.Signal, 1) + autofsTryCleanAtExit := env.GetAutofsTryCleanAtExit() + + sigCh := make(chan os.Signal, 2) defer close(sigCh) + var exitedWithSigTerm atomic.Bool + go func() { for { - if sig, more := <-sigCh; more { - cmd.Process.Signal(sig) - } else { + sig, more := <-sigCh + if !more { break } + + if !autofsTryCleanAtExit && sig == syscall.SIGTERM { + // automount daemon unmounts the autofs root in /cvmfs upon + // receiving SIGTERM. This makes it impossible to reconnect + // the daemon to the mount later, so all consumer Pods will + // lose their CVMFS mounts, without the possibility of restoring + // them (unless these Pods are restarted too). The implication + // is that the nodeplugin is just being restarted, and will be + // needed again. 
+ // + // SIGKILL is handled differently in automount, as this forces + // the daemon to skip the cleanup at exit, leaving the autofs + // mount behind and making it possible to reconnect to it later. + // We make use of this: unless the admin explicitly asks for + // cleanup with the AUTOFS_TRY_CLEAN_AT_EXIT env var, no cleanup + // is done. + // + // Also, we intentionally don't unmount the existing autofs-managed + // mounts inside /cvmfs, so that any existing consumers receive ENOTCONN + // (due to broken FUSE mounts), so that accidental `mkdir -p` won't + // succeed. They are cleaned by the daemon on startup. + // + // TODO: remove this once the automount daemon supports skipping + // cleanup (via a command line flag). + + log.Debugf("Sending SIGKILL to automount daemon") + + exitedWithSigTerm.Store(true) + cmd.Process.Signal(syscall.SIGKILL) + break + } + + cmd.Process.Signal(sig) } }() - signal.Notify(sigCh, syscall.SIGTERM, syscall.SIGKILL) + shutdownSignals := []os.Signal{ + syscall.SIGINT, + syscall.SIGTERM, + syscall.SIGKILL, + } + + signal.Notify(sigCh, shutdownSignals...) // Start automount daemon. @@ -303,7 +360,7 @@ func RunBlocking() error { cmd.Wait() - if cmd.ProcessState.ExitCode() != 0 { + if !exitedWithSigTerm.Load() && cmd.ProcessState.ExitCode() != 0 { log.Fatalf(fmt.Sprintf("automount[%d] has exited unexpectedly: %s", cmd.Process.Pid, cmd.ProcessState)) } diff --git a/internal/cvmfs/env/env.go b/internal/cvmfs/env/env.go new file mode 100644 index 00000000..70aebf84 --- /dev/null +++ b/internal/cvmfs/env/env.go @@ -0,0 +1,47 @@ +// Copyright CERN. +// +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package env + +import ( + "fmt" + "os" + "strconv" +) + +const ( + // Boolean value. By default, when exiting, automount daemon is sent + // SIGKILL signal forcing it to skip its clean up procedure, leaving + // the autofs mount behind. This is needed for the daemon to be able + // to reconnect to the autofs mount when the nodeplugin Pod is being + // restarted. + // + // Setting the value of this environment variable to TRUE overrides this, + // and allows the daemon to do the clean up. This is useful when + // e.g. uninstalling the cvmfs-csi driver. + AutofsTryCleanAtExit = "AUTOFS_TRY_CLEAN_AT_EXIT" +) + +func GetAutofsTryCleanAtExit() bool { + strVal := os.Getenv(AutofsTryCleanAtExit) + boolVal, _ := strconv.ParseBool(strVal) + + return boolVal +} + +func StringAutofsTryCleanAtExit() string { + return fmt.Sprintf("%s=\"%v\"", AutofsTryCleanAtExit, GetAutofsTryCleanAtExit()) +}