automount: shut down automount daemon with SIGKILL
automount daemon unmounts the autofs root in /cvmfs upon
receiving SIGTERM. This makes it impossible to reconnect
the daemon to the mount later, so all consumer Pods will
lose their CVMFS mounts, without the possibility of restoring
them (unless these Pods are restarted too). This is a problem
when the nodeplugin is merely being restarted and the mounts
will be needed again.

SIGKILL is handled differently by automount: it forces
the daemon to skip its cleanup at exit, leaving the autofs
mount behind and making it possible to reconnect to it later.
We make use of this: unless the admin explicitly asks for
cleanup with the AUTOFS_TRY_CLEAN_AT_EXIT env var, no cleanup
is done.
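
The mechanism boils down to an env-gated choice of which signal to forward to the child automount process. A condensed sketch under assumed names (the simplified `automount --foreground` invocation is illustrative only; the real change is in internal/cvmfs/automount/automount.go in this diff):

```go
// Sketch only; placeholder for the real runner code.
package main

import (
	"os"
	"os/exec"
	"os/signal"
	"strconv"
	"syscall"
)

func main() {
	// Whether the admin explicitly asked for cleanup at exit.
	tryCleanAtExit, _ := strconv.ParseBool(os.Getenv("AUTOFS_TRY_CLEAN_AT_EXIT"))

	// Illustrative invocation; the real runner builds its argument list elsewhere.
	cmd := exec.Command("automount", "--foreground")
	if err := cmd.Start(); err != nil {
		panic(err)
	}

	sigCh := make(chan os.Signal, 2)
	signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM)

	go func() {
		for sig := range sigCh {
			if sig == syscall.SIGTERM && !tryCleanAtExit {
				// Turn SIGTERM into SIGKILL so automount skips its at-exit
				// cleanup, leaving the autofs mount in /cvmfs behind for a
				// restarted daemon to reconnect to.
				cmd.Process.Signal(syscall.SIGKILL)
				return
			}
			// Forward anything else as-is; on SIGTERM automount unmounts
			// the autofs root in /cvmfs.
			cmd.Process.Signal(sig)
		}
	}()

	cmd.Wait()
}
```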

Cherry-pick f1d7ee2 (#122)
gman0 committed Sep 30, 2023
1 parent 729bbcc commit ebbe15f
Showing 4 changed files with 132 additions and 6 deletions.
2 changes: 2 additions & 0 deletions cmd/automount-runner/main.go
@@ -22,6 +22,7 @@ import (
"os"

"github.com/cvmfs-contrib/cvmfs-csi/internal/cvmfs/automount"
"github.com/cvmfs-contrib/cvmfs-csi/internal/cvmfs/env"
"github.com/cvmfs-contrib/cvmfs-csi/internal/log"
cvmfsversion "github.com/cvmfs-contrib/cvmfs-csi/internal/version"

@@ -54,6 +55,7 @@ func main() {

log.Infof("automount-runner for CVMFS CSI plugin version %s", cvmfsversion.FullVersion())
log.Infof("Command line arguments %v", os.Args)
log.Infof("Environment variables %s", env.StringAutofsTryCleanAtExit())

err := automount.Init(&automount.Opts{
UnmountTimeoutSeconds: *unmountTimeoutSeconds,
20 changes: 20 additions & 0 deletions docs/uninstalling.md
@@ -0,0 +1,20 @@
# Uninstalling cvmfs-csi driver

The nodeplugin Pods store various resources on the node hosts they are running on:
* autofs mount and the respective inner CVMFS mounts,
* CVMFS client cache.

By default, the nodeplugin Pod leaves autofs and its respective inner mounts on the node
in `/var/cvmfs`. They may need to be unmounted recursively. To do that, you can set the
`AUTOFS_TRY_CLEAN_AT_EXIT` environment variable to `true` in the nodeplugin's DaemonSet and restart
the Pods. On the next exit, they will be unmounted.

```
kubectl set env daemonset -l app=cvmfs-csi,component=nodeplugin AUTOFS_TRY_CLEAN_AT_EXIT=true
# Restarting nodeplugin Pods needs attention, as it breaks mounts in existing consumer Pods.
# The autofs mount is set up again once the nodeplugin Pods come back up, but affected
# consumer Pods may need to be restarted to regain their CVMFS mounts.
kubectl delete pods -l app=cvmfs-csi,component=nodeplugin
```

The CVMFS client cache is stored by default in `/var/lib/cvmfs.csi.cern.ch/cache`.
This directory is not deleted automatically, and manual intervention is currently needed.
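
For reference, a hypothetical node-side cleanup sketch, assuming the default paths mentioned above (`/var/cvmfs` for the autofs root and `/var/lib/cvmfs.csi.cern.ch/cache` for the client cache) and root privileges on the node; this is not part of the driver:

```go
// Hypothetical cleanup helper for a node host; sketch only.
package main

import (
	"bufio"
	"fmt"
	"os"
	"sort"
	"strings"
	"syscall"
)

func main() {
	const autofsRoot = "/var/cvmfs"                     // autofs root on the node (default)
	const cacheDir = "/var/lib/cvmfs.csi.cern.ch/cache" // CVMFS client cache (default)

	// Collect every mount point at or below the autofs root.
	f, err := os.Open("/proc/self/mounts")
	if err != nil {
		panic(err)
	}
	defer f.Close()

	var mounts []string
	s := bufio.NewScanner(f)
	for s.Scan() {
		fields := strings.Fields(s.Text())
		if len(fields) < 2 {
			continue
		}
		if mp := fields[1]; mp == autofsRoot || strings.HasPrefix(mp, autofsRoot+"/") {
			mounts = append(mounts, mp)
		}
	}

	// Unmount the innermost (longest) paths first, the autofs root last.
	sort.Slice(mounts, func(i, j int) bool { return len(mounts[i]) > len(mounts[j]) })
	for _, mp := range mounts {
		if err := syscall.Unmount(mp, 0); err != nil {
			fmt.Fprintf(os.Stderr, "unmount %s: %v\n", mp, err)
		}
	}

	// The client cache is never removed automatically; delete it explicitly.
	if err := os.RemoveAll(cacheDir); err != nil {
		panic(err)
	}
}
```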
69 changes: 63 additions & 6 deletions internal/cvmfs/automount/automount.go
@@ -24,8 +24,10 @@ import (
goexec "os/exec"
"os/signal"
"path"
"sync/atomic"
"syscall"

"github.com/cvmfs-contrib/cvmfs-csi/internal/cvmfs/env"
"github.com/cvmfs-contrib/cvmfs-csi/internal/exec"
"github.com/cvmfs-contrib/cvmfs-csi/internal/log"
)
@@ -245,6 +247,19 @@ func RunBlocking() error {

if log.LevelEnabled(log.LevelDebug) {
args = append(args, "--verbose")

// Log info about autofs mount in /cvmfs.

isAutofs, err := IsAutofs("/cvmfs")
if err != nil {
log.Fatalf("Failed to stat /cvmfs: %v", err)
}

if isAutofs {
log.Debugf("autofs already mounted in /cvmfs, automount daemon will reconnect...")
} else {
log.Debugf("autofs not mounted in /cvmfs, automount daemon will mount it now...")
}
}

if log.LevelEnabled(log.LevelTrace) {
@@ -276,20 +291,62 @@ func RunBlocking() error {

// Catch shutdown signals and forward them to the automount process.

sigCh := make(chan os.Signal, 1)
autofsTryCleanAtExit := env.GetAutofsTryCleanAtExit()

sigCh := make(chan os.Signal, 2)
defer close(sigCh)

var exitedWithSigTerm atomic.Bool

go func() {
for {
if sig, more := <-sigCh; more {
cmd.Process.Signal(sig)
} else {
sig, more := <-sigCh
if !more {
break
}

if !autofsTryCleanAtExit && sig == syscall.SIGTERM {
// automount daemon unmounts the autofs root in /cvmfs upon
// receiving SIGTERM. This makes it impossible to reconnect
// the daemon to the mount later, so all consumer Pods will
// lose their CVMFS mounts, without the possibility of restoring
// them (unless these Pods are restarted too). This is a problem
// when the nodeplugin is merely being restarted and the mounts
// will be needed again.
//
// SIGKILL is handled differently by automount: it forces
// the daemon to skip its cleanup at exit, leaving the autofs
// mount behind and making it possible to reconnect to it later.
// We make use of this: unless the admin explicitly asks for
// cleanup with the AUTOFS_TRY_CLEAN_AT_EXIT env var, no cleanup
// is done.
//
// Also, we intentionally don't unmount the existing autofs-managed
// mounts inside /cvmfs, so that any existing consumers receive ENOTCONN
// (due to broken FUSE mounts) and an accidental `mkdir -p` won't
// succeed. These mounts are cleaned up by the daemon on startup.
//
// TODO: remove this once the automount daemon supports skipping
// cleanup (via a command line flag).

log.Debugf("Sending SIGKILL to automount daemon")

exitedWithSigTerm.Store(true)
cmd.Process.Signal(syscall.SIGKILL)
break
}

cmd.Process.Signal(sig)
}
}()

signal.Notify(sigCh, syscall.SIGTERM, syscall.SIGKILL)
shutdownSignals := []os.Signal{
syscall.SIGINT,
syscall.SIGTERM,
syscall.SIGKILL,
}

signal.Notify(sigCh, shutdownSignals...)

// Start automount daemon.

@@ -303,7 +360,7 @@ func RunBlocking() error {

cmd.Wait()

if cmd.ProcessState.ExitCode() != 0 {
if !exitedWithSigTerm.Load() && cmd.ProcessState.ExitCode() != 0 {
log.Fatalf(fmt.Sprintf("automount[%d] has exited unexpectedly: %s", cmd.Process.Pid, cmd.ProcessState))
}

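The debug hunk above calls IsAutofs, whose definition is outside this diff. A minimal sketch of how such a check can be done on Linux, assuming the golang.org/x/sys/unix package (not necessarily the driver's actual implementation):

```go
// Sketch only; not necessarily how the driver's IsAutofs is implemented.
package main

import (
	"fmt"

	"golang.org/x/sys/unix"
)

// isAutofs reports whether the filesystem mounted at path is autofs,
// by comparing the statfs(2) filesystem type with AUTOFS_SUPER_MAGIC.
func isAutofs(path string) (bool, error) {
	var st unix.Statfs_t
	if err := unix.Statfs(path, &st); err != nil {
		return false, err
	}
	return st.Type == unix.AUTOFS_SUPER_MAGIC, nil
}

func main() {
	ok, err := isAutofs("/cvmfs")
	fmt.Println(ok, err)
}
```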
47 changes: 47 additions & 0 deletions internal/cvmfs/env/env.go
@@ -0,0 +1,47 @@
// Copyright CERN.
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

package env

import (
"fmt"
"os"
"strconv"
)

const (
// Boolean value. By default, when exiting, automount daemon is sent
// SIGKILL signal forcing it to skip its clean up procedure, leaving
// the autofs mount behind. This is needed for the daemon to be able
// to reconnect to the autofs mount when the nodeplugin Pod is being
// restarted.
//
// Setting the value of this environment variable to true overrides this,
// and allows the daemon to do the cleanup. This is useful when
// e.g. uninstalling the cvmfs-csi driver.
AutofsTryCleanAtExit = "AUTOFS_TRY_CLEAN_AT_EXIT"
)

func GetAutofsTryCleanAtExit() bool {
strVal := os.Getenv(AutofsTryCleanAtExit)
boolVal, _ := strconv.ParseBool(strVal)

return boolVal
}

func StringAutofsTryCleanAtExit() string {
return fmt.Sprintf("%s=\"%v\"", AutofsTryCleanAtExit, GetAutofsTryCleanAtExit())
}
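
A short note on how the parsing above behaves, since the ParseBool error is discarded; a quick illustration:

```go
package main

import (
	"fmt"
	"strconv"
)

func main() {
	// strconv.ParseBool accepts exactly "1", "t", "T", "TRUE", "true", "True"
	// and the corresponding false spellings. Anything else returns an error,
	// which GetAutofsTryCleanAtExit ignores, so unset or unrecognized values
	// (e.g. "yes") fall back to false.
	for _, v := range []string{"true", "TRUE", "1", "yes", ""} {
		b, err := strconv.ParseBool(v)
		fmt.Printf("%q -> %v (err: %v)\n", v, b, err)
	}
}
```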
