Skip to content

Commit

Permalink
Merge pull request #124 from gman0/backport-122
Browse files Browse the repository at this point in the history
(Backport #122) automount: shut down automount daemon with SIGKILL
  • Loading branch information
gman0 authored Sep 30, 2023
2 parents 729bbcc + ebbe15f commit d39df5a
Show file tree
Hide file tree
Showing 4 changed files with 132 additions and 6 deletions.
2 changes: 2 additions & 0 deletions cmd/automount-runner/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"os"

"github.com/cvmfs-contrib/cvmfs-csi/internal/cvmfs/automount"
"github.com/cvmfs-contrib/cvmfs-csi/internal/cvmfs/env"
"github.com/cvmfs-contrib/cvmfs-csi/internal/log"
cvmfsversion "github.com/cvmfs-contrib/cvmfs-csi/internal/version"

Expand Down Expand Up @@ -54,6 +55,7 @@ func main() {

log.Infof("automount-runner for CVMFS CSI plugin version %s", cvmfsversion.FullVersion())
log.Infof("Command line arguments %v", os.Args)
log.Infof("Environment variables %s", env.StringAutofsTryCleanAtExit())

err := automount.Init(&automount.Opts{
UnmountTimeoutSeconds: *unmountTimeoutSeconds,
Expand Down
20 changes: 20 additions & 0 deletions docs/uninstalling.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Uninstalling cvmfs-csi driver

The nodeplugin Pods store various resources on the node hosts they are running on:
* autofs mount and the respective inner CVMFS mounts,
* CVMFS client cache.

By default, the nodeplugin Pod leaves autofs and its respective inner mounts on the node
in `/var/cvmfs`. They may need to be unmounted recursively. To do that, you can set the
`AUTOFS_TRY_CLEAN_AT_EXIT` environment variable to `true` in the nodeplugin's DaemonSet and restart
the Pods. On their next exit, the mounts will be cleaned up.

```
kubectl set env daemonset -l app=cvmfs-csi,component=nodeplugin AUTOFS_TRY_CLEAN_AT_EXIT=true
# Restarting nodeplugin Pods needs attention, as this may break existing mounts.
# They will be restored once the Pods come back up.
kubectl delete pods -l app=cvmfs-csi,component=nodeplugin
```

The CVMFS client cache is stored by default in `/var/lib/cvmfs.csi.cern.ch/cache`.
This directory is not deleted automatically, and manual intervention is currently needed.
69 changes: 63 additions & 6 deletions internal/cvmfs/automount/automount.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,10 @@ import (
goexec "os/exec"
"os/signal"
"path"
"sync/atomic"
"syscall"

"github.com/cvmfs-contrib/cvmfs-csi/internal/cvmfs/env"
"github.com/cvmfs-contrib/cvmfs-csi/internal/exec"
"github.com/cvmfs-contrib/cvmfs-csi/internal/log"
)
Expand Down Expand Up @@ -245,6 +247,19 @@ func RunBlocking() error {

if log.LevelEnabled(log.LevelDebug) {
args = append(args, "--verbose")

// Log info about autofs mount in /cvmfs.

isAutofs, err := IsAutofs("/cvmfs")
if err != nil {
log.Fatalf("Failed to stat /cvmfs: %v", err)
}

if isAutofs {
log.Debugf("autofs already mounted in /cvmfs, automount daemon will reconnect...")
} else {
log.Debugf("autofs not mounted in /cvmfs, automount daemon will mount it now...")
}
}

if log.LevelEnabled(log.LevelTrace) {
Expand Down Expand Up @@ -276,20 +291,62 @@ func RunBlocking() error {

// Catch shutdown signals and forward them to the automount process.

sigCh := make(chan os.Signal, 1)
autofsTryCleanAtExit := env.GetAutofsTryCleanAtExit()

sigCh := make(chan os.Signal, 2)
defer close(sigCh)

var exitedWithSigTerm atomic.Bool

go func() {
for {
if sig, more := <-sigCh; more {
cmd.Process.Signal(sig)
} else {
sig, more := <-sigCh
if !more {
break
}

if !autofsTryCleanAtExit && sig == syscall.SIGTERM {
// automount daemon unmounts the autofs root in /cvmfs upon
// receiving SIGTERM. This makes it impossible to reconnect
// the daemon to the mount later, so all consumer Pods will
// lose their CVMFS mounts, without the possibility of restoring
// them (unless these Pods are restarted too). The assumption here
// is that the nodeplugin is just being restarted, and will be
// needed again.
//
// SIGKILL is handled differently in automount, as this forces
// the daemon to skip the cleanup at exit, leaving the autofs
// mount behind and making it possible to reconnect to it later.
// We make use of this: unless the admin explicitly asks for
// cleanup with the AUTOFS_TRY_CLEAN_AT_EXIT env var, no cleanup
// is done.
//
// Also, we intentionally don't unmount the existing autofs-managed
// mounts inside /cvmfs, so that any existing consumers receive ENOTCONN
// (due to broken FUSE mounts), so that accidental `mkdir -p` won't
// succeed. They are cleaned by the daemon on startup.
//
// TODO: remove this once the automount daemon supports skipping
// cleanup (via a command line flag).

log.Debugf("Sending SIGKILL to automount daemon")

exitedWithSigTerm.Store(true)
cmd.Process.Signal(syscall.SIGKILL)
break
}

cmd.Process.Signal(sig)
}
}()

signal.Notify(sigCh, syscall.SIGTERM, syscall.SIGKILL)
shutdownSignals := []os.Signal{
syscall.SIGINT,
syscall.SIGTERM,
syscall.SIGKILL,
}

signal.Notify(sigCh, shutdownSignals...)

// Start automount daemon.

Expand All @@ -303,7 +360,7 @@ func RunBlocking() error {

cmd.Wait()

if cmd.ProcessState.ExitCode() != 0 {
if !exitedWithSigTerm.Load() && cmd.ProcessState.ExitCode() != 0 {
log.Fatalf(fmt.Sprintf("automount[%d] has exited unexpectedly: %s", cmd.Process.Pid, cmd.ProcessState))
}

Expand Down
47 changes: 47 additions & 0 deletions internal/cvmfs/env/env.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
// Copyright CERN.
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

package env

import (
"fmt"
"os"
"strconv"
)

const (
// AutofsTryCleanAtExit is the name of the environment variable that
// controls whether the automount daemon is allowed to clean up at exit.
// Its value is parsed as a boolean.
//
// By default, when exiting, the automount daemon is sent the SIGKILL
// signal, forcing it to skip its clean up procedure and leaving
// the autofs mount behind. This is needed for the daemon to be able
// to reconnect to the autofs mount when the nodeplugin Pod is being
// restarted.
//
// Setting this environment variable to TRUE overrides this,
// and allows the daemon to do the clean up. This is useful when
// e.g. uninstalling the cvmfs-csi driver.
AutofsTryCleanAtExit = "AUTOFS_TRY_CLEAN_AT_EXIT"
)

// GetAutofsTryCleanAtExit reports whether the environment variable named by
// AutofsTryCleanAtExit holds a true value as understood by strconv.ParseBool.
// An unset, empty or otherwise unparsable value yields false.
func GetAutofsTryCleanAtExit() bool {
	enabled, err := strconv.ParseBool(os.Getenv(AutofsTryCleanAtExit))
	if err != nil {
		// Anything that is not a valid boolean falls back to the default.
		return false
	}

	return enabled
}

func StringAutofsTryCleanAtExit() string {
return fmt.Sprintf("%s=\"%v\"", AutofsTryCleanAtExit, GetAutofsTryCleanAtExit())
}

0 comments on commit d39df5a

Please sign in to comment.