From 521ee24616fa85bf2490243cd3c71f92e5bd26b0 Mon Sep 17 00:00:00 2001
From: Scott Moser <smoser@brickies.net>
Date: Wed, 27 Sep 2023 11:28:32 -0700
Subject: [PATCH] fix: Update ExtractSingleSquash, adding policy.

Overall, there are 3 "good things" done by this change:
1. Fix a bug in the current code, which tries every mount method on
   every call.  The problem with doing that is that the kernel mount
   does not cope with being repeated: it fails with "already mounted",
   and squashfuse then ends up being used to mount over the top.

2. Fix race conditions in the current code.

   overlay.Unpack starts a thread pool and tries to unpack all layers
   at once.  That is fine if there are no duplicate layers.  But
   if a stacker.yaml file uses the same layer more than once, the
   extractions of that layer race with each other.  The end result
   was that the layer would get mounted more than once.

   Example stacker that shows this:

    l1:
      from:
        type: docker
        url: docker://busybox:latest
      run: |
        echo build layer 1

    l2:
      from:
        type: docker
        url: docker://busybox:latest
      run: |
        echo build layer 1

   There, the busybox layer would get extracted multiple times.

   The code here has a single lock on ExtractSingleSquash; it would
   be better to take a lock per extractDir.

3. Allow the user to control the list of extractors.

   If they knew that they could not use kernel mounts (or could, but
   didn't want to) or wanted to use unsquashfs they can now do that.

   STACKER_SQUASHFS_EXTRACT_POLICY=kmount stacker build ..

   or

   STACKER_SQUASHFS_EXTRACT_POLICY="squashfuse kmount" stacker build ...

   This adds a SquashExtractor interface, with 3 implementers
   (KernelExtractor, SquashFuseExtractor, UnsquashfsExtractor).

   An ExtractPolicy is basically a list of Extractors to try.
   The first time an ExtractPolicy is used, it tries each of its
   Extractors in order.  It then stores the one that worked in
   .Extractor and uses that subsequently.

Signed-off-by: Scott Moser <smoser@brickies.net>
---
 pkg/overlay/pack.go      |   3 +-
 pkg/squashfs/squashfs.go | 391 +++++++++++++++++++++++++++++++++------
 pkg/types/layer_type.go  |   7 +
 3 files changed, 342 insertions(+), 59 deletions(-)

diff --git a/pkg/overlay/pack.go b/pkg/overlay/pack.go
index 32555dd4..1a5d150c 100644
--- a/pkg/overlay/pack.go
+++ b/pkg/overlay/pack.go
@@ -207,7 +207,8 @@ func (o *overlay) initializeBasesInOutput(name string, layerTypes []types.LayerT
 						return err
 					}
 				} else {
-					log.Debugf("converting between %v and %v", sourceLayerType, layerType)
+					log.Debugf("creating layer %s (type=%s) by converting layer %s (type=%s)",
+						layerType.LayerName(name), layerType, sourceLayerType.LayerName(name), sourceLayerType)
 					err = ConvertAndOutput(o.config, cacheTag, name, layerType)
 					if err != nil {
 						return err
diff --git a/pkg/squashfs/squashfs.go b/pkg/squashfs/squashfs.go
index bf7d8a12..50795d52 100644
--- a/pkg/squashfs/squashfs.go
+++ b/pkg/squashfs/squashfs.go
@@ -18,13 +18,24 @@ import (
 	"github.com/pkg/errors"
 	"golang.org/x/sys/unix"
 	"stackerbuild.io/stacker/pkg/log"
+	"stackerbuild.io/stacker/pkg/mount"
 )
 
 var checkZstdSupported sync.Once
 var zstdIsSuspported bool
 
-var tryKernelMountSquash bool = true
-var kernelSquashMountFailed error = errors.New("kernel squash mount failed")
+const strTrue, strFalse, strTryOnce = "true", "false", "try-once"
+// NOTE(review): extract is not referenced by any line this patch adds; confirm it is needed.
+var extract sync.Mutex
+// NOTE(review): nothing in this patch calls extractPolicyOnce or assigns extractPolicyErr; they look superseded by exPolInfo.
+var extractPolicyOnce sync.Once
+var extractPolicyErr error
+// exPolInfo is the policy shared by every ExtractSingleSquash call; it is built on first use.
+var exPolInfo struct {
+	once   sync.Once      // runs the one-time construction
+	err    error          // error returned by NewExtractPolicy, if any
+	policy *ExtractPolicy // policy returned by NewExtractPolicy
+}
 
 // ExcludePaths represents a list of paths to exclude in a squashfs listing.
 // Users should do something like filepath.Walk() over the whole filesystem,
@@ -168,52 +179,43 @@ func MakeSquashfs(tempdir string, rootfs string, eps *ExcludePaths, verity Verit
 	return blob, GenerateSquashfsMediaType(compression, verity), rootHash, nil
 }
 
-// maybeKernelSquashMount - try to mount squashfile with kernel mount
-//
-//	if global tryKernelMountSquash is false, do not try
-//	if environment variable STACKER_ALLOW_SQUASHFS_KERNEL_MOUNTS is "false", do not try.
-//	try.  If it fails, log message and set tryKernelMountSquash=false.
-func maybeKernelSquashMount(squashFile, extractDir string) (bool, error) {
-	if !tryKernelMountSquash {
+// isMountedAtDir reports whether dest is a directory that is listed as a
+// mount target in /proc/self/mountinfo.
+//
+// A dest that does not exist or is not a directory yields (false, nil).
+// Any other stat failure, and any failure to read the mount table or to
+// make dest absolute, is returned as an error.
+//
+// NOTE(review): src is not consulted, so a mount of any source at dest
+// counts as mounted.
+func isMountedAtDir(src, dest string) (bool, error) {
+	dstat, err := os.Stat(dest)
+	if os.IsNotExist(err) {
 		return false, nil
 	}
-
-	const strTrue, strFalse = "true", "false"
-	const envName = "STACKER_ALLOW_SQUASHFS_KERNEL_MOUNTS"
-	envVal := os.Getenv(envName)
-	if envVal == strFalse {
-		log.Debugf("Not trying kernel mounts per %s=%s", envName, envVal)
-		tryKernelMountSquash = false
+	if err != nil {
+		// dstat is nil here; report the failure instead of dereferencing it.
+		return false, err
+	}
+	if !dstat.IsDir() {
 		return false, nil
-	} else if envVal != strTrue && envVal != "" {
-		return false, errors.Errorf("%s must be '%s' or '%s', found '%s'", envName, strTrue, strFalse, envVal)
 	}
-
-	ecmd := []string{"mount", "-tsquashfs", "-oloop,ro", squashFile, extractDir}
-	var output bytes.Buffer
-	cmd := exec.Command(ecmd[0], ecmd[1:]...)
-	cmd.Stdin = nil
-	cmd.Stdout = &output
-	cmd.Stderr = cmd.Stdout
-	err := cmd.Run()
-	if err == nil {
-		return true, nil
-	}
-	exitError, ok := err.(*exec.ExitError)
-	if !ok {
-		tryKernelMountSquash = false
-		return false, errors.Errorf("Unexpected error (no-rc), in exec (%v): %v", ecmd, err)
+	mounts, err := mount.ParseMounts("/proc/self/mountinfo")
+	if err != nil {
+		return false, err
 	}
 
-	status, ok := exitError.Sys().(syscall.WaitStatus)
-	if !ok {
-		tryKernelMountSquash = false
-		return false, errors.Errorf("Unexpected error (no-status) in exec (%v): %v", ecmd, err)
+	fdest, err := filepath.Abs(dest)
+	if err != nil {
+		return false, err
+	}
+	for _, m := range mounts {
+		if m.Target == fdest {
+			return true, nil
+		}
 	}
 
-	// we can't really tell why the mount failed. mount(8) does not give a lot specific rc exits.
-	log.Debugf("maybeKernelSquashMount(%s) exited %d: %s", squashFile, status.ExitStatus(), strings.TrimRight(output.String(), "\n"))
-	return false, kernelSquashMountFailed
+	return false, nil
 }
 
 func findSquashfusePath() string {
@@ -300,36 +289,309 @@ func squashFuse(squashFile, extractDir string) (*exec.Cmd, error) {
 	return cmd, nil
 }
 
-func ExtractSingleSquash(squashFile string, extractDir string) error {
+type ExtractPolicy struct { // ordered extractor candidates plus the outcome of selecting one
+	Extractors  []SquashExtractor // candidates, tried in order by ExtractSingleSquashPolicy during selection
+	Extractor   SquashExtractor   // the candidate that worked; used directly once the policy is initialized
+	Excuses     map[string]error  // why an extractor was rejected, keyed by its name; "init" holds the overall failure
+	initialized bool              // set when the first call that found it unset returns
+	mutex       sync.Mutex        // held by that first call until it returns
+}
+
+type SquashExtractor interface { // one way of making a squashfs image's contents available at a directory
+	Name() string       // name by which NewExtractPolicy looks the extractor up
+	IsAvailable() error // nil if usable; otherwise the reason, which NewExtractPolicy stores as an excuse
+	// not needed/just a TOCTOU race?
+	// IsDone(path, dest string) (bool, error)
+	Mount(path, dest string) (bool, error) // ExtractSingleSquashPolicy treats (false, nil) as an error
+}
+
+// NewExtractPolicy builds a policy from extractor names, in the given order.
+// Names whose extractor reports itself unavailable are recorded in Excuses
+// and skipped; an unrecognized name is an error.
+func NewExtractPolicy(args ...string) (*ExtractPolicy, error) {
+	known := map[string]SquashExtractor{}
+	for _, ex := range []SquashExtractor{
+		&KernelExtractor{},
+		&SquashFuseExtractor{},
+		&UnsquashfsExtractor{},
+	} {
+		known[ex.Name()] = ex
+	}
+
+	policy := &ExtractPolicy{
+		Extractors: []SquashExtractor{},
+		Excuses:    map[string]error{},
+	}
+	for _, name := range args {
+		ex, found := known[name]
+		if !found {
+			return nil, errors.Errorf("Unknown extractor: '%s'", name)
+		}
+		if excuse := ex.IsAvailable(); excuse != nil {
+			policy.Excuses[name] = excuse
+			continue
+		}
+		policy.Extractors = append(policy.Extractors, ex)
+	}
+	return policy, nil
+}
+
+// UnsquashfsExtractor unpacks an image into a directory by running
+// unsquashfs; nothing is mounted.
+type UnsquashfsExtractor struct{ mutex sync.Mutex }
+
+// Name returns the name that selects this extractor in a policy.
+func (k *UnsquashfsExtractor) Name() string { return "unsquashfs" }
+
+// IsAvailable returns nil if which() finds unsquashfs, else the excuse.
+func (k *UnsquashfsExtractor) IsAvailable() error {
+	if which("unsquashfs") != "" {
+		return nil
+	}
+	return errors.Errorf("no 'unsquashfs' in PATH")
+}
+
+// Mount unpacks squashFile into extractDir by running
+// "unsquashfs -f -d extractDir squashFile".  A non-empty extractDir is
+// taken as already extracted and reported as success without running
+// anything.  If unsquashfs fails, extractDir is removed.
+func (k *UnsquashfsExtractor) Mount(squashFile, extractDir string) (bool, error) {
+	k.mutex.Lock()
+	defer k.mutex.Unlock()
+
+	// Anything already in the directory means the work was done before.
+	if empty, err := isEmptyDir(extractDir); err != nil {
+		return false, err
+	} else if !empty {
+		return true, nil
+	}
+
+	log.Debugf("unsquashfs %s -> %s", squashFile, extractDir)
+	unsquash := exec.Command("unsquashfs", "-f", "-d", extractDir, squashFile)
+	unsquash.Stdin = nil
+	unsquash.Stdout = os.Stdout
+	unsquash.Stderr = os.Stderr
+	if runErr := unsquash.Run(); runErr != nil {
+		// Remove the directory so leftovers are not mistaken for a finished extraction.
+		if rmErr := os.RemoveAll(extractDir); rmErr != nil {
+			log.Debugf("Failed to remove %s after failed extraction of %s: %v", extractDir, squashFile, rmErr)
+		}
+		return false, runErr
+	}
+
+	// Extraction must have created files; that is what lets the non-empty
+	// check above stand for "populated by unsquashfs".
+	empty, err := isEmptyDir(extractDir)
+	if err != nil {
+		return false,
+			errors.Errorf("Failed to read %s after successful extraction of %s: %v",
+				extractDir, squashFile, err)
+	}
+	if empty {
+		return false, errors.Errorf("%s was an empty fs image", squashFile)
+	}
+
+	return true, nil
+}
+
+// KernelExtractor mounts an image with mount(8) ("-tsquashfs -oloop,ro"),
+// which it only offers when amHostRoot() is true.
+type KernelExtractor struct{ mutex sync.Mutex }
+
+// Name returns the name that selects this extractor in a policy.
+func (k *KernelExtractor) Name() string { return "kmount" }
+
+// IsAvailable returns nil when amHostRoot() is true, else the excuse.
+func (k *KernelExtractor) IsAvailable() error {
+	if amHostRoot() {
+		return nil
+	}
+	return errors.Errorf("not host root")
+}
+
+// Mount loop-mounts squashFile read-only on extractDir with mount(8),
+// unless isMountedAtDir already reports a mount there.
+func (k *KernelExtractor) Mount(squashFile, extractDir string) (bool, error) {
+	k.mutex.Lock()
+	defer k.mutex.Unlock()
+
+	if already, probeErr := isMountedAtDir(squashFile, extractDir); probeErr != nil {
+		return false, probeErr
+	} else if already {
+		return true, nil
+	}
+
+	ecmd := []string{"mount", "-tsquashfs", "-oloop,ro", squashFile, extractDir}
+	var output bytes.Buffer
+	cmd := exec.Command(ecmd[0], ecmd[1:]...)
+	cmd.Stdin = nil
+	cmd.Stdout = &output
+	cmd.Stderr = cmd.Stdout
+	err := cmd.Run()
+	if err == nil {
+		return true, nil
+	}
+
+	// Describe the failure with as much of the exit status as can be recovered.
+	exitError, isExit := err.(*exec.ExitError)
+	if !isExit {
+		return false, errors.Errorf("kmount(%s) had unexpected error (no-rc), in exec (%v): %v",
+			squashFile, ecmd, err)
+	}
+	status, hasStatus := exitError.Sys().(syscall.WaitStatus)
+	if !hasStatus {
+		return false, errors.Errorf("kmount(%s) had unexpected error (no-status), in exec (%v): %v",
+			squashFile, ecmd, err)
+	}
+	return false, errors.Errorf("kmount(%s) exited %d: %v", squashFile, status.ExitStatus(), output.String())
+}
+
+// SquashFuseExtractor mounts an image on a directory with squashfuse.
+type SquashFuseExtractor struct {
+	sqfuse string     // NOTE(review): never read or written in this patch
+	mutex  sync.Mutex // held for the duration of each Mount call
+}
+
+func (k *SquashFuseExtractor) Name() string { return "squashfuse" } // name that selects it in a policy
+
+// IsAvailable returns nil if findSquashfusePath() returns a non-empty path, else the excuse.
+func (k *SquashFuseExtractor) IsAvailable() error {
+	if findSquashfusePath() != "" {
+		return nil
+	}
+	return errors.Errorf("no 'squashfuse' in PATH")
+}
+
+// Mount mounts squashFile on extractDir with squashFuse unless already mounted there.
+func (k *SquashFuseExtractor) Mount(squashFile, extractDir string) (bool, error) {
+	k.mutex.Lock()
+	defer k.mutex.Unlock()
+
+	if already, probeErr := isMountedAtDir(squashFile, extractDir); probeErr != nil {
+		return false, probeErr
+	} else if already {
+		log.Debugf("[%s] %s already mounted -> %s", k.Name(), squashFile, extractDir)
+		return true, nil
+	}
+
+	fuse, err := squashFuse(squashFile, extractDir)
+	if err != nil {
+		return false, err
+	}
+	log.Debugf("squashFuse mounted (%d) %s -> %s", fuse.Process.Pid, squashFile, extractDir)
+	if relErr := fuse.Process.Release(); relErr != nil {
+		return false, errors.Errorf("Failed to release process %s: %v", fuse, relErr)
+	}
+	return true, nil
+}
+
+// ExtractSingleSquashPolicy extracts or mounts squashFile at extractDir.
+// The first call to reach selection tries policy.Extractors in order and
+// keeps the first that works in policy.Extractor; later calls reuse it, or
+// return the error recorded when no extractor could be selected.
+func ExtractSingleSquashPolicy(squashFile, extractDir string, policy *ExtractPolicy) error {
+	const initName = "init"
+	if policy == nil {
+		return errors.Errorf("policy cannot be nil")
+	}
+
 	err := os.MkdirAll(extractDir, 0755)
 	if err != nil {
 		return err
 	}
 
-	if mounted, err := maybeKernelSquashMount(squashFile, extractDir); err == nil && mounted {
-		return nil
-	} else if err != kernelSquashMountFailed {
+	fdest, err := filepath.Abs(extractDir)
+	if err != nil {
 		return err
 	}
 
-	cmd, err := squashFuse(squashFile, extractDir)
-	if err == nil {
-		if err := cmd.Process.Release(); err != nil {
-			return errors.Errorf("Failed to release process %s: %v", cmd, err)
+	// Check policy.initialized only while holding the mutex.  The first caller
+	// to find it unset is the initializer and keeps the lock until it returns.
+	// Locking after the directory setup above means a failure there cannot
+	// leave the policy marked initialized with nothing selected or recorded.
+	policy.mutex.Lock()
+	if policy.initialized {
+		policy.mutex.Unlock()
+	} else {
+		defer policy.mutex.Unlock()
+		defer func() {
+			policy.initialized = true
+		}()
+	}
+
+	if policy.initialized {
+		if err, ok := policy.Excuses[initName]; ok {
+			return err
+		}
+		mounted, err := policy.Extractor.Mount(squashFile, fdest)
+		if err == nil {
+			if !mounted {
+				return errors.Errorf("%s.Mount(%s, %s) returned unmounted with err=nil",
+					policy.Extractor.Name(), squashFile, fdest)
+			}
+			return nil
 		}
-		return nil
-	} else if err != squashNotFound {
 		return err
 	}
-	if p := which("unsquashfs"); p != "" {
-		log.Debugf("Extracting %s -> %s with unsquashfs -f -d %s %s", extractDir, squashFile, extractDir, squashFile)
-		cmd := exec.Command("unsquashfs", "-f", "-d", extractDir, squashFile)
-		cmd.Stdout = os.Stdout
-		cmd.Stderr = os.Stderr
-		cmd.Stdin = nil
-		return cmd.Run()
+
+	// At this point we are the initializer.
+	if policy.Excuses == nil {
+		policy.Excuses = map[string]error{}
+	}
+
+	if len(policy.Extractors) == 0 {
+		policy.Excuses[initName] = errors.Errorf("policy had no extractors")
+		return policy.Excuses[initName]
+	}
+
+	for _, extractor := range policy.Extractors {
+		mounted, err := extractor.Mount(squashFile, fdest)
+		if mounted && err == nil {
+			policy.Extractor = extractor
+			log.Debugf("Selected squashfs extractor %s", extractor.Name())
+			return nil
+		} else if !mounted && err == nil {
+			err = errors.Errorf("%s returned unmounted without error", extractor.Name())
+			policy.Excuses[initName] = err
+			return err
+		}
+		policy.Excuses[extractor.Name()] = err
+	}
+
+	allExcuses := []string{}
+	for n, exc := range policy.Excuses {
+		allExcuses = append(allExcuses, fmt.Sprintf("%s: %v", n, exc))
+	}
+
+	// Nothing worked.  The excuses are an argument, not the format: they may contain '%'.
+	policy.Excuses[initName] = errors.Errorf("No suitable extractor found:\n  %s", strings.Join(allExcuses, "\n  "))
+	return policy.Excuses[initName]
+}
+
+// ExtractSingleSquash - extract the squashFile to extractDir.
+// Initialize the shared ExtractPolicy once (STACKER_SQUASHFS_EXTRACT_POLICY
+// or the default order) and then call ExtractSingleSquashPolicy with that.
+func ExtractSingleSquash(squashFile string, extractDir string) error {
+	exPolInfo.once.Do(func() {
+		const envName = "STACKER_SQUASHFS_EXTRACT_POLICY"
+		const defPolicy = "kmount squashfuse unsquashfs"
+		val := os.Getenv(envName)
+		if val == "" {
+			val = defPolicy
+		}
+		exPolInfo.policy, exPolInfo.err = NewExtractPolicy(strings.Fields(val)...)
+		if exPolInfo.err == nil { // policy is nil when NewExtractPolicy failed
+			for k, v := range exPolInfo.policy.Excuses {
+				log.Debugf(" squashfs extractor %s is not available: %v", k, v)
+			}
+		}
+	})
+
+	if exPolInfo.err != nil {
+		return exPolInfo.err
 	}
-	return errors.Errorf("Unable to extract squash archive %s", squashFile)
+
+	return ExtractSingleSquashPolicy(squashFile, extractDir, exPolInfo.policy)
 }
 
 func mksquashfsSupportsZstd() bool {
@@ -353,6 +615,19 @@ func mksquashfsSupportsZstd() bool {
 	return zstdIsSuspported
 }
 
+// isEmptyDir reports whether the directory at path has no entries.
+func isEmptyDir(path string) (bool, error) {
+	fh, err := os.Open(path)
+	if err != nil {
+		return false, err
+	}
+	defer fh.Close() // the handle was previously leaked on every call
+	if _, err = fh.ReadDir(1); err == io.EOF {
+		return true, nil
+	}
+	return false, err
+}
+
 // which - like the unix utility, return empty string for not-found.
 // this might fit well in lib/, but currently lib's test imports
 // squashfs creating a import loop.
diff --git a/pkg/types/layer_type.go b/pkg/types/layer_type.go
index 6d1a1353..652b2749 100644
--- a/pkg/types/layer_type.go
+++ b/pkg/types/layer_type.go
@@ -15,6 +15,13 @@ type LayerType struct {
 	Verity squashfs.VerityMetadata
 }
 
+func (lt LayerType) String() string { // implements fmt.Stringer: Type, plus "+verity" when Verity is set
+	if lt.Verity {
+		return lt.Type + "+verity" // plain concatenation: Type must not be used as a format string
+	}
+	return lt.Type
+}
+
 func (lt LayerType) MarshalText() ([]byte, error) {
 	return []byte(fmt.Sprintf("%s+%v", lt.Type, lt.Verity)), nil
 }