Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use faster xz lib for decompression #2139

Merged
merged 1 commit into from
Sep 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,12 @@ require (
github.com/spf13/afero v1.11.0
github.com/spf13/cobra v1.8.1
github.com/stretchr/testify v1.9.0
github.com/ulikunitz/xz v0.5.12
github.com/wagoodman/go-partybus v0.0.0-20230516145632-8ccac152c651
github.com/wagoodman/go-presenter v0.0.0-20211015174752-f9c01afc824b
github.com/wagoodman/go-progress v0.0.0-20230925121702-07e42b3cdba0
golang.org/x/exp v0.0.0-20231108232855-2478ac86f678
github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8
golang.org/x/exp v0.0.0-20240325151524-a685a6edb6d8
gorm.io/gorm v1.25.12
)

Expand Down Expand Up @@ -220,12 +222,10 @@ require (
github.com/tidwall/match v1.1.1 // indirect
github.com/tidwall/pretty v1.2.1 // indirect
github.com/tidwall/sjson v1.2.5 // indirect
github.com/ulikunitz/xz v0.5.12 // indirect
github.com/vbatts/go-mtree v0.5.4 // indirect
github.com/vbatts/tar-split v0.11.3 // indirect
github.com/vifraa/gopom v1.0.0 // indirect
github.com/xanzy/ssh-agent v0.3.3 // indirect
github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 // indirect
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect
github.com/zclconf/go-cty v1.14.0 // indirect
github.com/zyedidia/generic v1.2.2-0.20230320175451-4410d2372cb1 // indirect
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -1083,8 +1083,8 @@ golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u0
golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4=
golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM=
golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU=
golang.org/x/exp v0.0.0-20231108232855-2478ac86f678 h1:mchzmB1XO2pMaKFRqk/+MV3mgGG96aqaPXaMifQU47w=
golang.org/x/exp v0.0.0-20231108232855-2478ac86f678/go.mod h1:zk2irFbV9DP96SEBUUAy67IdHUaZuSnrz1n472HUCLE=
golang.org/x/exp v0.0.0-20240325151524-a685a6edb6d8 h1:aAcj0Da7eBAtrTp03QXWvm88pSyOt+UgdZw2BFZ+lEw=
golang.org/x/exp v0.0.0-20240325151524-a685a6edb6d8/go.mod h1:CQ1k9gNrJ50XIzaKCRR2hssIjF07kZFEiieALBM/ARQ=
golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js=
golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
Expand Down
25 changes: 23 additions & 2 deletions internal/file/getter.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,12 @@ import (

"github.com/hashicorp/go-getter"
"github.com/hashicorp/go-getter/helper/url"
"github.com/spf13/afero"
"github.com/wagoodman/go-progress"

"github.com/anchore/clio"
"github.com/anchore/grype/internal/stringutil"
"github.com/anchore/stereoscope/pkg/file"
)

var (
Expand Down Expand Up @@ -111,14 +113,33 @@ func withProgress(monitor *progress.Manual) func(client *getter.Client) error {
}

// mapToGetterClientOptions assembles the go-getter client options used for a
// download: one progress option per monitor (in the order given), followed by a
// single option that installs the decompressor table with the local xz-based
// implementations registered for the "xz", "tar.xz" and "txz" keys.
func mapToGetterClientOptions(monitors []*progress.Manual) []getter.ClientOption {
	// TODO: the monitor-to-option mapping below is no longer needed once a generic `map` method is available.

	// one slot per monitor plus the trailing decompressor option
	opts := make([]getter.ClientOption, 0, len(monitors)+1)
	for i := range monitors {
		opts = append(opts, withProgress(monitors[i]))
	}

	// derived from https://github.com/hashicorp/go-getter/blob/v2.2.3/decompress.go#L23-L63
	sizeLimit := int64(5 * file.GB)

	decompressors := getter.LimitedDecompressors(0, sizeLimit)
	osFs := afero.NewOsFs()

	// plain .xz payloads
	decompressors["xz"] = &xzDecompressor{
		FileSizeLimit: sizeLimit,
		Fs:            osFs,
	}

	// both tar+xz spellings share a single decompressor instance
	tarXz := &tarXzDecompressor{
		FilesLimit:    0,
		FileSizeLimit: sizeLimit,
		Fs:            osFs,
	}
	decompressors["tar.xz"] = tarXz
	decompressors["txz"] = tarXz

	return append(opts, getter.WithDecompressors(decompressors))
}

Expand Down
220 changes: 220 additions & 0 deletions internal/file/tar_xz_decompressor.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,220 @@
package file

import (
"archive/tar"
"fmt"
"io"
"os"
"path/filepath"
"strings"
"time"

"github.com/spf13/afero"
"github.com/xi2/xz"
)

// Note: this is a copy of the TarXzDecompressor from https://github.com/hashicorp/go-getter/blob/v2.2.3/decompress_txz.go
// with the xz lib swapped out (for performance). A few adjustments were made:
// - refactored to use afero filesystem abstraction
// - fixed some linting issues

// tarXzDecompressor is an implementation of Decompressor that can
// decompress tar.xz files.
//
// NOTE(review): "Decompressor" presumably refers to the go-getter interface
// this type is registered under via getter.WithDecompressors — confirm.
type tarXzDecompressor struct {
// FileSizeLimit limits the total size of all
// decompressed files. The total is accumulated from the sizes
// declared in the tar entry headers.
//
// The zero value means no limit.
FileSizeLimit int64

// FilesLimit limits the number of files that are
// allowed to be decompressed.
//
// The zero value means no limit.
FilesLimit int

// Fs is the filesystem used to open the source archive and to create the
// extracted directories and files.
Fs afero.Fs
}

// Decompress extracts the xz-compressed tar archive at src into dst, using d.Fs
// for all filesystem access.
//
// When dir is true, dst is a directory and archive entries are written beneath
// it. When dir is false, dst is the path of the single file the archive is
// expected to contain, and only the parent directory of dst is created up front.
// umask is forwarded to the mode helper when computing the permissions of
// created directories and files. d.FileSizeLimit and d.FilesLimit are passed
// through to untar unchanged.
//
// An error is returned if the destination directory cannot be created, src
// cannot be opened, the xz stream cannot be initialized (the underlying error
// is wrapped and can be inspected with errors.Is / errors.As), or untar fails.
func (d *tarXzDecompressor) Decompress(dst, src string, dir bool, umask os.FileMode) error {
	// If we're going into a directory we should make that first
	mkdir := dst
	if !dir {
		mkdir = filepath.Dir(dst)
	}
	if err := d.Fs.MkdirAll(mkdir, mode(0755, umask)); err != nil {
		return err
	}

	// File first
	f, err := d.Fs.Open(src)
	if err != nil {
		return err
	}
	// read-only handle: nothing actionable can be done with a Close error here
	defer f.Close()

	// xz compression is second
	// NOTE(review): a dictMax of 0 is expected to select the xz library's default — confirm against the xi2/xz docs.
	txzR, err := xz.NewReader(f, 0)
	if err != nil {
		return fmt.Errorf("error opening an xz reader for %s: %w", src, err)
	}

	return untar(d.Fs, txzR, dst, src, dir, umask, d.FileSizeLimit, d.FilesLimit)
}

// untar is a shared helper for untarring an archive. The reader should provide
// an uncompressed view of the tar archive.
//
// Parameters:
//   - fs: filesystem that receives the extracted directories and files.
//   - input: the uncompressed tar stream.
//   - dst: destination directory when dir is true, otherwise the destination file path.
//   - src: name of the source archive; it is only used in error messages.
//   - dir: when false, the archive must contain exactly one file and no directory entries.
//   - umask: forwarded to the mode and copyReader helpers when creating entries.
//   - fileSizeLimit: maximum cumulative size of the entries, as declared by their
//     headers; values <= 0 disable the check.
//   - filesLimit: maximum number of entries that may be read from the archive
//     (skipped entries such as links count as well); values <= 0 disable the check.
//
// Symlinks, hard links and extended/global PAX headers are skipped. When dir is
// true, any entry name containing a ".." path element is rejected. After all
// entries are processed, the recorded directories get their mode and
// access/modification times applied in a final pass.
//
// An error is returned when a limit is exceeded, when no file was extracted
// ("empty archive"), when the archive shape does not match dir, or when reading
// the archive or writing to fs fails.
func untar(fs afero.Fs, input io.Reader, dst, src string, dir bool, umask os.FileMode, fileSizeLimit int64, filesLimit int) error { // nolint:funlen,gocognit
	tarR := tar.NewReader(input)
	done := false
	var dirHdrs []*tar.Header
	now := time.Now()

	var (
		fileSize   int64
		filesCount int
	)

	for {
		hdr, err := tarR.Next()
		if err == io.EOF {
			if !done {
				// Empty archive
				return fmt.Errorf("empty archive: %s", src)
			}

			break
		}
		if err != nil {
			return err
		}

		// Count an entry only once it has actually been read. Counting before
		// calling Next would also count the final EOF iteration and reject an
		// archive holding exactly filesLimit entries.
		if filesLimit > 0 {
			filesCount++
			if filesCount > filesLimit {
				return fmt.Errorf("tar archive contains too many files: %d > %d", filesCount, filesLimit)
			}
		}

		switch hdr.Typeflag {
		case tar.TypeSymlink, tar.TypeLink:
			// to prevent any potential indirect traversal attacks
			continue
		case tar.TypeXGlobalHeader, tar.TypeXHeader:
			// don't unpack extended headers as files
			continue
		}

		path := dst
		if dir {
			// Disallow parent traversal
			if containsDotDot(hdr.Name) {
				return fmt.Errorf("entry contains '..': %s", hdr.Name)
			}

			path = filepath.Join(path, hdr.Name) // nolint:gosec // hdr.Name is checked above
		}

		fileInfo := hdr.FileInfo()

		// The limit is enforced against the sizes declared in the headers,
		// before any entry data is written.
		fileSize += fileInfo.Size()

		if fileSizeLimit > 0 && fileSize > fileSizeLimit {
			return fmt.Errorf("tar archive larger than limit: %d", fileSizeLimit)
		}

		if fileInfo.IsDir() {
			if !dir {
				return fmt.Errorf("expected a single file: %s", src)
			}

			// A directory, just make the directory and continue unarchiving...
			if err := fs.MkdirAll(path, mode(0755, umask)); err != nil {
				return err
			}

			// Record the directory information so that we may set its attributes
			// after all files have been extracted
			dirHdrs = append(dirHdrs, hdr)

			continue
		}
		// There is no ordering guarantee that a file in a directory is
		// listed before the directory
		dstPath := filepath.Dir(path)

		// Check that the directory exists, otherwise create it
		if _, err := fs.Stat(dstPath); os.IsNotExist(err) {
			if err := fs.MkdirAll(dstPath, mode(0755, umask)); err != nil {
				return err
			}
		}

		// We have a file. If we already decoded, then it is an error
		if !dir && done {
			return fmt.Errorf("expected a single file, got multiple: %s", src)
		}

		// Mark that we're done so future in single file mode errors
		done = true

		// Size limit is tracked using the returned file info.
		err = copyReader(fs, path, tarR, hdr.FileInfo().Mode(), umask, 0)
		if err != nil {
			return err
		}

		// Set the access and modification time if valid, otherwise default to current time
		aTime := now
		mTime := now
		if hdr.AccessTime.Unix() > 0 {
			aTime = hdr.AccessTime
		}
		if hdr.ModTime.Unix() > 0 {
			mTime = hdr.ModTime
		}
		if err := fs.Chtimes(path, aTime, mTime); err != nil {
			return err
		}
	}

	// Perform a final pass over extracted directories to update metadata
	for _, dirHdr := range dirHdrs {
		path := filepath.Join(dst, dirHdr.Name) // nolint:gosec // hdr.Name is checked above
		// Chmod the directory since they might be created before we know the mode flags
		if err := fs.Chmod(path, mode(dirHdr.FileInfo().Mode(), umask)); err != nil {
			return err
		}
		// Set the mtime/atime attributes since they would have been changed during extraction
		aTime := now
		mTime := now
		if dirHdr.AccessTime.Unix() > 0 {
			aTime = dirHdr.AccessTime
		}
		if dirHdr.ModTime.Unix() > 0 {
			mTime = dirHdr.ModTime
		}
		if err := fs.Chtimes(path, aTime, mTime); err != nil {
			return err
		}
	}

	return nil
}

// containsDotDot reports whether any path element of v is exactly "..".
// Elements are obtained by splitting v on both '/' and '\'. The logic
// mirrors the helper of the same name in Go's net/http package.
func containsDotDot(v string) bool {
	// Only walk the elements when the substring occurs at all.
	if strings.Contains(v, "..") {
		for _, segment := range strings.FieldsFunc(v, isSlashRune) {
			if segment == ".." {
				return true
			}
		}
	}
	return false
}

// isSlashRune reports whether r is a forward slash or a backslash.
func isSlashRune(r rune) bool {
	switch r {
	case '/', '\\':
		return true
	default:
		return false
	}
}
Loading
Loading