diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index 92fa156..641968d 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -2,28 +2,26 @@ name: Go on: push: - branches: [ main ] + branches: [main] pull_request: jobs: - build: strategy: matrix: - go: ["1.16", "1.17", "1.18", "1.19"] + go: ["1.17", "1.18", "1.19", "1.20"] os: ["ubuntu-latest", "windows-latest", "macos-latest"] runs-on: ${{matrix.os}} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - - name: Set up Go - uses: actions/setup-go@v2 - with: - stable: false + - name: Set up Go + uses: actions/setup-go@v4 + with: go-version: ${{matrix.go}} - - name: Build - run: go build -v ./... + - name: Build + run: go build -v ./... - - name: Test - run: go test -v ./... + - name: Test + run: go test -v ./... diff --git a/README.md b/README.md index f970f0f..ed779b2 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ ## Usage -⚠️ go-tarfs needs go>=1.16 +⚠️ go-tarfs needs go>=1.17 Install: ```sh diff --git a/benchmarks/.gitignore b/benchmarks/.gitignore new file mode 100644 index 0000000..d874ad6 --- /dev/null +++ b/benchmarks/.gitignore @@ -0,0 +1 @@ +*.tar diff --git a/benchmarks/benchmarks_test.go b/benchmarks/benchmarks_test.go new file mode 100644 index 0000000..f4fff32 --- /dev/null +++ b/benchmarks/benchmarks_test.go @@ -0,0 +1,270 @@ +package tarfs + +import ( + "archive/tar" + "io" + "io/fs" + "math/rand" + "os" + "testing" + + "github.com/nlepage/go-tarfs" +) + +const chars = "abcdefghijklmnopqrstuvxyzABCDEFGHIJKLMNOPQRSTUVXYZ0123456789" + +var ( + randomFileName = make(map[string]string) +) + +func TestMain(m *testing.M) { + rand.Seed(3827653748965) + generateTarFile("many-small-files.tar", 10000, 1, 10000) + generateTarFile("few-large-files.tar", 10, 10000000, 100000000) + os.Exit(m.Run()) +} + +func BenchmarkOpenTarThenReadFile_ManySmallFiles(b *testing.B) { + fileName := randomFileName["many-small-files.tar"] + + b.ResetTimer() + + for i := 0; i < b.N; i++ { + openTarThenReadFile("many-small-files.tar", fileName) + } +} + +func BenchmarkOpenTarThenReadFile_ManySmallFiles_DisableSeek(b *testing.B) { + fileName := randomFileName["many-small-files.tar"] + + b.ResetTimer() + + for i := 0; i < b.N; i++ { + openTarThenReadFile("many-small-files.tar", fileName, tarfs.DisableSeek(true)) + } +} + +func BenchmarkOpenTarThenReadFile_FewLargeFiles(b *testing.B) { + fileName := randomFileName["few-large-files.tar"] + + b.ResetTimer() + + for i := 0; i < b.N; i++ { + openTarThenReadFile("few-large-files.tar", fileName) + } +} + +func BenchmarkOpenTarThenReadFile_FewLargeFiles_DisableSeek(b *testing.B) { + fileName := randomFileName["few-large-files.tar"] + + b.ResetTimer() + + for i := 0; i < b.N; i++ { + openTarThenReadFile("few-large-files.tar", fileName, tarfs.DisableSeek(true)) + } +} + +func BenchmarkReadFile_ManySmallFiles(b *testing.B) { + benchmarkReadFile(b, "many-small-files.tar") +} + +func BenchmarkReadFile_ManySmallFiles_DisableSeek(b *testing.B) { + benchmarkReadFile(b, "many-small-files.tar", tarfs.DisableSeek(true)) +} + +func BenchmarkReadFile_FewLargeFiles(b *testing.B) { + benchmarkReadFile(b, "few-large-files.tar") +} + +func BenchmarkReadFile_FewLargeFiles_DisableSeek(b *testing.B) { + benchmarkReadFile(b, "few-large-files.tar", tarfs.DisableSeek(true)) +} + +func BenchmarkOpenReadAndCloseFile_ManySmallFiles(b *testing.B) { + benchmarkOpenReadAndCloseFile(b, "many-small-files.tar") +} + +func BenchmarkOpenReadAndCloseFile_ManySmallFiles_DisableSeek(b *testing.B) { + benchmarkOpenReadAndCloseFile(b, "many-small-files.tar", tarfs.DisableSeek(true)) +} + +func BenchmarkOpenReadAndCloseFile_FewLargeFiles(b *testing.B) { + benchmarkOpenReadAndCloseFile(b, "few-large-files.tar") +} + +func BenchmarkOpenReadAndCloseFile_FewLargeFiles_DisableSeek(b *testing.B) { + benchmarkOpenReadAndCloseFile(b, "few-large-files.tar", tarfs.DisableSeek(true)) +} + +func BenchmarkOpenAndCloseFile_ManySmallFiles(b *testing.B) { + benchmarkOpenAndCloseFile(b, "many-small-files.tar") +} + +func BenchmarkOpenAndCloseFile_ManySmallFiles_DisableSeek(b *testing.B) { + benchmarkOpenAndCloseFile(b, "many-small-files.tar", tarfs.DisableSeek(true)) +} + +func BenchmarkOpenAndCloseFile_FewLargeFiles(b *testing.B) { + benchmarkOpenAndCloseFile(b, "few-large-files.tar") +} + +func BenchmarkOpenAndCloseFile_FewLargeFiles_DisableSeek(b *testing.B) { + benchmarkOpenAndCloseFile(b, "few-large-files.tar", tarfs.DisableSeek(true)) +} + +func benchmarkReadFile(b *testing.B, tarFileName string, options ...tarfs.Option) { + tf, err := os.Open(tarFileName) + if err != nil { + panic(err) + } + defer tf.Close() + + tfs, err := tarfs.New(tf, options...) + if err != nil { + panic(err) + } + + fileName := randomFileName[tarFileName] + + b.ResetTimer() + + for i := 0; i < b.N; i++ { + if _, err := fs.ReadFile(tfs, fileName); err != nil { + panic(err) + } + } +} + +func benchmarkOpenAndCloseFile(b *testing.B, tarFileName string, options ...tarfs.Option) { + tf, err := os.Open(tarFileName) + if err != nil { + panic(err) + } + defer tf.Close() + + tfs, err := tarfs.New(tf, options...) + if err != nil { + panic(err) + } + + fileName := randomFileName[tarFileName] + + b.ResetTimer() + + for i := 0; i < b.N; i++ { + f, err := tfs.Open(fileName) + if err != nil { + panic(err) + } + if err := f.Close(); err != nil { + panic(err) + } + } +} + +func benchmarkOpenReadAndCloseFile(b *testing.B, tarFileName string, options ...tarfs.Option) { + tf, err := os.Open(tarFileName) + if err != nil { + panic(err) + } + defer tf.Close() + + tfs, err := tarfs.New(tf, options...) + if err != nil { + panic(err) + } + + fileName := randomFileName[tarFileName] + + b.ResetTimer() + + for i := 0; i < b.N; i++ { + f, err := tfs.Open(fileName) + if err != nil { + panic(err) + } + st, err := f.Stat() + if err != nil { + panic(err) + } + buf := make([]byte, st.Size()) + if _, err := io.ReadFull(f, buf); err != nil { + panic(err) + } + if err := f.Close(); err != nil { + panic(err) + } + } +} + +func openTarThenReadFile(tarName, fileName string, options ...tarfs.Option) { + tf, err := os.Open(tarName) + if err != nil { + panic(err) + } + defer tf.Close() + + var tfs fs.FS + + tfs, err = tarfs.New(tf, options...) + if err != nil { + panic(err) + } + + if _, err := fs.ReadFile(tfs, fileName); err != nil { + panic(err) + } +} + +func generateTarFile(tarName string, numFiles int, minSize, maxSize int) { + f, err := os.Create(tarName) + if err != nil { + panic(err) + } + defer f.Close() + + w := tar.NewWriter(f) + buf := make([]byte, 1024) + randomFileIndex := rand.Intn(numFiles) + defer w.Close() + + for i := 0; i < numFiles; i++ { + nameLength := rand.Intn(100) + 10 + fileName := "" + for j := 0; j < nameLength; j++ { + fileName += string(chars[rand.Intn(len(chars))]) + } + + if i == randomFileIndex { + randomFileName[tarName] = fileName + } + + bytesToWrite := rand.Intn(maxSize-minSize) + minSize + + if err := w.WriteHeader(&tar.Header{ + Name: fileName, + Typeflag: tar.TypeReg, + Size: int64(bytesToWrite), + }); err != nil { + panic(err) + } + + for bytesToWrite != 0 { + if _, err := rand.Read(buf); err != nil { + panic(err) + } + + if bytesToWrite < 1024 { + if _, err := w.Write(buf[:bytesToWrite]); err != nil { + panic(err) + } + bytesToWrite = 0 + } else { + if _, err := w.Write(buf); err != nil { + panic(err) + } + bytesToWrite -= 1024 + } + } + } +} diff --git a/errors.go b/errors.go index e1b5f87..2640f50 100644 --- a/errors.go +++ b/errors.go @@ -11,10 +11,22 @@ var ( ErrDir = errors.New("is a directory") ) -func newErrNotDir(op, name string) error { - return &fs.PathError{Op: op, Path: name, Err: ErrNotDir} +func newErrNotDir(op, path string) error { + return newErr(op, path, ErrNotDir) } -func newErrDir(op, name string) error { - return &fs.PathError{Op: op, Path: name, Err: ErrDir} +func newErrDir(op, path string) error { + return newErr(op, path, ErrDir) +} + +func newErrClosed(op, path string) error { + return newErr(op, path, fs.ErrClosed) +} + +func newErrNotExist(op, path string) error { + return newErr(op, path, fs.ErrNotExist) +} + +func newErr(op, path string, err error) error { + return &fs.PathError{Op: op, Path: path, Err: err} } diff --git a/example_stat_test.go b/example_stat_test.go index 745b556..4a40ab9 100644 --- a/example_stat_test.go +++ b/example_stat_test.go @@ -16,7 +16,7 @@ func Example_stat() { } defer tf.Close() - tfs, err := tarfs.New(tf) + tfs, err := tarfs.New(tf, tarfs.DisableSeek(true)) if err != nil { panic(err) } diff --git a/file.go b/file.go index bc3584e..5f56fd8 100644 --- a/file.go +++ b/file.go @@ -1,149 +1,107 @@ package tarfs import ( - "bytes" - "fmt" "io" "io/fs" - "time" ) type file struct { - fs.DirEntry - r io.ReadSeeker + entry + r io.Reader readDirPos int -} - -func newFile(e fs.DirEntry) *file { - switch e := e.(type) { - case *entry: - return &file{e, bytes.NewReader(e.b), 0} - case *fakeDirEntry: - return &file{e, nil, 0} - default: - panic(fmt.Sprintf("unknown entry type: %T", e)) - } + closed bool } var _ fs.File = &file{} func (f *file) Stat() (fs.FileInfo, error) { + const op = "stat" + + if f.closed { + return nil, newErrClosed(op, f.Name()) + } + return f.Info() } func (f *file) Read(b []byte) (int, error) { + const op = "read" + + if f.closed { + return 0, newErrClosed(op, f.Name()) + } + if f.IsDir() { - return 0, newErrDir("read", f.Name()) + return 0, newErrDir(op, f.Name()) } return f.r.Read(b) } func (f *file) Close() error { - return nil -} - -var _ io.Seeker = &file{} + const op = "close" -func (f *file) Seek(offset int64, whence int) (int64, error) { - if f.IsDir() { - return 0, newErrDir("seek", f.Name()) + if f.closed { + return newErrClosed(op, f.Name()) } - return f.r.Seek(offset, whence) + f.r = nil // FIXME update documentation, this is no more a noop + f.closed = true + + return nil } var _ fs.ReadDirFile = &file{} func (f *file) ReadDir(n int) ([]fs.DirEntry, error) { - if !f.IsDir() { - return nil, newErrNotDir("readdir", f.Name()) + const op = "readdir" + + if f.closed { + return nil, newErrClosed(op, f.Name()) } - var entries = f.DirEntry.(entries).get() + allEntries, err := f.entry.entries(op, f.Name()) + if err != nil { + return nil, err + } - if f.readDirPos >= len(entries) { + if f.readDirPos >= len(allEntries) { if n <= 0 { return nil, nil } return nil, io.EOF } - if n > 0 && f.readDirPos+n <= len(entries) { - entries = entries[f.readDirPos : f.readDirPos+n] - f.readDirPos += n - } else { - entries = entries[f.readDirPos:] - f.readDirPos += len(entries) + if n <= 0 || f.readDirPos+n > len(allEntries) { + n = len(allEntries) - f.readDirPos } - return entries, nil -} + entries := make([]fs.DirEntry, n) -type rootFile struct { - tfs *tarfs - readDirPos int -} + copy(entries, allEntries[f.readDirPos:]) -var _ fs.File = &rootFile{} + f.readDirPos += n -func (rf *rootFile) Stat() (fs.FileInfo, error) { - return rf, nil + return entries, nil } -func (*rootFile) Read([]byte) (int, error) { - return 0, newErrDir("read", ".") +type fileSeeker struct { + file + seeker io.Seeker } -func (*rootFile) Close() error { - return nil -} +var _ io.ReadSeeker = &fileSeeker{} -var _ fs.ReadDirFile = &rootFile{} +func (f *fileSeeker) Seek(offset int64, whence int) (int64, error) { + const op = "seek" -func (rf *rootFile) ReadDir(n int) ([]fs.DirEntry, error) { - if rf.readDirPos >= len(rf.tfs.rootEntries) { - if n <= 0 { - return nil, nil - } - return nil, io.EOF + if f.closed { + return 0, newErrClosed(op, f.Name()) } - var entries []fs.DirEntry - - if n > 0 && rf.readDirPos+n <= len(rf.tfs.rootEntries) { - entries = rf.tfs.rootEntries[rf.readDirPos : rf.readDirPos+n] - rf.readDirPos += n - } else { - entries = rf.tfs.rootEntries[rf.readDirPos:] - rf.readDirPos += len(entries) + if f.IsDir() { + return 0, newErrDir(op, f.Name()) } - return entries, nil -} - -var _ fs.FileInfo = &rootFile{} - -func (rf *rootFile) Name() string { - return "." -} - -func (rf *rootFile) Size() int64 { - return 0 -} - -func (rf *rootFile) Mode() fs.FileMode { - return fs.ModeDir | 0755 -} - -func (rf *rootFile) ModTime() time.Time { - return time.Time{} -} - -func (rf *rootFile) IsDir() bool { - return true -} - -func (rf *rootFile) Sys() interface{} { - return nil + return f.seeker.Seek(offset, whence) } diff --git a/fs.go b/fs.go index 33bf0f2..3c3e4cb 100644 --- a/fs.go +++ b/fs.go @@ -11,16 +11,53 @@ import ( "time" ) +const ( + blockSize = 512 // Size of each block in a tar stream +) + type tarfs struct { - entries map[string]fs.DirEntry - rootEntries []fs.DirEntry - rootEntry fs.DirEntry + entries map[string]fs.DirEntry +} + +type options struct { + disableSeek bool +} + +type Option func(*options) + +func DisableSeek(disable bool) func(*options) { + return func(o *options) { + o.disableSeek = disable + } } // New creates a new tar fs.FS from r -func New(r io.Reader) (fs.FS, error) { - tr := tar.NewReader(r) - tfs := &tarfs{make(map[string]fs.DirEntry), make([]fs.DirEntry, 0, 10), nil} +func New(r io.Reader, opts ...Option) (fs.FS, error) { + o := &options{} + for _, opt := range opts { + opt(o) + } + + tfs := &tarfs{make(map[string]fs.DirEntry)} + tfs.entries["."] = newDirEntry(fs.FileInfoToDirEntry(fakeDirFileInfo("."))) + + ra, isReaderAt := r.(readReaderAt) + if !isReaderAt { + buf, err := io.ReadAll(r) + if err != nil { + return nil, err + } + ra = bytes.NewReader(buf) + } + + var cr readCounterIface + if rs, isReadSeeker := ra.(io.ReadSeeker); isReadSeeker { + cr = &readSeekCounter{ReadSeeker: rs} + } else { + cr = &readCounter{Reader: ra} + } + + tr := tar.NewReader(cr) for { h, err := tr.Next() @@ -36,36 +73,76 @@ func New(r io.Reader) (fs.FS, error) { continue } - buf := bytes.NewBuffer(make([]byte, 0, int(h.Size))) - if _, err := io.Copy(buf, tr); err != nil { - return nil, err - } - - e := &entry{h, buf.Bytes(), nil} + de := fs.FileInfoToDirEntry(h.FileInfo()) - tfs.append(name, e) + if h.FileInfo().IsDir() { + tfs.append(name, newDirEntry(de)) + } else { + tfs.append(name, ®Entry{de, o.disableSeek, name, ra, cr.Count() - blockSize}) + } } return tfs, nil } +type readReaderAt interface { + io.Reader + io.ReaderAt +} + +type readCounterIface interface { + io.Reader + Count() int64 +} + +type readCounter struct { + io.Reader + off int64 +} + +func (cr *readCounter) Read(p []byte) (n int, err error) { + n, err = cr.Reader.Read(p) + cr.off += int64(n) + return +} + +func (cr *readCounter) Count() int64 { + return cr.off +} + +type readSeekCounter struct { + io.ReadSeeker + off int64 +} + +func (cr *readSeekCounter) Read(p []byte) (n int, err error) { + n, err = cr.ReadSeeker.Read(p) + cr.off += int64(n) + return +} + +func (cr *readSeekCounter) Seek(offset int64, whence int) (abs int64, err error) { + abs, err = cr.ReadSeeker.Seek(offset, whence) + cr.off = abs + return +} + +func (cr *readSeekCounter) Count() int64 { + return cr.off +} + func (tfs *tarfs) append(name string, e fs.DirEntry) { tfs.entries[name] = e dir := path.Dir(name) - if dir == "." { - tfs.rootEntries = append(tfs.rootEntries, e) - return - } - if parent, ok := tfs.entries[dir]; ok { - parent := parent.(entries) + parent := parent.(*dirEntry) parent.append(e) return } - parent := &fakeDirEntry{path.Base(dir), nil} + parent := newDirEntry(fs.FileInfoToDirEntry(fakeDirFileInfo(path.Base(dir)))) tfs.append(dir, parent) @@ -75,83 +152,47 @@ func (tfs *tarfs) append(name string, e fs.DirEntry) { var _ fs.FS = &tarfs{} func (tfs *tarfs) Open(name string) (fs.File, error) { - if name == "." { - if tfs.rootEntry == nil { - return &rootFile{tfs: tfs}, nil - } - return newFile(tfs.rootEntry), nil - } + const op = "open" - e, err := tfs.get(name, "open") + e, err := tfs.get(op, name) if err != nil { return nil, err } - return newFile(e), nil + return e.open() } var _ fs.ReadDirFS = &tarfs{} func (tfs *tarfs) ReadDir(name string) ([]fs.DirEntry, error) { - if name == "." { - return tfs.rootEntries, nil - } - - e, err := tfs.get(name, "readdir") + e, err := tfs.get("readdir", name) if err != nil { return nil, err } - if !e.IsDir() { - return nil, newErrNotDir("readdir", name) - } - - entries := e.(entries).get() - - sort.Slice(entries, func(i, j int) bool { return entries[i].Name() < entries[j].Name() }) - - return entries, nil + return e.readdir(name) } var _ fs.ReadFileFS = &tarfs{} func (tfs *tarfs) ReadFile(name string) ([]byte, error) { - if name == "." { - return nil, newErrDir("readfile", name) - } - - e, err := tfs.get(name, "readfile") + e, err := tfs.get("readfile", name) if err != nil { return nil, err } - if e.IsDir() { - return nil, newErrDir("readfile", name) - } - - ee := e.(*entry) - - buf := make([]byte, len(ee.b)) - copy(buf, ee.b) - return buf, nil + return e.readfile(name) } var _ fs.StatFS = &tarfs{} func (tfs *tarfs) Stat(name string) (fs.FileInfo, error) { - if name == "." { - if tfs.rootEntry == nil { - return &rootFile{tfs: tfs}, nil - } - return tfs.rootEntry.Info() - } - - e, err := tfs.get(name, "stat") + e, err := tfs.get("stat", name) if err != nil { return nil, err } - return e.(fileInfo).FileInfo() + return e.Info() } var _ fs.GlobFS = &tarfs{} @@ -172,20 +213,21 @@ func (tfs *tarfs) Glob(pattern string) (matches []string, _ error) { var _ fs.SubFS = &tarfs{} func (tfs *tarfs) Sub(dir string) (fs.FS, error) { + const op = "sub" + if dir == "." { return tfs, nil } - e, err := tfs.get(dir, "sub") + e, err := tfs.get(op, dir) if err != nil { return nil, err } - if !e.IsDir() { - return nil, newErrNotDir("sub", dir) - } + subfs := &tarfs{make(map[string]fs.DirEntry)} + + subfs.entries["."] = e - subfs := &tarfs{make(map[string]fs.DirEntry), e.(entries).get(), e} prefix := dir + "/" for name, file := range tfs.entries { if strings.HasPrefix(name, prefix) { @@ -196,121 +238,181 @@ func (tfs *tarfs) Sub(dir string) (fs.FS, error) { return subfs, nil } -func (tfs *tarfs) get(name, op string) (fs.DirEntry, error) { - if !fs.ValidPath(name) { - return nil, &fs.PathError{Op: op, Path: name, Err: fs.ErrInvalid} +func (tfs *tarfs) get(op, path string) (entry, error) { + if !fs.ValidPath(path) { + return nil, newErr(op, path, fs.ErrInvalid) } - e, ok := tfs.entries[name] + e, ok := tfs.entries[path] if !ok { - return nil, &fs.PathError{Op: op, Path: name, Err: fs.ErrNotExist} + return nil, newErrNotExist(op, path) } - return e, nil + return e.(entry), nil } -type entries interface { - append(fs.DirEntry) - get() []fs.DirEntry +type entry interface { + fs.DirEntry + size() int64 + readdir(path string) ([]fs.DirEntry, error) + readfile(path string) ([]byte, error) + entries(op, path string) ([]fs.DirEntry, error) + open() (fs.File, error) } -type fileInfo interface { - FileInfo() (fs.FileInfo, error) +type regEntry struct { + fs.DirEntry + disableSeek bool + name string + ra io.ReaderAt + offset int64 } -type entry struct { - h *tar.Header - b []byte - entries []fs.DirEntry -} +var _ entry = ®Entry{} -var _ fs.DirEntry = &entry{} +func (e *regEntry) size() int64 { + info, _ := e.Info() // err is necessarily nil + return info.Size() +} -func (e *entry) Name() string { - return e.h.FileInfo().Name() +func (e *regEntry) readdir(path string) ([]fs.DirEntry, error) { + return nil, newErrNotDir("readdir", path) } -func (e *entry) IsDir() bool { - return e.h.FileInfo().IsDir() +func (e *regEntry) readfile(path string) ([]byte, error) { + f, err := e.openReader() + if err != nil { + return nil, err + } + + b := bytes.NewBuffer(make([]byte, 0, e.size())) + if _, err := io.Copy(b, f); err != nil { + return nil, err + } + return b.Bytes(), nil } -func (e *entry) Type() fs.FileMode { - return e.h.FileInfo().Mode() & fs.ModeType +func (e *regEntry) entries(op, path string) ([]fs.DirEntry, error) { + return nil, newErrNotDir(op, path) } -func (e *entry) Info() (fs.FileInfo, error) { - return e.h.FileInfo(), nil +func (e *regEntry) openReader() (io.Reader, error) { + tr := tar.NewReader(io.NewSectionReader(e.ra, e.offset, 1<<63-1)) + + if _, err := tr.Next(); err != nil { + return nil, err + } + return tr, nil } -var _ entries = &entry{} +func (e *regEntry) open() (fs.File, error) { + tr, err := e.openReader() + if err != nil { + return nil, err + } + if e.disableSeek { + return &file{e, tr, -1, false}, nil + } -func (e *entry) append(c fs.DirEntry) { - e.entries = append(e.entries, c) + b := bytes.NewBuffer(make([]byte, 0, e.size())) + if _, err := io.Copy(b, tr); err != nil { + return nil, err + } + r := bytes.NewReader(b.Bytes()) + return &fileSeeker{ + file: file{e, r, -1, false}, + seeker: r, + }, nil } -func (e *entry) get() []fs.DirEntry { - return e.entries +type dirEntry struct { + fs.DirEntry + _entries []fs.DirEntry + sorted bool } -var _ fileInfo = &entry{} +func newDirEntry(e fs.DirEntry) *dirEntry { + return &dirEntry{e, make([]fs.DirEntry, 0, 10), false} +} -func (e *entry) FileInfo() (fs.FileInfo, error) { - return e.h.FileInfo(), nil +func (e *dirEntry) append(c fs.DirEntry) { + e._entries = append(e._entries, c) } -type fakeDirEntry struct { - name string - entries []fs.DirEntry +var _ entry = &dirEntry{} + +func (e *dirEntry) size() int64 { + return 0 } -var _ fs.DirEntry = &fakeDirEntry{} +func (e *dirEntry) readdir(path string) ([]fs.DirEntry, error) { + if !e.sorted { + sort.Sort(entriesByName(e._entries)) + } + + entries := make([]fs.DirEntry, len(e._entries)) -func (e *fakeDirEntry) Name() string { - return e.name + copy(entries, e._entries) + + return entries, nil } -func (*fakeDirEntry) IsDir() bool { - return true +func (e *dirEntry) readfile(path string) ([]byte, error) { + return nil, newErrDir("readfile", path) } -func (*fakeDirEntry) Type() fs.FileMode { - return fs.ModeDir +func (e *dirEntry) entries(op, path string) ([]fs.DirEntry, error) { + if !e.sorted { + sort.Sort(entriesByName(e._entries)) + } + + return e._entries, nil } -func (e *fakeDirEntry) Info() (fs.FileInfo, error) { - return e, nil +func (e *dirEntry) open() (fs.File, error) { + return &file{e, nil, 0, false}, nil } -var _ fs.FileInfo = &fakeDirEntry{} +type fakeDirFileInfo string -func (*fakeDirEntry) Mode() fs.FileMode { - return fs.ModeDir +var _ fs.FileInfo = fakeDirFileInfo("") + +func (e fakeDirFileInfo) Name() string { + return string(e) } -func (*fakeDirEntry) Size() int64 { +func (fakeDirFileInfo) Size() int64 { return 0 } -func (*fakeDirEntry) ModTime() time.Time { +func (fakeDirFileInfo) Mode() fs.FileMode { + return fs.ModeDir +} + +func (fakeDirFileInfo) ModTime() time.Time { return time.Time{} } -func (*fakeDirEntry) Sys() interface{} { +func (fakeDirFileInfo) IsDir() bool { + return true +} + +func (fakeDirFileInfo) Sys() interface{} { return nil } -var _ entries = &fakeDirEntry{} +type entriesByName []fs.DirEntry -func (e *fakeDirEntry) append(c fs.DirEntry) { - e.entries = append(e.entries, c) -} +var _ sort.Interface = entriesByName{} -func (e *fakeDirEntry) get() []fs.DirEntry { - return e.entries +func (entries entriesByName) Less(i, j int) bool { + return entries[i].Name() < entries[j].Name() } -var _ fileInfo = &fakeDirEntry{} +func (entries entriesByName) Len() int { + return len(entries) +} -func (e *fakeDirEntry) FileInfo() (fs.FileInfo, error) { - return e, nil +func (entries entriesByName) Swap(i, j int) { + entries[i], entries[j] = entries[j], entries[i] } diff --git a/fs_test.go b/fs_test.go index bc493ca..54da346 100644 --- a/fs_test.go +++ b/fs_test.go @@ -1,6 +1,7 @@ package tarfs import ( + "io" "io/fs" "os" "testing" @@ -17,7 +18,7 @@ func TestFS(t *testing.T) { require.NoError(err) defer f.Close() - tfs, err := New(f) + tfs, err := New(f, DisableSeek(true)) require.NoError(err) err = fstest.TestFS(tfs, "bar", "foo", "dir1", "dir1/dir11", "dir1/dir11/file111", "dir1/file11", "dir1/file12", "dir2", "dir2/dir21", "dir2/dir21/file211", "dir2/dir21/file212") @@ -31,7 +32,7 @@ func TestOpenInvalid(t *testing.T) { require.NoError(err) defer f.Close() - tfs, err := New(f) + tfs, err := New(f, DisableSeek(true)) require.NoError(err) for _, name := range []string{"/foo", "./foo", "foo/", "foo/../foo", "foo//bar"} { @@ -47,7 +48,7 @@ func TestOpenNotExist(t *testing.T) { require.NoError(err) defer f.Close() - tfs, err := New(f) + tfs, err := New(f, DisableSeek(true)) require.NoError(err) for _, name := range []string{"baz", "qwe", "foo/bar", "file11"} { @@ -63,7 +64,7 @@ func TestOpenThenStat(t *testing.T) { require.NoError(err) defer f.Close() - tfs, err := New(f) + tfs, err := New(f, DisableSeek(true)) require.NoError(err) for _, file := range []struct { @@ -92,6 +93,38 @@ func TestOpenThenStat(t *testing.T) { } } +func TestOpenThenReadAll(t *testing.T) { + require, assert := require.New(t), assert.New(t) + + f, err := os.Open("test.tar") + require.NoError(err) + defer f.Close() + + tfs, err := New(f, DisableSeek(true)) + require.NoError(err) + + for _, file := range []struct { + path string + content []byte + }{ + {"foo", []byte("foo")}, + {"bar", []byte("bar")}, + {"dir1/file11", []byte("file11")}, + } { + f, err := tfs.Open(file.path) + if !assert.NoErrorf(err, "when tarfs.Open(%#v)", file.path) { + continue + } + + content, err := io.ReadAll(f) + if !assert.NoErrorf(err, "when io.ReadAll(file{%#v})", file.path) { + continue + } + + assert.Equalf(file.content, content, "content of %#v", file.path) + } +} + func TestReadDir(t *testing.T) { require, assert := require.New(t), assert.New(t) @@ -99,7 +132,7 @@ func TestReadDir(t *testing.T) { require.NoError(err) defer f.Close() - tfs, err := New(f) + tfs, err := New(f, DisableSeek(true)) require.NoError(err) for _, dir := range []struct { @@ -126,7 +159,7 @@ func TestReadDirNotDir(t *testing.T) { require.NoError(err) defer f.Close() - tfs, err := New(f) + tfs, err := New(f, DisableSeek(true)) require.NoError(err) for _, name := range []string{"foo", "dir1/file12"} { @@ -142,20 +175,27 @@ func TestReadFile(t *testing.T) { require.NoError(err) defer f.Close() - tfs, err := New(f) + tfs, err := New(f, DisableSeek(true)) require.NoError(err) - for name, content := range map[string]string{ - "dir1/dir11/file111": "file111", - "dir2/dir21/file212": "file212", - "foo": "foo", + for _, file := range []struct { + path string + content string + }{ + {"bar", "bar"}, + {"dir1/dir11/file111", "file111"}, + {"dir1/file11", "file11"}, + {"dir1/file12", "file12"}, + {"dir2/dir21/file211", "file211"}, + {"dir2/dir21/file212", "file212"}, + {"foo", "foo"}, } { - b, err := fs.ReadFile(tfs, name) - if !assert.NoErrorf(err, "when fs.ReadFile(tfs, %#v)", name) { + b, err := fs.ReadFile(tfs, file.path) + if !assert.NoErrorf(err, "when fs.ReadFile(tfs, %#v)", file.path) { continue } - assert.Equalf(content, string(b), "in %#v", name) + assert.Equalf(file.content, string(b), "in %#v", file.path) } } @@ -166,7 +206,7 @@ func TestStat(t *testing.T) { require.NoError(err) defer f.Close() - tfs, err := New(f) + tfs, err := New(f, DisableSeek(true)) require.NoError(err) for _, file := range []struct { @@ -197,12 +237,12 @@ func TestGlob(t *testing.T) { require.NoError(err) defer f.Close() - tfs, err := New(f) + tfs, err := New(f, DisableSeek(true)) require.NoError(err) for pattern, expected := range map[string][]string{ "*/*2*": {"dir1/file12", "dir2/dir21"}, - "*": {"bar", "dir1", "dir2", "foo"}, + "*": {"bar", "dir1", "dir2", "foo", "."}, "*/*/*": {"dir1/dir11/file111", "dir2/dir21/file211", "dir2/dir21/file212"}, "*/*/*/*": nil, } { @@ -222,7 +262,7 @@ func TestSubThenReadDir(t *testing.T) { require.NoError(err) defer f.Close() - tfs, err := New(f) + tfs, err := New(f, DisableSeek(true)) require.NoError(err) for _, dir := range []struct { @@ -254,7 +294,7 @@ func TestSubThenReadFile(t *testing.T) { require.NoError(err) defer f.Close() - tfs, err := New(f) + tfs, err := New(f, DisableSeek(true)) require.NoError(err) name := "dir2" @@ -278,7 +318,7 @@ func TestReadOnDir(t *testing.T) { require.NoError(err) defer tf.Close() - tfs, err := New(tf) + tfs, err := New(tf, DisableSeek(true)) require.NoError(err) var dirs = []string{"dir1", "dir2/dir21", "."} @@ -297,72 +337,32 @@ func TestReadOnDir(t *testing.T) { } } -func TestWalkDir_WithDotDirInArchive(t *testing.T) { +func TestWithDotDirInArchive(t *testing.T) { require := require.New(t) - tf, err := os.Open("test-with-dot-dir.tar") + f, err := os.Open("test-with-dot-dir.tar") require.NoError(err) - defer tf.Close() + defer f.Close() - tfs, err := New(tf) + tfs, err := New(f, DisableSeek(true)) require.NoError(err) - paths := make([]string, 0, 12) - - err = fs.WalkDir(tfs, ".", func(path string, d fs.DirEntry, err error) error { - paths = append(paths, path) - return nil - }) + err = fstest.TestFS(tfs, "bar", "foo", "dir1", "dir1/dir11", "dir1/dir11/file111", "dir1/file11", "dir1/file12", "dir2", "dir2/dir21", "dir2/dir21/file211", "dir2/dir21/file212") require.NoError(err) - - require.ElementsMatch([]string{ - ".", - "bar", - "foo", - "dir1", - "dir1/dir11", - "dir1/dir11/file111", - "dir1/file11", - "dir1/file12", - "dir2", - "dir2/dir21", - "dir2/dir21/file211", - "dir2/dir21/file212", - }, paths) } -func TestWalkDir_WithNoDirEntriesInArchive(t *testing.T) { +func TestWithNoDirEntriesInArchive(t *testing.T) { require := require.New(t) - tf, err := os.Open("test-no-directory-entries.tar") + f, err := os.Open("test-no-directory-entries.tar") require.NoError(err) - defer tf.Close() + defer f.Close() - tfs, err := New(tf) + tfs, err := New(f, DisableSeek(true)) require.NoError(err) - paths := make([]string, 0, 12) - - err = fs.WalkDir(tfs, ".", func(path string, d fs.DirEntry, err error) error { - paths = append(paths, path) - return nil - }) + err = fstest.TestFS(tfs, "bar", "foo", "dir1", "dir1/dir11", "dir1/dir11/file111", "dir1/file11", "dir1/file12", "dir2", "dir2/dir21", "dir2/dir21/file211", "dir2/dir21/file212") require.NoError(err) - - require.ElementsMatch([]string{ - ".", - "bar", - "foo", - "dir1", - "dir1/dir11", - "dir1/dir11/file111", - "dir1/file11", - "dir1/file12", - "dir2", - "dir2/dir21", - "dir2/dir21/file211", - "dir2/dir21/file212", - }, paths) } func TestSparse(t *testing.T) { @@ -372,7 +372,7 @@ func TestSparse(t *testing.T) { require.NoError(err) defer f.Close() - tfs, err := New(f) + tfs, err := New(f, DisableSeek(true)) require.NoError(err) err = fstest.TestFS(tfs, "file1", "file2") diff --git a/go.mod b/go.mod index 79f46b5..a805672 100644 --- a/go.mod +++ b/go.mod @@ -1,5 +1,11 @@ module github.com/nlepage/go-tarfs -go 1.16 +go 1.17 require github.com/stretchr/testify v1.7.1 + +require ( + github.com/davecgh/go-spew v1.1.0 // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect + gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c // indirect +) diff --git a/go.sum b/go.sum index 307d5b4..2dca7c9 100644 --- a/go.sum +++ b/go.sum @@ -2,10 +2,7 @@ github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/stretchr/objx v0.1.0 h1:4G4v2dO3VZwixGIRoQ5Lfboy6nUhCyYzaqnIAPPhYs4= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= -github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.1 h1:5TQK59W5E3v0r2duFAb7P95B6hEeOyEnHRa8MjYSMTY= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=