From 2b152d06f10ec430a9c31982b403a6acdc134eb8 Mon Sep 17 00:00:00 2001 From: Joe Lanford Date: Thu, 3 Aug 2023 15:28:05 -0400 Subject: [PATCH] make io.Seeker implementation optional Signed-off-by: Joe Lanford --- benchmarks/benchmarks_test.go | 88 +++++++++++++++++++++++++++++++++-- example_stat_test.go | 2 +- file.go | 39 +++++++++------- fs.go | 71 +++++++++++++++++++++------- fs_test.go | 32 ++++++------- 5 files changed, 176 insertions(+), 56 deletions(-) diff --git a/benchmarks/benchmarks_test.go b/benchmarks/benchmarks_test.go index a5cdeae..de65884 100644 --- a/benchmarks/benchmarks_test.go +++ b/benchmarks/benchmarks_test.go @@ -2,6 +2,7 @@ package tarfs import ( "archive/tar" + "io" "io/fs" "math/rand" "os" @@ -33,6 +34,16 @@ func BenchmarkOpenTarThenReadFile_ManySmallFiles(b *testing.B) { } } +func BenchmarkOpenTarThenReadFile_ManySmallFiles_DisableSeek(b *testing.B) { + fileName := randomFileName["many-small-files.tar"] + + b.ResetTimer() + + for i := 0; i < b.N; i++ { + openTarThenReadFile("many-small-files.tar", fileName, tarfs.DisableSeek(true)) + } +} + func BenchmarkOpenTarThenReadFile_FewLargeFiles(b *testing.B) { fileName := randomFileName["few-large-files.tar"] @@ -43,22 +54,56 @@ func BenchmarkOpenTarThenReadFile_FewLargeFiles(b *testing.B) { } } +func BenchmarkOpenTarThenReadFile_FewLargeFiles_DisableSeek(b *testing.B) { + fileName := randomFileName["few-large-files.tar"] + + b.ResetTimer() + + for i := 0; i < b.N; i++ { + openTarThenReadFile("few-large-files.tar", fileName, tarfs.DisableSeek(true)) + } +} + func BenchmarkReadFile_ManySmallFiles(b *testing.B) { benchmarkReadFile(b, "many-small-files.tar") } +func BenchmarkReadFile_ManySmallFiles_DisableSeek(b *testing.B) { + benchmarkReadFile(b, "many-small-files.tar", tarfs.DisableSeek(true)) +} + func BenchmarkReadFile_FewLargeFiles(b *testing.B) { benchmarkReadFile(b, "few-large-files.tar") } -func benchmarkReadFile(b *testing.B, tarFileName string) { +func 
BenchmarkReadFile_FewLargeFiles_DisableSeek(b *testing.B) { + benchmarkReadFile(b, "few-large-files.tar", tarfs.DisableSeek(true)) +} + +func BenchmarkOpenAndReadFile_ManySmallFiles(b *testing.B) { + benchmarkOpenAndReadFile(b, "many-small-files.tar") +} + +func BenchmarkOpenAndReadFile_ManySmallFiles_DisableSeek(b *testing.B) { + benchmarkOpenAndReadFile(b, "many-small-files.tar", tarfs.DisableSeek(true)) +} + +func BenchmarkOpenAndReadFile_FewLargeFiles(b *testing.B) { + benchmarkOpenAndReadFile(b, "few-large-files.tar") +} + +func BenchmarkOpenAndReadFile_FewLargeFiles_DisableSeek(b *testing.B) { + benchmarkOpenAndReadFile(b, "few-large-files.tar", tarfs.DisableSeek(true)) +} + +func benchmarkReadFile(b *testing.B, tarFileName string, options ...tarfs.Option) { tf, err := os.Open(tarFileName) if err != nil { panic(err) } defer tf.Close() - tfs, err := tarfs.New(tf) + tfs, err := tarfs.New(tf, options...) if err != nil { panic(err) } @@ -74,7 +119,42 @@ func benchmarkReadFile(b *testing.B, tarFileName string) { } } -func openTarThenReadFile(tarName, fileName string) { +func benchmarkOpenAndReadFile(b *testing.B, tarFileName string, options ...tarfs.Option) { + tf, err := os.Open(tarFileName) + if err != nil { + panic(err) + } + defer tf.Close() + + tfs, err := tarfs.New(tf, options...) 
+ if err != nil { + panic(err) + } + + fileName := randomFileName[tarFileName] + + b.ResetTimer() + + for i := 0; i < b.N; i++ { + f, err := tfs.Open(fileName) + if err != nil { + panic(err) + } + st, err := f.Stat() + if err != nil { + panic(err) + } + buf := make([]byte, st.Size()) + if _, err := io.ReadFull(f, buf); err != nil { + panic(err) + } + if err := f.Close(); err != nil { + panic(err) + } + } +} + +func openTarThenReadFile(tarName, fileName string, options ...tarfs.Option) { tf, err := os.Open(tarName) if err != nil { panic(err) @@ -83,7 +163,7 @@ func openTarThenReadFile(tarName, fileName string) { var tfs fs.FS - tfs, err = tarfs.New(tf) + tfs, err = tarfs.New(tf, options...) if err != nil { panic(err) } diff --git a/example_stat_test.go b/example_stat_test.go index 745b556..4a40ab9 100644 --- a/example_stat_test.go +++ b/example_stat_test.go @@ -16,7 +16,7 @@ func Example_stat() { } defer tf.Close() - tfs, err := tarfs.New(tf) + tfs, err := tarfs.New(tf, tarfs.DisableSeek(true)) if err != nil { panic(err) } diff --git a/file.go b/file.go index b2c23c3..11eb59d 100644 --- a/file.go +++ b/file.go @@ -7,7 +7,7 @@ import ( type file struct { entry - r io.ReadSeeker + r io.Reader readDirPos int closed bool } @@ -51,22 +51,6 @@ func (f *file) Close() error { return nil } -var _ io.Seeker = &file{} - -func (f *file) Seek(offset int64, whence int) (int64, error) { - const op = "seek" - - if f.closed { - return 0, newErrClosed(op, f.Name()) - } - - if f.IsDir() { - return 0, newErrDir(op, f.Name()) - } - - return f.r.Seek(offset, whence) -} - var _ fs.ReadDirFile = &file{} func (f *file) ReadDir(n int) ([]fs.DirEntry, error) { @@ -100,3 +84,24 @@ func (f *file) ReadDir(n int) ([]fs.DirEntry, error) { return entries, nil } + +type fileSeeker struct { + file + seeker io.Seeker +} + +var _ io.Seeker = &fileSeeker{} + +func (f *fileSeeker) Seek(offset int64, whence int) (int64, error) { + const op = "seek" + + if f.closed { + return 0, newErrClosed(op, f.Name()) 
+ } + + if f.IsDir() { + return 0, newErrDir(op, f.Name()) + } + + return f.seeker.Seek(offset, whence) +} diff --git a/fs.go b/fs.go index a76dfd6..0d4452f 100644 --- a/fs.go +++ b/fs.go @@ -19,8 +19,25 @@ type tarfs struct { entries map[string]fs.DirEntry } +type options struct { + disableSeek bool +} + +type Option func(*options) + +func DisableSeek(disable bool) Option { + return func(o *options) { + o.disableSeek = disable + } +} + // New creates a new tar fs.FS from r -func New(r io.Reader) (fs.FS, error) { +func New(r io.Reader, opts ...Option) (fs.FS, error) { + o := &options{} + for _, opt := range opts { + opt(o) + } + tfs := &tarfs{make(map[string]fs.DirEntry)} tfs.entries["."] = newDirEntry(fs.FileInfoToDirEntry(fakeDirFileInfo("."))) @@ -61,7 +78,7 @@ func New(r io.Reader) (fs.FS, error) { if h.FileInfo().IsDir() { tfs.append(name, newDirEntry(de)) } else { - tfs.append(name, &regEntry{de, name, ra, cr.Count() - blockSize}) + tfs.append(name, &regEntry{de, o.disableSeek, name, ra, cr.Count() - blockSize}) } } @@ -240,14 +257,15 @@ type entry interface { readdir(path string) ([]fs.DirEntry, error) readfile(path string) ([]byte, error) entries(op, path string) ([]fs.DirEntry, error) - open() (*file, error) + open() (fs.File, error) } type regEntry struct { fs.DirEntry - name string - ra io.ReaderAt - offset int64 + disableSeek bool + name string + ra io.ReaderAt + offset int64 } var _ entry = &regEntry{} @@ -262,32 +280,49 @@ func (e *regEntry) readdir(path string) ([]fs.DirEntry, error) { } func (e *regEntry) readfile(path string) ([]byte, error) { - tr := tar.NewReader(io.NewSectionReader(e.ra, e.offset, 1<<63-1)) - - if _, err := tr.Next(); err != nil { + f, err := e.openReader() + if err != nil { return nil, err } - b := bytes.NewBuffer(make([]byte, 0, e.size())) - - if _, err := io.Copy(b, tr); err != nil { + buf := make([]byte, e.size()) + if _, err := io.ReadFull(f, buf); err != nil { return nil, err } - - return b.Bytes(), nil + return buf, nil 
} func (e *regEntry) entries(op, path string) ([]fs.DirEntry, error) { return nil, newErrNotDir(op, path) } -func (e *regEntry) open() (*file, error) { - b, err := e.readfile("") +func (e *regEntry) openReader() (io.Reader, error) { + tr := tar.NewReader(io.NewSectionReader(e.ra, e.offset, 1<<63-1)) + + if _, err := tr.Next(); err != nil { + return nil, err + } + return tr, nil +} + +func (e *regEntry) open() (fs.File, error) { + tr, err := e.openReader() if err != nil { return nil, err } + if e.disableSeek { + return &file{e, tr, -1, false}, nil + } - return &file{e, bytes.NewReader(b), -1, false}, nil + b := bytes.NewBuffer(make([]byte, 0, e.size())) + if _, err := io.Copy(b, tr); err != nil { + return nil, err + } + r := bytes.NewReader(b.Bytes()) + return &fileSeeker{ + file: file{e, r, -1, false}, + seeker: r, + }, nil } type dirEntry struct { @@ -334,7 +369,7 @@ func (e *dirEntry) entries(op, path string) ([]fs.DirEntry, error) { return e._entries, nil } -func (e *dirEntry) open() (*file, error) { +func (e *dirEntry) open() (fs.File, error) { return &file{e, nil, 0, false}, nil } diff --git a/fs_test.go b/fs_test.go index 4a470c9..54da346 100644 --- a/fs_test.go +++ b/fs_test.go @@ -18,7 +18,7 @@ func TestFS(t *testing.T) { require.NoError(err) defer f.Close() - tfs, err := New(f) + tfs, err := New(f, DisableSeek(true)) require.NoError(err) err = fstest.TestFS(tfs, "bar", "foo", "dir1", "dir1/dir11", "dir1/dir11/file111", "dir1/file11", "dir1/file12", "dir2", "dir2/dir21", "dir2/dir21/file211", "dir2/dir21/file212") @@ -32,7 +32,7 @@ func TestOpenInvalid(t *testing.T) { require.NoError(err) defer f.Close() - tfs, err := New(f) + tfs, err := New(f, DisableSeek(true)) require.NoError(err) for _, name := range []string{"/foo", "./foo", "foo/", "foo/../foo", "foo//bar"} { @@ -48,7 +48,7 @@ func TestOpenNotExist(t *testing.T) { require.NoError(err) defer f.Close() - tfs, err := New(f) + tfs, err := New(f, DisableSeek(true)) require.NoError(err) for _, name := 
range []string{"baz", "qwe", "foo/bar", "file11"} { @@ -64,7 +64,7 @@ func TestOpenThenStat(t *testing.T) { require.NoError(err) defer f.Close() - tfs, err := New(f) + tfs, err := New(f, DisableSeek(true)) require.NoError(err) for _, file := range []struct { @@ -100,7 +100,7 @@ func TestOpenThenReadAll(t *testing.T) { require.NoError(err) defer f.Close() - tfs, err := New(f) + tfs, err := New(f, DisableSeek(true)) require.NoError(err) for _, file := range []struct { @@ -132,7 +132,7 @@ func TestReadDir(t *testing.T) { require.NoError(err) defer f.Close() - tfs, err := New(f) + tfs, err := New(f, DisableSeek(true)) require.NoError(err) for _, dir := range []struct { @@ -159,7 +159,7 @@ func TestReadDirNotDir(t *testing.T) { require.NoError(err) defer f.Close() - tfs, err := New(f) + tfs, err := New(f, DisableSeek(true)) require.NoError(err) for _, name := range []string{"foo", "dir1/file12"} { @@ -175,7 +175,7 @@ func TestReadFile(t *testing.T) { require.NoError(err) defer f.Close() - tfs, err := New(f) + tfs, err := New(f, DisableSeek(true)) require.NoError(err) for _, file := range []struct { @@ -206,7 +206,7 @@ func TestStat(t *testing.T) { require.NoError(err) defer f.Close() - tfs, err := New(f) + tfs, err := New(f, DisableSeek(true)) require.NoError(err) for _, file := range []struct { @@ -237,7 +237,7 @@ func TestGlob(t *testing.T) { require.NoError(err) defer f.Close() - tfs, err := New(f) + tfs, err := New(f, DisableSeek(true)) require.NoError(err) for pattern, expected := range map[string][]string{ @@ -262,7 +262,7 @@ func TestSubThenReadDir(t *testing.T) { require.NoError(err) defer f.Close() - tfs, err := New(f) + tfs, err := New(f, DisableSeek(true)) require.NoError(err) for _, dir := range []struct { @@ -294,7 +294,7 @@ func TestSubThenReadFile(t *testing.T) { require.NoError(err) defer f.Close() - tfs, err := New(f) + tfs, err := New(f, DisableSeek(true)) require.NoError(err) name := "dir2" @@ -318,7 +318,7 @@ func TestReadOnDir(t *testing.T) { 
require.NoError(err) defer tf.Close() - tfs, err := New(tf) + tfs, err := New(tf, DisableSeek(true)) require.NoError(err) var dirs = []string{"dir1", "dir2/dir21", "."} @@ -344,7 +344,7 @@ func TestWithDotDirInArchive(t *testing.T) { require.NoError(err) defer f.Close() - tfs, err := New(f) + tfs, err := New(f, DisableSeek(true)) require.NoError(err) err = fstest.TestFS(tfs, "bar", "foo", "dir1", "dir1/dir11", "dir1/dir11/file111", "dir1/file11", "dir1/file12", "dir2", "dir2/dir21", "dir2/dir21/file211", "dir2/dir21/file212") @@ -358,7 +358,7 @@ func TestWithNoDirEntriesInArchive(t *testing.T) { require.NoError(err) defer f.Close() - tfs, err := New(f) + tfs, err := New(f, DisableSeek(true)) require.NoError(err) err = fstest.TestFS(tfs, "bar", "foo", "dir1", "dir1/dir11", "dir1/dir11/file111", "dir1/file11", "dir1/file12", "dir2", "dir2/dir21", "dir2/dir21/file211", "dir2/dir21/file212") @@ -372,7 +372,7 @@ func TestSparse(t *testing.T) { require.NoError(err) defer f.Close() - tfs, err := New(f) + tfs, err := New(f, DisableSeek(true)) require.NoError(err) err = fstest.TestFS(tfs, "file1", "file2")