Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: experiment with custom git archive command #424

Draft
wants to merge 29 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
a926a1a
wip
keegancsmith Sep 16, 2022
e7e65e6
fix test
keegancsmith Sep 16, 2022
ccf69b3
same output as git archive for "tar t"
keegancsmith Sep 16, 2022
950b26a
capture state in archiveWriter struct for better readability
keegancsmith Sep 16, 2022
89c6970
set mode
keegancsmith Sep 16, 2022
db2d110
do not do dotgit detection since it breaks bare repos
keegancsmith Sep 17, 2022
91ffa5b
cpu_profile flag
keegancsmith Sep 19, 2022
ee1cc02
introduce manual stack for better profile output
keegancsmith Sep 19, 2022
04081cc
try out keepdescriptors
keegancsmith Sep 19, 2022
6500eb1
add memprofile
keegancsmith Sep 20, 2022
753864c
optionally buffer output if GIT_SG_BUFFER is set
keegancsmith Sep 20, 2022
cebf65b
add experimental GIT_SG_FILTER which just filters git archive
keegancsmith Sep 20, 2022
83325cf
getting started on git-cat-file integration
keegancsmith Sep 21, 2022
15902a2
add contents method for git-cat-file
keegancsmith Sep 21, 2022
0027f41
handle missing refs
keegancsmith Sep 21, 2022
c95ac4f
factor out common logic in cat-file
keegancsmith Sep 21, 2022
5daae67
add hash native API to catfile
keegancsmith Sep 21, 2022
801d2a4
make archive writer based on tree entries instead of object.TreeEntry
keegancsmith Sep 21, 2022
f7a1f9c
move archive code into own file
keegancsmith Sep 21, 2022
a4cdae0
wip interface to allow swapping out backend for archive writer
keegancsmith Sep 21, 2022
ae81031
implement TreeEntries for cat-file
keegancsmith Sep 22, 2022
d707154
test all modes
keegancsmith Sep 22, 2022
d88c93f
wip lstree
keegancsmith Sep 22, 2022
d29b6ba
refactor catfile to separate out gitCatFileBatchReader
keegancsmith Sep 22, 2022
01adb4e
ls-tree just writing an archive of names
keegancsmith Sep 22, 2022
d45e927
ls-tree implemented
keegancsmith Sep 23, 2022
5040bd7
skip TestDo on CI if missing .git
keegancsmith Sep 23, 2022
b07c069
check for .git in all tests
keegancsmith Sep 23, 2022
615d1d9
implement archiver via git-lfs/gitobj
keegancsmith Oct 10, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
add contents method for git-cat-file
  • Loading branch information
keegancsmith committed Oct 10, 2022
commit 15902a28bb798d1e84e2e64e72125d68258d6bd3
81 changes: 78 additions & 3 deletions cmd/git-sg/catfile.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"bufio"
"bytes"
"encoding/hex"
"fmt"
"io"
"log"
"os"
Expand All @@ -19,6 +20,11 @@ type gitCatFileBatch struct {
inCloser io.Closer
out *bufio.Reader
outCloser io.Closer

// readerN is the amount left to read for Read. Note: git-cat-file always
// has a trailing new line, so this will always be the size of an object +
// 1.
readerN int64
}

func startGitCatFileBatch(dir string) (_ *gitCatFileBatch, err error) {
Expand Down Expand Up @@ -74,6 +80,42 @@ func (g *gitCatFileBatch) Info(ref string) (gitCatFileBatchInfo, error) {
return gitCatFileBatchInfo{}, err
}

if err := g.discard(); err != nil {
g.kill()
return gitCatFileBatchInfo{}, err
}

line, err := g.out.ReadSlice('\n')
if err != nil {
g.kill()
return gitCatFileBatchInfo{}, err
}

info, err := parseGitCatFileBatchInfoLine(line)
if err != nil {
g.kill()
return gitCatFileBatchInfo{}, err
}

g.readerN = 0

return info, nil
}

func (g *gitCatFileBatch) Contents(ref string) (gitCatFileBatchInfo, error) {
g.in.WriteString("contents ")
g.in.WriteString(ref)
g.in.WriteByte('\n')
if err := g.in.Flush(); err != nil {
g.kill()
return gitCatFileBatchInfo{}, err
}

if err := g.discard(); err != nil {
g.kill()
return gitCatFileBatchInfo{}, err
}

line, err := g.out.ReadSlice('\n')
if err != nil {
g.kill()
Expand All @@ -86,16 +128,45 @@ func (g *gitCatFileBatch) Info(ref string) (gitCatFileBatchInfo, error) {
return gitCatFileBatchInfo{}, err
}

g.readerN = info.Size + 1

return info, nil
}

// Read implements io.Reader over the body of the object selected by the most
// recent call to Contents. It reports io.EOF once the only pending byte is
// the trailing newline that git-cat-file appends; that newline is left for
// discard to consume.
func (g *gitCatFileBatch) Read(p []byte) (n int, err error) {
	if g.readerN <= 1 {
		return 0, io.EOF
	}

	// Clamp the destination so the underlying reader can never run into the
	// trailing newline or into the next response.
	remaining := g.readerN - 1
	if int64(len(p)) > remaining {
		p = p[:remaining]
	}

	n, err = g.out.Read(p)
	g.readerN -= int64(n)
	return n, err
}

// discard should be called before parsing a response to flush out any unread
// data since the last command. It drops the readerN bytes still pending on
// g.out and decrements readerN by the number of bytes actually discarded. It
// returns the first error reported by the underlying reader, or nil when
// nothing was pending or everything was dropped.
//
// readerN is an int64 but bufio.Reader.Discard takes an int, so the work is
// done in chunks that fit in 32 bits. A single int(g.readerN) conversion
// would wrap for a pending count of 2GiB or more on platforms where int is
// 32 bits wide.
func (g *gitCatFileBatch) discard() error {
	const maxChunk = 1<<31 - 1

	for g.readerN > 0 {
		chunk := g.readerN
		if chunk > maxChunk {
			chunk = maxChunk
		}

		n, err := g.out.Discard(int(chunk))
		g.readerN -= int64(n)
		if err != nil {
			return err
		}
	}
	return nil
}

// parseGitCatFileBatchInfoLine parses the info line from git-cat-file. It
// expects the default format of:
//
// <oid> SP <type> SP <size> LF
func parseGitCatFileBatchInfoLine(line []byte) (gitCatFileBatchInfo, error) {
line = bytes.TrimRight(line, "\n")
origLine := line

// PERF this allocates much less than bytes.Split
next := func() []byte {
i := bytes.IndexByte(line, ' ')
if i < 0 {
Expand All @@ -113,17 +184,17 @@ func parseGitCatFileBatchInfoLine(line []byte) (gitCatFileBatchInfo, error) {
var err error
_, err = hex.Decode(info.Hash[:], next())
if err != nil {
return info, err
return info, fmt.Errorf("unexpected git-cat-file --batch info line %q: %w", string(origLine), err)
}

info.Type, err = plumbing.ParseObjectType(string(next()))
if err != nil {
return info, err
return info, fmt.Errorf("unexpected git-cat-file --batch info line %q: %w", string(origLine), err)
}

info.Size, err = strconv.ParseInt(string(next()), 10, 64)
if err != nil {
return info, err
return info, fmt.Errorf("unexpected git-cat-file --batch info line %q: %w", string(origLine), err)
}

return info, nil
Expand All @@ -136,6 +207,10 @@ func (g *gitCatFileBatch) Close() (err error) {
}
}()

if err := g.discard(); err != nil {
return err
}

// This Close will tell git to shutdown
if err := g.inCloser.Close(); err != nil {
return err
Expand Down
58 changes: 57 additions & 1 deletion cmd/git-sg/catfile_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
package main

import "testing"
import (
"io"
"testing"

"github.com/go-git/go-git/v5/plumbing"
"github.com/google/go-cmp/cmp"
)

func TestInfo(t *testing.T) {
p, err := startGitCatFileBatch("")
Expand All @@ -21,6 +27,56 @@ func TestInfo(t *testing.T) {
}
}

// TestContents exercises Contents together with Read against a git-cat-file
// batch process started with an empty dir argument. It resolves HEAD, reads
// the whole object through the batch reader and compares the byte count with
// the reported size, then resolves the same object by its hash and expects
// identical info and content.
func TestContents(t *testing.T) {
	batch, err := startGitCatFileBatch("")
	if err != nil {
		t.Fatal(err)
	}
	defer batch.Close()

	headInfo, err := batch.Contents("HEAD")
	if err != nil {
		t.Fatal(err)
	}
	t.Log(headInfo.Hash, headInfo.Type, headInfo.Size)

	headBody, err := io.ReadAll(batch)
	if err != nil {
		t.Fatal(err)
	}
	t.Log(string(headBody))

	if len(headBody) != int(headInfo.Size) {
		t.Fatalf("amount read (%d) is different to object size (%d)", len(headBody), headInfo.Size)
	}
	if headInfo.Type != plumbing.CommitObject {
		t.Fatalf("expected HEAD to be a commit, got %s", headInfo.Type)
	}

	// Now let's fetch the object again via its hash and see if it stays the
	// same.
	hashInfo, err := batch.Contents(headInfo.Hash.String())
	if err != nil {
		t.Fatal(err)
	}
	if diff := cmp.Diff(headInfo, hashInfo); diff != "" {
		t.Fatalf("info changed (-first, +second):\n%s", diff)
	}

	hashBody, err := io.ReadAll(batch)
	if err != nil {
		t.Fatal(err)
	}
	if diff := cmp.Diff(headBody, hashBody); diff != "" {
		t.Fatalf("content changed (-first, +second):\n%s", diff)
	}

	// The deferred Close ignores its error, so close explicitly here to
	// surface any shutdown failure.
	if err := batch.Close(); err != nil {
		t.Fatal(err)
	}
}

func BenchmarkInfo(b *testing.B) {
p, err := startGitCatFileBatch("")
if err != nil {
Expand Down