Skip to content

Commit

Permalink
Avoid reopening packfile on every object access
Browse files Browse the repository at this point in the history
  • Loading branch information
jtibshirani authored and Thomas committed Oct 29, 2024
1 parent 7caa174 commit aaeeb64
Show file tree
Hide file tree
Showing 2 changed files with 116 additions and 2 deletions.
40 changes: 38 additions & 2 deletions gitindex/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ import (
"strconv"
"strings"

"github.com/go-git/go-billy/v5/osfs"
"github.com/go-git/go-git/v5/plumbing/cache"
"github.com/go-git/go-git/v5/storage/filesystem"
"github.com/sourcegraph/zoekt"
"github.com/sourcegraph/zoekt/build"
"github.com/sourcegraph/zoekt/ignore"
Expand Down Expand Up @@ -404,10 +407,12 @@ func indexGitRepo(opts Options, config gitIndexConfig) (bool, error) {
}

opts.BuildOptions.RepositoryDescription.Source = opts.RepoDir
repo, err := git.PlainOpen(opts.RepoDir)

repo, repoCloser, err := openRepo(opts.RepoDir)
if err != nil {
return false, fmt.Errorf("git.PlainOpen: %w", err)
return false, fmt.Errorf("openRepo: %w", err)
}
defer repoCloser.Close()

if err := setTemplatesFromConfig(&opts.BuildOptions.RepositoryDescription, opts.RepoDir); err != nil {
log.Printf("setTemplatesFromConfig(%s): %s", opts.RepoDir, err)
Expand Down Expand Up @@ -572,6 +577,37 @@ func indexGitRepo(opts Options, config gitIndexConfig) (bool, error) {
return true, builder.Finish()
}

// openRepo opens a git repository in a way that's optimized for indexing.
//
// It copies the relevant logic from git.PlainOpen, and enables the filesystem KeepDescriptors option. This
// caches the packfile handles, preventing the packfile from being opened then closed on every object access.
func openRepo(repoDir string) (*git.Repository, io.Closer, error) {
fs := osfs.New(repoDir)

// Check if the root directory exists.
if _, err := fs.Stat(""); err != nil {
if os.IsNotExist(err) {
return nil, nil, git.ErrRepositoryNotExists
}
return nil, nil, err
}

// If there's a .git directory, use that as the new root.
if _, err := fs.Stat(git.GitDirName); err == nil {
if fs, err = fs.Chroot(git.GitDirName); err != nil {
return nil, nil, fmt.Errorf("fs.Chroot: %w", err)
}
}

s := filesystem.NewStorageWithOptions(fs, cache.NewObjectLRUDefault(), filesystem.Options{
KeepDescriptors: true,
})

// Because we're keeping descriptors open, we need to close the storage object when we're done.
repo, err := git.Open(s, fs)
return repo, s, err
}

type repoPathRanks struct {
MeanRank float64 `json:"mean_reference_count"`
Paths map[string]float64 `json:"paths"`
Expand Down
78 changes: 78 additions & 0 deletions gitindex/index_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ package gitindex
import (
"bytes"
"context"
"errors"
"fmt"
"net/url"
"os"
Expand Down Expand Up @@ -64,6 +65,83 @@ func TestIndexEmptyRepo(t *testing.T) {
}
}

func TestIndexNonexistentRepo(t *testing.T) {
dir := t.TempDir()
desc := zoekt.Repository{
Name: "nonexistent",
}
opts := Options{
RepoDir: "does/not/exist",
Branches: []string{"main"},
BuildOptions: build.Options{
RepositoryDescription: desc,
IndexDir: dir,
},
}

if _, err := IndexGitRepo(opts); err == nil {
t.Fatal("expected error, got none")
} else if !errors.Is(err, git.ErrRepositoryNotExists) {
t.Fatalf("expected git.ErrRepositoryNotExists, got %v", err)
}
}

func TestIndexTinyRepo(t *testing.T) {
// Create a repo with one file in it.
dir := t.TempDir()
executeCommand(t, dir, exec.Command("git", "init", "-b", "main", "repo"))
executeCommand(t, dir, exec.Command("git", "config", "--global", "user.name", "Thomas"))
executeCommand(t, dir, exec.Command("git", "config", "--global", "user.email", "[email protected]"))

if err := os.WriteFile(filepath.Join(dir, "repo", "file1.go"), []byte("package main\n\nfunc main() {}\n"), 0644); err != nil {
t.Fatalf("WriteFile: %v", err)
}

repoDir := filepath.Join(dir, "repo")
executeCommand(t, repoDir, exec.Command("git", "add", "."))
executeCommand(t, repoDir, exec.Command("git", "commit", "-m", "initial commit"))

// Test that indexing accepts both the repo directory, and the .git subdirectory.
for _, testDir := range []string{"repo", "repo/.git"} {
opts := Options{
RepoDir: filepath.Join(dir, testDir),
Branches: []string{"main"},
BuildOptions: build.Options{
RepositoryDescription: zoekt.Repository{Name: "repo"},
IndexDir: dir,
},
}

if _, err := IndexGitRepo(opts); err != nil {
t.Fatalf("unexpected error %v", err)
}

searcher, err := shards.NewDirectorySearcher(dir)
if err != nil {
t.Fatal("NewDirectorySearcher", err)
}

results, err := searcher.Search(context.Background(), &query.Const{Value: true}, &zoekt.SearchOptions{})
searcher.Close()

if err != nil {
t.Fatal("search failed", err)
}

if len(results.Files) != 1 {
t.Fatalf("got search result %v, want 1 file", results.Files)
}
}
}

func executeCommand(t *testing.T, dir string, cmd *exec.Cmd) *exec.Cmd {
cmd.Dir = dir
if err := cmd.Run(); err != nil {
t.Fatalf("cmd.Run: %v", err)
}
return cmd
}

func TestIndexDeltaBasic(t *testing.T) {
type branchToDocumentMap map[string][]zoekt.Document

Expand Down

0 comments on commit aaeeb64

Please sign in to comment.