From aaeeb646e45790ea1bb77ca06a25e3c900bc0029 Mon Sep 17 00:00:00 2001 From: Julie Tibshirani Date: Mon, 28 Oct 2024 12:16:40 -0700 Subject: [PATCH] Avoid reopening packfile on every object access --- gitindex/index.go | 40 ++++++++++++++++++++-- gitindex/index_test.go | 78 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 116 insertions(+), 2 deletions(-) diff --git a/gitindex/index.go b/gitindex/index.go index 183c3249a..b6105ef84 100644 --- a/gitindex/index.go +++ b/gitindex/index.go @@ -32,6 +32,9 @@ import ( "strconv" "strings" + "github.com/go-git/go-billy/v5/osfs" + "github.com/go-git/go-git/v5/plumbing/cache" + "github.com/go-git/go-git/v5/storage/filesystem" "github.com/sourcegraph/zoekt" "github.com/sourcegraph/zoekt/build" "github.com/sourcegraph/zoekt/ignore" @@ -404,10 +407,12 @@ func indexGitRepo(opts Options, config gitIndexConfig) (bool, error) { } opts.BuildOptions.RepositoryDescription.Source = opts.RepoDir - repo, err := git.PlainOpen(opts.RepoDir) + + repo, repoCloser, err := openRepo(opts.RepoDir) if err != nil { - return false, fmt.Errorf("git.PlainOpen: %w", err) + return false, fmt.Errorf("openRepo: %w", err) } + defer repoCloser.Close() if err := setTemplatesFromConfig(&opts.BuildOptions.RepositoryDescription, opts.RepoDir); err != nil { log.Printf("setTemplatesFromConfig(%s): %s", opts.RepoDir, err) @@ -572,6 +577,37 @@ func indexGitRepo(opts Options, config gitIndexConfig) (bool, error) { return true, builder.Finish() } +// openRepo opens a git repository in a way that's optimized for indexing. +// +// It copies the relevant logic from git.PlainOpen, and enables the filesystem KeepDescriptors option. This +// caches the packfile handles, preventing the packfile from being opened then closed on every object access. +func openRepo(repoDir string) (*git.Repository, io.Closer, error) { + fs := osfs.New(repoDir) + + // Check if the root directory exists. + if _, err := fs.Stat(""); err != nil { + if os.IsNotExist(err) { + return nil, nil, git.ErrRepositoryNotExists + } + return nil, nil, err + } + + // If there's a .git directory, use that as the new root. + if _, err := fs.Stat(git.GitDirName); err == nil { + if fs, err = fs.Chroot(git.GitDirName); err != nil { + return nil, nil, fmt.Errorf("fs.Chroot: %w", err) + } + } + + s := filesystem.NewStorageWithOptions(fs, cache.NewObjectLRUDefault(), filesystem.Options{ + KeepDescriptors: true, + }) + + // Because we're keeping descriptors open, we need to close the storage object when we're done. + repo, err := git.Open(s, fs) + return repo, s, err +} + type repoPathRanks struct { MeanRank float64 `json:"mean_reference_count"` Paths map[string]float64 `json:"paths"` diff --git a/gitindex/index_test.go b/gitindex/index_test.go index d38b49f03..f655c960a 100644 --- a/gitindex/index_test.go +++ b/gitindex/index_test.go @@ -17,6 +17,7 @@ package gitindex import ( "bytes" "context" + "errors" "fmt" "net/url" "os" @@ -64,6 +65,83 @@ func TestIndexEmptyRepo(t *testing.T) { } } +func TestIndexNonexistentRepo(t *testing.T) { + dir := t.TempDir() + desc := zoekt.Repository{ + Name: "nonexistent", + } + opts := Options{ + RepoDir: "does/not/exist", + Branches: []string{"main"}, + BuildOptions: build.Options{ + RepositoryDescription: desc, + IndexDir: dir, + }, + } + + if _, err := IndexGitRepo(opts); err == nil { + t.Fatal("expected error, got none") + } else if !errors.Is(err, git.ErrRepositoryNotExists) { + t.Fatalf("expected git.ErrRepositoryNotExists, got %v", err) + } +} + +func TestIndexTinyRepo(t *testing.T) { + // Create a repo with one file in it. + dir := t.TempDir() + executeCommand(t, dir, exec.Command("git", "init", "-b", "main", "repo")) + executeCommand(t, dir, exec.Command("git", "config", "--global", "user.name", "Thomas")) + executeCommand(t, dir, exec.Command("git", "config", "--global", "user.email", "thomas@google.com")) + + if err := os.WriteFile(filepath.Join(dir, "repo", "file1.go"), []byte("package main\n\nfunc main() {}\n"), 0644); err != nil { + t.Fatalf("WriteFile: %v", err) + } + + repoDir := filepath.Join(dir, "repo") + executeCommand(t, repoDir, exec.Command("git", "add", ".")) + executeCommand(t, repoDir, exec.Command("git", "commit", "-m", "initial commit")) + + // Test that indexing accepts both the repo directory, and the .git subdirectory. + for _, testDir := range []string{"repo", "repo/.git"} { + opts := Options{ + RepoDir: filepath.Join(dir, testDir), + Branches: []string{"main"}, + BuildOptions: build.Options{ + RepositoryDescription: zoekt.Repository{Name: "repo"}, + IndexDir: dir, + }, + } + + if _, err := IndexGitRepo(opts); err != nil { + t.Fatalf("unexpected error %v", err) + } + + searcher, err := shards.NewDirectorySearcher(dir) + if err != nil { + t.Fatal("NewDirectorySearcher", err) + } + + results, err := searcher.Search(context.Background(), &query.Const{Value: true}, &zoekt.SearchOptions{}) + searcher.Close() + + if err != nil { + t.Fatal("search failed", err) + } + + if len(results.Files) != 1 { + t.Fatalf("got search result %v, want 1 file", results.Files) + } + } +} + +func executeCommand(t *testing.T, dir string, cmd *exec.Cmd) *exec.Cmd { + cmd.Dir = dir + if err := cmd.Run(); err != nil { + t.Fatalf("cmd.Run: %v", err) + } + return cmd +} + func TestIndexDeltaBasic(t *testing.T) { type branchToDocumentMap map[string][]zoekt.Document