Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Avoid reopening packfile on every object access #852

Merged
merged 4 commits into from
Oct 30, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 38 additions & 2 deletions gitindex/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ import (
"strconv"
"strings"

"github.com/go-git/go-billy/v5/osfs"
"github.com/go-git/go-git/v5/plumbing/cache"
"github.com/go-git/go-git/v5/storage/filesystem"
"github.com/sourcegraph/zoekt"
"github.com/sourcegraph/zoekt/build"
"github.com/sourcegraph/zoekt/ignore"
Expand Down Expand Up @@ -404,10 +407,12 @@ func indexGitRepo(opts Options, config gitIndexConfig) (bool, error) {
}

opts.BuildOptions.RepositoryDescription.Source = opts.RepoDir
repo, err := git.PlainOpen(opts.RepoDir)

repo, repoCloser, err := openRepo(opts.RepoDir)
if err != nil {
return false, fmt.Errorf("git.PlainOpen: %w", err)
return false, fmt.Errorf("openRepo: %w", err)
}
defer repoCloser.Close()
jtibshirani marked this conversation as resolved.
Show resolved Hide resolved

if err := setTemplatesFromConfig(&opts.BuildOptions.RepositoryDescription, opts.RepoDir); err != nil {
log.Printf("setTemplatesFromConfig(%s): %s", opts.RepoDir, err)
Expand Down Expand Up @@ -572,6 +577,37 @@ func indexGitRepo(opts Options, config gitIndexConfig) (bool, error) {
return true, builder.Finish()
}

// openRepo opens a git repository in a way that's optimized for indexing.
//
// It copies the relevant logic from git.PlainOpen, and enables the filesystem KeepDescriptors option. This
// caches the packfile handles, preventing the packfile from being opened then closed on every object access.
func openRepo(repoDir string) (*git.Repository, io.Closer, error) {
fs := osfs.New(repoDir)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I included all the parts from git.PlainOpen that seem relevant. I also checked the go-git examples for KeepDescriptors, and this is almost identical: https://github.com/go-git/go-git/blob/3a210b5157e7d2a57eeac29e492efa510d616a24/_examples/ls/main.go#L32-L42

However git.PlainOpen is quite complex and I could be missing something. Here's what I purposefully omitted:

  • Special handling for user home shortcuts ... I tested and this was not necessary 🤔
  • Detecting when .git is a file that points to another git directory (like if you're trying to directly index submodule directory). I don't think we do this in indexing, or ever would.

I've tested zoekt-git-index with all the following:

  • Repos with and without .git subdirectories
  • Relative paths like ../megarepo
  • Shortcut for user home like ~/code/megarepo
  • Updating Sourcegraph to use this version and reindexing a bunch of repos locally


// Check if the root directory exists.
if _, err := fs.Stat(""); err != nil {
if os.IsNotExist(err) {
return nil, nil, git.ErrRepositoryNotExists
}
return nil, nil, err
}

// If there's a .git directory, use that as the new root.
if _, err := fs.Stat(git.GitDirName); err == nil {
if fs, err = fs.Chroot(git.GitDirName); err != nil {
return nil, nil, fmt.Errorf("fs.Chroot: %w", err)
}
}

s := filesystem.NewStorageWithOptions(fs, cache.NewObjectLRUDefault(), filesystem.Options{
KeepDescriptors: true,
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here's the point of the whole change :) This also sets me up to set LargeObjectThreshold in a follow-up PR, which could prove even more important.

})

// Because we're keeping descriptors open, we need to close the storage object when we're done.
repo, err := git.Open(s, fs)
return repo, s, err
}

type repoPathRanks struct {
MeanRank float64 `json:"mean_reference_count"`
Paths map[string]float64 `json:"paths"`
Expand Down
78 changes: 78 additions & 0 deletions gitindex/index_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ package gitindex
import (
"bytes"
"context"
"errors"
"fmt"
"net/url"
"os"
Expand Down Expand Up @@ -64,6 +65,83 @@ func TestIndexEmptyRepo(t *testing.T) {
}
}

func TestIndexNonexistentRepo(t *testing.T) {
dir := t.TempDir()
desc := zoekt.Repository{
Name: "nonexistent",
}
opts := Options{
RepoDir: "does/not/exist",
Branches: []string{"main"},
BuildOptions: build.Options{
RepositoryDescription: desc,
IndexDir: dir,
},
}

if _, err := IndexGitRepo(opts); err == nil {
t.Fatal("expected error, got none")
} else if !errors.Is(err, git.ErrRepositoryNotExists) {
t.Fatalf("expected git.ErrRepositoryNotExists, got %v", err)
}
}

func TestIndexTinyRepo(t *testing.T) {
// Create a repo with one file in it.
dir := t.TempDir()
executeCommand(t, dir, exec.Command("git", "init", "-b", "main", "repo"))
executeCommand(t, dir, exec.Command("git", "config", "--global", "user.name", "Thomas"))
executeCommand(t, dir, exec.Command("git", "config", "--global", "user.email", "[email protected]"))

if err := os.WriteFile(filepath.Join(dir, "repo", "file1.go"), []byte("package main\n\nfunc main() {}\n"), 0644); err != nil {
t.Fatalf("WriteFile: %v", err)
}

repoDir := filepath.Join(dir, "repo")
executeCommand(t, repoDir, exec.Command("git", "add", "."))
executeCommand(t, repoDir, exec.Command("git", "commit", "-m", "initial commit"))

// Test that indexing accepts both the repo directory, and the .git subdirectory.
for _, testDir := range []string{"repo", "repo/.git"} {
opts := Options{
RepoDir: filepath.Join(dir, testDir),
Branches: []string{"main"},
BuildOptions: build.Options{
RepositoryDescription: zoekt.Repository{Name: "repo"},
IndexDir: dir,
},
}

if _, err := IndexGitRepo(opts); err != nil {
t.Fatalf("unexpected error %v", err)
}

searcher, err := shards.NewDirectorySearcher(dir)
if err != nil {
t.Fatal("NewDirectorySearcher", err)
}

results, err := searcher.Search(context.Background(), &query.Const{Value: true}, &zoekt.SearchOptions{})
searcher.Close()

if err != nil {
t.Fatal("search failed", err)
}

if len(results.Files) != 1 {
t.Fatalf("got search result %v, want 1 file", results.Files)
}
}
}

func executeCommand(t *testing.T, dir string, cmd *exec.Cmd) *exec.Cmd {
cmd.Dir = dir
if err := cmd.Run(); err != nil {
t.Fatalf("cmd.Run: %v", err)
}
return cmd
}

func TestIndexDeltaBasic(t *testing.T) {
type branchToDocumentMap map[string][]zoekt.Document

Expand Down
Loading