Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use single map for collecting files across branches #839

Merged
merged 2 commits into from
Oct 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 8 additions & 6 deletions cmd/zoekt-repo-index/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ import (
"github.com/sourcegraph/zoekt"
"github.com/sourcegraph/zoekt/build"
"github.com/sourcegraph/zoekt/gitindex"
"github.com/sourcegraph/zoekt/ignore"
"go.uber.org/automaxprocs/maxprocs"

git "github.com/go-git/go-git/v5"
Expand Down Expand Up @@ -180,7 +181,7 @@ func main() {
}
}

perBranch := map[string]map[fileKey]gitindex.BlobRepo{}
perBranch := map[string]map[fileKey]gitindex.BlobLocation{}
opts.SubRepositories = map[string]*zoekt.Repository{}

// branch => repo => version
Expand Down Expand Up @@ -325,8 +326,8 @@ func getManifest(repo *git.Repository, branch, path string) (*manifest.Manifest,
func iterateManifest(mf *manifest.Manifest,
baseURL url.URL, revPrefix string,
cache *gitindex.RepoCache,
) (map[fileKey]gitindex.BlobRepo, map[string]plumbing.Hash, error) {
allFiles := map[fileKey]gitindex.BlobRepo{}
) (map[fileKey]gitindex.BlobLocation, map[string]plumbing.Hash, error) {
allFiles := map[fileKey]gitindex.BlobLocation{}
allVersions := map[string]plumbing.Hash{}
for _, p := range mf.Project {
rev := mf.ProjectRevision(&p)
Expand Down Expand Up @@ -359,20 +360,21 @@ func iterateManifest(mf *manifest.Manifest,
return nil, nil, err
}

files, versions, err := gitindex.TreeToFiles(topRepo, tree, projURL.String(), cache)
rw := gitindex.NewRepoWalker(topRepo, projURL.String(), cache)
subVersions, err := rw.CollectFiles(tree, rev, &ignore.Matcher{})
if err != nil {
return nil, nil, err
}

for key, repo := range files {
for key, repo := range rw.Files {
allFiles[fileKey{
SubRepoPath: filepath.Join(p.GetPath(), key.SubRepoPath),
Path: key.Path,
ID: key.ID,
}] = repo
}

for path, version := range versions {
for path, version := range subVersions {
allVersions[filepath.Join(p.GetPath(), path)] = version
}
}
Expand Down
63 changes: 23 additions & 40 deletions gitindex/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -426,7 +426,7 @@ func indexGitRepo(opts Options, config gitIndexConfig) (bool, error) {
}

// branch => (path, sha1) => repo.
var repos map[fileKey]BlobIndexInfo
var repos map[fileKey]BlobLocation

// Branch => Repo => SHA1
var branchVersions map[string]map[string]plumbing.Hash
Expand All @@ -452,7 +452,7 @@ func indexGitRepo(opts Options, config gitIndexConfig) (bool, error) {
}
}

reposByPath := map[string]BlobIndexInfo{}
reposByPath := map[string]BlobLocation{}
for key, info := range repos {
reposByPath[key.SubRepoPath] = info
}
Expand All @@ -461,9 +461,9 @@ func indexGitRepo(opts Options, config gitIndexConfig) (bool, error) {
for path, info := range reposByPath {
tpl := opts.BuildOptions.RepositoryDescription
if path != "" {
tpl = zoekt.Repository{URL: info.Repo.URL.String()}
if err := SetTemplatesFromOrigin(&tpl, info.Repo.URL); err != nil {
log.Printf("setTemplatesFromOrigin(%s, %s): %s", path, info.Repo.URL, err)
tpl = zoekt.Repository{URL: info.URL.String()}
if err := SetTemplatesFromOrigin(&tpl, info.URL); err != nil {
log.Printf("setTemplatesFromOrigin(%s, %s): %s", path, info.URL, err)
}
}
opts.BuildOptions.SubRepositories[path] = &tpl
Expand Down Expand Up @@ -592,11 +592,11 @@ func newIgnoreMatcher(tree *object.Tree) (*ignore.Matcher, error) {

// prepareDeltaBuildFunc is a function that calculates the necessary metadata for preparing
// a build.Builder instance for generating a delta build.
type prepareDeltaBuildFunc func(options Options, repository *git.Repository) (repos map[fileKey]BlobIndexInfo, branchVersions map[string]map[string]plumbing.Hash, changedOrDeletedPaths []string, err error)
type prepareDeltaBuildFunc func(options Options, repository *git.Repository) (repos map[fileKey]BlobLocation, branchVersions map[string]map[string]plumbing.Hash, changedOrDeletedPaths []string, err error)

// prepareNormalBuildFunc is a function that calculates the necessary metadata for preparing
// a build.Builder instance for generating a normal build.
type prepareNormalBuildFunc func(options Options, repository *git.Repository) (repos map[fileKey]BlobIndexInfo, branchVersions map[string]map[string]plumbing.Hash, err error)
type prepareNormalBuildFunc func(options Options, repository *git.Repository) (repos map[fileKey]BlobLocation, branchVersions map[string]map[string]plumbing.Hash, err error)

type gitIndexConfig struct {
// prepareDeltaBuild, if not nil, is the function that is used to calculate the metadata that will be used to
Expand All @@ -612,7 +612,7 @@ type gitIndexConfig struct {
prepareNormalBuild prepareNormalBuildFunc
}

func prepareDeltaBuild(options Options, repository *git.Repository) (repos map[fileKey]BlobIndexInfo, branchVersions map[string]map[string]plumbing.Hash, changedOrDeletedPaths []string, err error) {
func prepareDeltaBuild(options Options, repository *git.Repository) (repos map[fileKey]BlobLocation, branchVersions map[string]map[string]plumbing.Hash, changedOrDeletedPaths []string, err error) {
if options.Submodules {
return nil, nil, nil, fmt.Errorf("delta builds currently don't support submodule indexing")
}
Expand Down Expand Up @@ -670,7 +670,7 @@ func prepareDeltaBuild(options Options, repository *git.Repository) (repos map[f
}

// branch => (path, sha1) => repo.
repos = map[fileKey]BlobIndexInfo{}
repos = map[fileKey]BlobLocation{}

// branch name -> git worktree at most current commit
branchToCurrentTree := make(map[string]*object.Tree, len(options.Branches))
Expand All @@ -696,12 +696,7 @@ func prepareDeltaBuild(options Options, repository *git.Repository) (repos map[f
}

// TODO: Support repository submodules for delta builds
// For this prototype, we are ignoring repository submodules, which means that we can use the same
// blob location for all files
hackSharedBlobLocation := BlobRepo{
GitRepo: repository,
URL: u,
}

// loop over all branches, calculate the diff between our
// last indexed commit and the current commit, and add files mentioned in the diff
for _, branch := range existingRepository.Branches {
Expand Down Expand Up @@ -742,8 +737,9 @@ func prepareDeltaBuild(options Options, repository *git.Repository) (repos map[f
existing.Branches = append(existing.Branches, branch.Name)
repos[file] = existing
} else {
repos[file] = BlobIndexInfo{
Repo: hackSharedBlobLocation,
repos[file] = BlobLocation{
GitRepo: repository,
URL: u,
Branches: []string{branch.Name},
}
}
Expand Down Expand Up @@ -780,8 +776,9 @@ func prepareDeltaBuild(options Options, repository *git.Repository) (repos map[f
existing.Branches = append(existing.Branches, b)
repos[file] = existing
} else {
repos[file] = BlobIndexInfo{
Repo: hackSharedBlobLocation,
repos[file] = BlobLocation{
GitRepo: repository,
URL: u,
Branches: []string{b},
}
}
Expand All @@ -806,15 +803,12 @@ func prepareDeltaBuild(options Options, repository *git.Repository) (repos map[f
return repos, nil, changedOrDeletedPaths, nil
}

func prepareNormalBuild(options Options, repository *git.Repository) (repos map[fileKey]BlobIndexInfo, branchVersions map[string]map[string]plumbing.Hash, err error) {
func prepareNormalBuild(options Options, repository *git.Repository) (repos map[fileKey]BlobLocation, branchVersions map[string]map[string]plumbing.Hash, err error) {
var repoCache *RepoCache
if options.Submodules {
repoCache = NewRepoCache(options.RepoCacheDir)
}

// branch => (path, sha1) => metadata.
repos = map[fileKey]BlobIndexInfo{}

// Branch => Repo => SHA1
branchVersions = map[string]map[string]plumbing.Hash{}

Expand All @@ -823,6 +817,7 @@ func prepareNormalBuild(options Options, repository *git.Repository) (repos map[
return nil, nil, fmt.Errorf("expandBranches: %w", err)
}

rw := NewRepoWalker(repository, options.BuildOptions.RepositoryDescription.URL, repoCache)
for _, b := range branches {
commit, err := getCommit(repository, options.BranchPrefix, b)
if err != nil {
Expand All @@ -843,35 +838,23 @@ func prepareNormalBuild(options Options, repository *git.Repository) (repos map[
return nil, nil, fmt.Errorf("newIgnoreMatcher: %w", err)
}

files, subVersions, err := TreeToFiles(repository, tree, options.BuildOptions.RepositoryDescription.URL, repoCache)
subVersions, err := rw.CollectFiles(tree, b, ig)
if err != nil {
return nil, nil, fmt.Errorf("TreeToFiles: %w", err)
}
for k, v := range files {
if ig.Match(k.Path) {
continue
}

if existing, ok := repos[k]; ok {
existing.Branches = append(existing.Branches, b)
repos[k] = existing
} else {
repos[k] = BlobIndexInfo{Repo: v, Branches: []string{b}}
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice optimization getting rid of this loop!

return nil, nil, fmt.Errorf("CollectFiles: %w", err)
}

branchVersions[b] = subVersions
}

return repos, branchVersions, nil
return rw.Files, branchVersions, nil
}

func createDocument(key fileKey,
repos map[fileKey]BlobIndexInfo,
repos map[fileKey]BlobLocation,
ranks repoPathRanks,
opts build.Options,
) (zoekt.Document, error) {
repo := repos[key].Repo
repo := repos[key]
blob, err := repo.GitRepo.BlobObject(key.ID)
branches := repos[key].Branches

Expand Down
4 changes: 2 additions & 2 deletions gitindex/index_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -608,13 +608,13 @@ func TestIndexDeltaBasic(t *testing.T) {
// setup: prepare spy versions of prepare delta / normal build so that we can observe
// whether they were called appropriately
deltaBuildCalled := false
prepareDeltaSpy := func(options Options, repository *git.Repository) (repos map[fileKey]BlobIndexInfo, branchVersions map[string]map[string]plumbing.Hash, changedOrDeletedPaths []string, err error) {
prepareDeltaSpy := func(options Options, repository *git.Repository) (repos map[fileKey]BlobLocation, branchVersions map[string]map[string]plumbing.Hash, changedOrDeletedPaths []string, err error) {
deltaBuildCalled = true
return prepareDeltaBuild(options, repository)
}

normalBuildCalled := false
prepareNormalSpy := func(options Options, repository *git.Repository) (repos map[fileKey]BlobIndexInfo, branchVersions map[string]map[string]plumbing.Hash, err error) {
prepareNormalSpy := func(options Options, repository *git.Repository) (repos map[fileKey]BlobLocation, branchVersions map[string]map[string]plumbing.Hash, err error) {
normalBuildCalled = true
return prepareNormalBuild(options, repository)
}
Expand Down
Loading
Loading