Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Extract all bufsync.*Funcs into a bufsync.Handler #2554

Merged
merged 6 commits into from
Nov 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 31 additions & 70 deletions private/buf/bufsync/backfill_tags_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,15 @@
// See the License for the specific language governing permissions and
// limitations under the License.

package bufsync
package bufsync_test

import (
"context"
"fmt"
"testing"
"time"

"github.com/bufbuild/buf/private/buf/bufsync"
"github.com/bufbuild/buf/private/bufpkg/bufmodule/bufmoduleref"
"github.com/bufbuild/buf/private/pkg/command"
"github.com/bufbuild/buf/private/pkg/git"
Expand All @@ -36,76 +37,58 @@ func TestBackfilltags(t *testing.T) {
moduleIdentityInHEAD, err := bufmoduleref.NewModuleIdentity("buf.build", "acme", "foo")
require.NoError(t, err)
prepareGitRepoBackfillTags(t, repoDir, moduleIdentityInHEAD)
mockBSRChecker := newMockSyncGitChecker()
mockHandler := newMockSyncHandler()
// prepare the top 5 commits as syncable commits, mark the rest as if they were already synced
var (
commitCount int
allCommitsHashes []string
startSyncHash git.Hash
fakeNowCommitLimitTime time.Time // to be sent as a fake clock and discard "old" commits
)
require.NoError(t, repo.ForEachCommit(func(commit git.Commit) error {
allCommitsHashes = append(allCommitsHashes, commit.Hash().Hex())
commitCount++
if commitCount == 5 {
startSyncHash = commit.Hash()
if commitCount == 6 {
// mark this commit as synced; nothing after this needs to be marked because syncer
// won't travel past this
mockHandler.setSyncPoint(
defaultBranchName,
commit.Hash(),
moduleIdentityInHEAD,
)
}
if commitCount > 5 {
if commitCount == 15 {
// have the time limit at the commit 15 looking back
fakeNowCommitLimitTime = commit.Committer().Timestamp()
}
mockBSRChecker.markSynced(commit.Hash().Hex())
if commitCount == 15 {
// have the time limit at the commit 15 looking back
fakeNowCommitLimitTime = commit.Committer().Timestamp()
}
return nil
}))
mockTagsBackfiller := newMockTagsBackfiller()
mockClock := &mockClock{now: fakeNowCommitLimitTime.Add(lookbackTimeLimit)}
const moduleDir = "." // module is at the git root repo
testSyncer := syncer{
repo: repo,
storageGitProvider: storagegit.NewProvider(repo.Objects()),
logger: zaptest.NewLogger(t),
sortedModulesDirsForSync: []string{moduleDir},
modulesDirsToIdentityOverrideForSync: map[string]bufmoduleref.ModuleIdentity{moduleDir: nil},
syncedGitCommitChecker: mockBSRChecker.checkFunc(),
commitsToTags: make(map[string][]string),
modulesDirsToBranchesToIdentities: make(map[string]map[string]bufmoduleref.ModuleIdentity),
modulesToBranchesExpectedSyncPoints: make(map[string]map[string]string),
modulesIdentitiesToCommitsSyncedCache: make(map[string]map[string]struct{}),
tagsBackfiller: mockTagsBackfiller.backfillFunc(),
errorHandler: &mockErrorHandler{},
}
require.NoError(t, testSyncer.prepareSync(context.Background()))
require.NoError(t, testSyncer.backfillTags(
context.Background(),
moduleDir,
moduleIdentityInHEAD,
defaultBranchName,
startSyncHash,
mockClock,
))
// in total the repo has at least 20 commits, we expect to backfill 11 of them...
syncer, err := bufsync.NewSyncer(
zaptest.NewLogger(t),
&mockClock{now: fakeNowCommitLimitTime.Add(bufsync.LookbackTimeLimit)},
repo,
storagegit.NewProvider(repo.Objects()),
mockHandler,
bufsync.SyncerWithModule(moduleDir, nil),
)
require.NoError(t, err)
require.NoError(t, syncer.Sync(context.Background()))
// in total the repo has at least 20 commits, we expect to backfill 11 of them
// and sync the next 4 commits
assert.GreaterOrEqual(t, len(allCommitsHashes), 20)
assert.Len(t, mockTagsBackfiller.backfilledCommitsToTags, 11)
assert.Len(t, mockHandler.tagsByHash, 15)
// as follows:
for i, commitHash := range allCommitsHashes {
if i < 4 {
// the 4 most recent should not be backfilling anything, those are unsynced commits that will
// be synced by another func.
assert.NotContains(t, mockTagsBackfiller.backfilledCommitsToTags, commitHash)
} else if i < 15 {
if i < 15 {
// Between 0-4, the tags should be synced.
// Between 5-15 the tags should be backfilled.
//
// The commit #5 is the git start sync point, which will also be handled by sync because it's
// sometimes already synced and sometimes not. It's handled by both sync and backfill tags.
//
// The func it's backfilling more than 5 commits, because it needs to backfill until both
// conditions are met, at least 5 commits and at least 24 hours.
assert.Contains(t, mockTagsBackfiller.backfilledCommitsToTags, commitHash)
assert.Contains(t, mockHandler.tagsByHash, commitHash)
} else {
// past the #15 the commits are too old, we don't backfill back there
assert.NotContains(t, mockTagsBackfiller.backfilledCommitsToTags, commitHash)
assert.NotContains(t, mockHandler.tagsByHash, commitHash)
}
}
}
Expand Down Expand Up @@ -141,25 +124,3 @@ func prepareGitRepoBackfillTags(t *testing.T, repoDir string, moduleIdentity buf
time.Sleep(1 * time.Second)
doEmptyCommitAndTag(15)
}

type mockTagsBackfiller struct {
backfilledCommitsToTags map[string]struct{}
}

func newMockTagsBackfiller() mockTagsBackfiller {
return mockTagsBackfiller{backfilledCommitsToTags: make(map[string]struct{})}
}

func (b *mockTagsBackfiller) backfillFunc() TagsBackfiller {
return func(_ context.Context, _ bufmoduleref.ModuleIdentity, alreadySyncedHash git.Hash, _, _ git.Ident, _ []string) (string, error) {
// we don't really test which tags were backfilled, only which commits had its tags backfilled
b.backfilledCommitsToTags[alreadySyncedHash.Hex()] = struct{}{}
return "some-BSR-commit-name", nil
}
}

type mockClock struct {
now time.Time
}

func (c *mockClock) Now() time.Time { return c.now }
176 changes: 79 additions & 97 deletions private/buf/bufsync/bufsync.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"context"
"errors"
"fmt"
"time"

"github.com/bufbuild/buf/private/bufpkg/bufmodule/bufmoduleref"
"github.com/bufbuild/buf/private/pkg/git"
Expand Down Expand Up @@ -149,38 +150,95 @@ type ErrorHandler interface {
) error
}

// Syncer syncs a modules in a git.Repository.
// Handler is a handler for Syncer. It controls the way in which Syncer handles errors, provides
// any information the Syncer needs to Sync commits, and receives ModuleCommits that should be
// synced.
type Handler interface {
ErrorHandler

// SyncModuleCommit is invoked to process a sync point. If an error is returned, sync will abort.
SyncModuleCommit(ctx context.Context, commit ModuleCommit) error

// ResolveSyncPoint is invoked to resolve a syncpoint for a particular module at a particular branch.
// If no syncpoint is found, this function returns nil. If an error is returned, sync will abort.
ResolveSyncPoint(
ctx context.Context,
module bufmoduleref.ModuleIdentity,
branch string,
) (git.Hash, error)

// CheckSyncedGitCommits is invoked when syncing branches to know which commits hashes from a set
// are already synced inthe BSR. It expects to receive the commit hashes that are synced already. If
// an error is returned, sync will abort.
CheckSyncedGitCommits(
ctx context.Context,
module bufmoduleref.ModuleIdentity,
commitHashes map[string]struct{},
) (map[string]struct{}, error)

// GetModuleReleaseBranch is invoked before syncing, to gather release branch names for all the
// modules that are about to be synced. If the BSR module does not exist, the implementation should
// return `ModuleDoesNotExistErr` error.
GetModuleReleaseBranch(
ctx context.Context,
module bufmoduleref.ModuleIdentity,
) (string, error)

// BackfillTags is invoked when a commit with valid modules is found within a lookback threshold
// past the start sync point for such module. The Syncer assumes that the "old" commit is already
// synced, so it will attempt to backfill existing tags using that git hash, in case they were
// recently created or moved there.
//
// A common scenario is SemVer releases: a commit is pushed to the default Git branch, the sync
// process triggers and completes, and some minutes later that commit is tagged "v1.2.3". The next
// time the sync command runs, this backfiller would pick such tag and backfill it to the correct
// BSR commit.
//
// It's expected to return the BSR commit name to which the tags were backfilled.
BackfillTags(
ctx context.Context,
module bufmoduleref.ModuleIdentity,
alreadySyncedHash git.Hash,
author git.Ident,
committer git.Ident,
tags []string,
) (string, error)
}

// Syncer syncs modules in a git.Repository.
type Syncer interface {
// Sync syncs the repository using the provided SyncFunc. It processes commits in reverse
// topological order, loads any configured named modules, extracts any Git metadata for that
// commit, and invokes SyncFunc with a ModuleCommit.
Sync(context.Context, SyncFunc) error
// Sync syncs the repository. It processes commits in reverse topological order, loads any
// configured named modules, extracts any Git metadata for that commit, and invokes
// Handler#SyncModuleCommit with a ModuleCommit.
Sync(context.Context) error
}

// NewSyncer creates a new Syncer.
func NewSyncer(
logger *zap.Logger,
clock Clock,
repo git.Repository,
storageGitProvider storagegit.Provider,
errorHandler ErrorHandler,
handler Handler,
options ...SyncerOption,
) (Syncer, error) {
return newSyncer(
logger,
clock,
repo,
storageGitProvider,
errorHandler,
handler,
options...,
)
}

// SyncerOption configures the creation of a new Syncer.
type SyncerOption func(*syncer) error

// SyncerWithRemote configures a Syncer with a resumption using a SyncPointResolver.
func SyncerWithRemote(remoteName string) SyncerOption {
// SyncerWithGitRemote configures a Syncer to sync commits from particular Git remote.
func SyncerWithGitRemote(gitRemoteName string) SyncerOption {
return func(s *syncer) error {
s.remote = remoteName
s.gitRemoteName = gitRemoteName
return nil
}
}
Expand Down Expand Up @@ -208,43 +266,6 @@ func SyncerWithModule(moduleDir string, identityOverride bufmoduleref.ModuleIden
}
}

// SyncerWithResumption configures a Syncer with a resumption using a SyncPointResolver.
func SyncerWithResumption(resolver SyncPointResolver) SyncerOption {
return func(s *syncer) error {
s.syncPointResolver = resolver
return nil
}
}

// SyncerWithGitCommitChecker configures a git commit checker, to know if a module has a given git
// hash alrady synced in a BSR instance.
func SyncerWithGitCommitChecker(checker SyncedGitCommitChecker) SyncerOption {
return func(s *syncer) error {
s.syncedGitCommitChecker = checker
return nil
}
}

// SyncerWithModuleDefaultBranchGetter configures a getter for modules' default branch, to protect
// those branches syncs in cases like a Git history rewrite. If this option is not passed, the
// syncer treats the BSR default branch as any other branch.
func SyncerWithModuleDefaultBranchGetter(getter ModuleDefaultBranchGetter) SyncerOption {
return func(s *syncer) error {
s.moduleDefaultBranchGetter = getter
return nil
}
}

// SyncerWithTagsBackfiller configures a tags backfiller for older, already synced commits. If this
// option is not passed, the syncer won't try to sync older tags past each module's start sync
// points on all branches.
func SyncerWithTagsBackfiller(backfiller TagsBackfiller) SyncerOption {
return func(s *syncer) error {
s.tagsBackfiller = backfiller
return nil
}
}

// SyncerWithAllBranches sets the syncer to sync all branches. Be default the syncer only processes
// commits in the current checked out branch.
func SyncerWithAllBranches() SyncerOption {
Expand All @@ -254,56 +275,6 @@ func SyncerWithAllBranches() SyncerOption {
}
}

// SyncFunc is invoked by Syncer to process a sync point. If an error is returned,
// sync will abort.
type SyncFunc func(ctx context.Context, commit ModuleCommit) error

// SyncPointResolver is invoked by Syncer to resolve a syncpoint for a particular module
// at a particular branch. If no syncpoint is found, this function returns nil. If an error
// is returned, sync will abort.
type SyncPointResolver func(
ctx context.Context,
module bufmoduleref.ModuleIdentity,
branch string,
) (git.Hash, error)

// SyncedGitCommitChecker is invoked when syncing branches to know which commits hashes from a set
// are already synced inthe BSR. It expects to receive the commit hashes that are synced already. If
// an error is returned, sync will abort.
type SyncedGitCommitChecker func(
ctx context.Context,
module bufmoduleref.ModuleIdentity,
commitHashes map[string]struct{},
) (map[string]struct{}, error)

// ModuleDefaultBranchGetter is invoked before syncing, to gather default branch names for all the
// modules that are about to be synced. If the BSR module does not exist, the implementation should
// return `ModuleDoesNotExistErr` error.
type ModuleDefaultBranchGetter func(
ctx context.Context,
module bufmoduleref.ModuleIdentity,
) (string, error)

// TagsBackfiller is invoked when a commit with valid modules is found within a lookback threshold
// past the start sync point for such module. The Syncer assumes that the "old" commit is already
// synced, so it will attempt to backfill existing tags using that git hash, in case they were
// recently created or moved there.
//
// A common scenario is SemVer releases: a commit is pushed to the default Git branch, the sync
// process triggers and completes, and some minutes later that commit is tagged "v1.2.3". The next
// time the sync command runs, this backfiller would pick such tag and backfill it to the correct
// BSR commit.
//
// It's expected to return the BSR commit name to which the tags were backfilled.
type TagsBackfiller func(
ctx context.Context,
module bufmoduleref.ModuleIdentity,
alreadySyncedHash git.Hash,
author git.Ident,
committer git.Ident,
tags []string,
) (string, error)

// ModuleCommit is a module at a particular commit.
type ModuleCommit interface {
// Branch is the git branch that this module is sourced from.
Expand All @@ -320,3 +291,14 @@ type ModuleCommit interface {
// Bucket is the bucket for the module.
Bucket() storage.ReadBucket
}

// Clock provides the current time.
type Clock interface {
// Now provides the current time.
Now() time.Time
}

// NewRealClock returns a Clock that returns the current time using time#Now().
func NewRealClock() Clock {
return newClock()
}
Loading