Skip to content

Commit

Permalink
hg: Add a cache for mercurial repositories.
Browse files Browse the repository at this point in the history
The idea is to save the whole untouched clone (with no checkout) in the cache.

If already present, the pull is done directly in the cache, and is
faster (except on very small repos) because only new changesets are
transferred.

If the ref is a changeset id (not a tag, branch, topic or bookmark), and
the changeset is already known in the cached clone, no pull is done,
which avoids any network exchange.

Then we copy the cached entry and do the checkout.

Signed-off-by: Christophe de Vienne <[email protected]>
  • Loading branch information
cdevienne committed Apr 2, 2024
1 parent 97da79a commit ad46667
Show file tree
Hide file tree
Showing 3 changed files with 85 additions and 12 deletions.
2 changes: 1 addition & 1 deletion pkg/vendir/directory/directory.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ func (d *Directory) Sync(syncOpts SyncOpts) (ctlconf.LockDirectory, error) {
lockDirContents.Git = &lock

case contents.Hg != nil:
hgSync := ctlhg.NewSync(*contents.Hg, NewInfoLog(d.ui), syncOpts.RefFetcher)
hgSync := ctlhg.NewSync(*contents.Hg, NewInfoLog(d.ui), syncOpts.RefFetcher, syncOpts.Cache)

d.ui.PrintLinef("Fetching: %s + %s (hg from %s)", d.opts.Path, contents.Path, hgSync.Desc())

Expand Down
61 changes: 55 additions & 6 deletions pkg/vendir/fetch/hg/hg.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,32 +23,74 @@ type Hg struct {
refFetcher ctlfetch.RefFetcher
authDir string
env []string
cacheID string
}

func NewHg(opts ctlconf.DirectoryContentsHg,
infoLog io.Writer, refFetcher ctlfetch.RefFetcher,
tempArea ctlfetch.TempArea,
) (*Hg, error) {
t := Hg{opts, infoLog, refFetcher, "", nil}
t := Hg{opts, infoLog, refFetcher, "", nil, ""}
if err := t.setup(tempArea); err != nil {
return nil, err
}
return &t, nil
}

// CacheID returns the identifier under which this repository's clone is
// cached.
//
// The ref is deliberately left out of the identifier so that the same cached
// clone is reused when only the ref changes. The identifier is a combination
// of the data used to write the hgrc file.
func (t *Hg) CacheID() string {
	id := t.cacheID
	return id
}

// HgInfo is the result type returned by Hg.Checkout.
//nolint:revive
type HgInfo struct {
// SHA is copied into the hg lock configuration by Sync.
// NOTE(review): presumably the id of the checked-out changeset — confirm in Checkout.
SHA string
// NOTE(review): presumably the description/summary of that changeset — confirm in Checkout.
ChangeSetTitle string
}

func (t *Hg) Retrieve(dstPath string, tempArea ctlfetch.TempArea) (HgInfo, error) {
if len(t.opts.URL) == 0 {
return HgInfo{}, fmt.Errorf("Expected non-empty URL")
// CloneHasTargetRef reports whether the clone at dstPath already contains the
// target ref and that ref is a revision id (not a tag or a branch).
//
// It runs `hg id --id -r <ref>` inside dstPath and compares the trimmed output
// with the configured ref: the ref is only treated as a revision id when it
// starts with the id printed by hg. If the hg command fails, or prints
// nothing, the result is false.
func (t *Hg) CloneHasTargetRef(dstPath string) bool {
	out, _, err := t.run([]string{"id", "--id", "-r", t.opts.Ref}, dstPath)
	if err != nil {
		return false
	}

	id := strings.TrimSpace(out)

	// strings.HasPrefix(s, "") is true for every s, so an empty id has to be
	// rejected explicitly; otherwise empty output would be reported as
	// "ref already present" and callers would skip the pull.
	return id != "" && strings.HasPrefix(t.opts.Ref, id)
}

err := t.fetch(dstPath, tempArea)
if err != nil {
// Clone prepares a new clone at dstPath with initClone and then brings it up
// to date by delegating to SyncClone. The first error encountered is returned.
func (t *Hg) Clone(dstPath string) error {
	initErr := t.initClone(dstPath)
	if initErr != nil {
		return initErr
	}
	return t.SyncClone(dstPath)
}

// SyncClone runs `hg pull` (with no explicit source) in the existing clone at
// dstPath and returns the error from that command, if any.
func (t *Hg) SyncClone(dstPath string) error {
	pullArgs := []string{"pull"}

	_, _, pullErr := t.run(pullArgs, dstPath)
	if pullErr != nil {
		return pullErr
	}
	return nil
}

// LocalClone prepares a new clone at dstPath with initClone and then runs
// `hg pull <localClone>` in it, so the changesets come from the clone located
// at localClone. The first error encountered is returned.
func (t *Hg) LocalClone(localClone, dstPath string) error {
	initErr := t.initClone(dstPath)
	if initErr != nil {
		return initErr
	}

	pullArgs := []string{"pull", localClone}
	_, _, pullErr := t.run(pullArgs, dstPath)
	if pullErr != nil {
		return pullErr
	}
	return nil
}

func (t *Hg) Checkout(dstPath string) (HgInfo, error) {
if _, _, err := t.run([]string{"checkout", t.opts.Ref}, dstPath); err != nil {
return HgInfo{}, err
}

Expand Down Expand Up @@ -80,6 +122,10 @@ func (t *Hg) Close() {
}

func (t *Hg) setup(tempArea ctlfetch.TempArea) error {
if len(t.opts.URL) == 0 {
return fmt.Errorf("Expected non-empty URL")
}

authOpts, err := t.getAuthOpts()
if err != nil {
return err
Expand Down Expand Up @@ -132,6 +178,7 @@ hgauth.password = %s
}

sshCmd = append(sshCmd, "-i", path, "-o", "IdentitiesOnly=yes")
t.cacheID += "private-key=" + *authOpts.PrivateKey + "|"
}

if authOpts.KnownHosts != nil {
Expand All @@ -143,6 +190,7 @@ hgauth.password = %s
}

sshCmd = append(sshCmd, "-o", "StrictHostKeyChecking=yes", "-o", "UserKnownHostsFile="+path)
t.cacheID += "known-hosts=" + *authOpts.KnownHosts + "|"
} else {
sshCmd = append(sshCmd, "-o", "StrictHostKeyChecking=no")
}
Expand All @@ -157,6 +205,7 @@ hgauth.password = %s
return fmt.Errorf("Writing %s: %s", hgRcPath, err)
}
t.env = append(t.env, "HGRCPATH="+hgRcPath)
t.cacheID += hgRc
}

return nil
Expand Down
34 changes: 29 additions & 5 deletions pkg/vendir/fetch/hg/sync.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,18 +11,20 @@ import (

ctlconf "carvel.dev/vendir/pkg/vendir/config"
ctlfetch "carvel.dev/vendir/pkg/vendir/fetch"
ctlcache "carvel.dev/vendir/pkg/vendir/fetch/cache"
)

// Sync bundles what is needed to sync one hg directory-contents entry.
type Sync struct {
// opts is the hg contents configuration; it is passed on to NewHg.
opts ctlconf.DirectoryContentsHg
// log is handed to NewHg as its info log writer.
log io.Writer
// refFetcher is passed on to NewHg.
refFetcher ctlfetch.RefFetcher
// cache stores hg clones under the "hg" type, keyed by Hg.CacheID().
cache ctlcache.Cache
}

func NewSync(opts ctlconf.DirectoryContentsHg,
log io.Writer, refFetcher ctlfetch.RefFetcher) Sync {
log io.Writer, refFetcher ctlfetch.RefFetcher, cache ctlcache.Cache) Sync {

return Sync{opts, log, refFetcher}
return Sync{opts, log, refFetcher, cache}
}

func (d Sync) Desc() string {
Expand All @@ -46,13 +48,35 @@ func (d Sync) Sync(dstPath string, tempArea ctlfetch.TempArea) (ctlconf.LockDire

hg, err := NewHg(d.opts, d.log, d.refFetcher, tempArea)
if err != nil {
return hgLockConf, err
return hgLockConf, fmt.Errorf("Setting up hg: %w", err)
}
defer hg.Close()

info, err := hg.Retrieve(incomingTmpPath, tempArea)
if cachePath, ok := d.cache.Has("hg", hg.CacheID()); ok {
// Sync directly in the cache if needed
if !hg.CloneHasTargetRef(cachePath) {
if err := hg.SyncClone(cachePath); err != nil {
return hgLockConf, fmt.Errorf("Syncing hg cached clone: %w", err)
}
}
// fetch from cachedDir
if err := d.cache.CopyFrom("hg", hg.CacheID(), incomingTmpPath); err != nil {
return hgLockConf, fmt.Errorf("Extracting cached hg clone: %w", err)
}
} else {
// fetch in the target directory, and save it to cache
if err := hg.Clone(incomingTmpPath); err != nil {
return hgLockConf, fmt.Errorf("Cloning hg repository: %w", err)
}
if err := d.cache.Save("hg", hg.CacheID(), incomingTmpPath); err != nil {
return hgLockConf, fmt.Errorf("Saving hg repository to cache: %w", err)
}
}

// now checkout the wanted revision
info, err := hg.Checkout(incomingTmpPath)
if err != nil {
return hgLockConf, fmt.Errorf("Fetching hg repository: %s", err)
return hgLockConf, fmt.Errorf("Checking out hg repository: %s", err)
}

hgLockConf.SHA = info.SHA
Expand Down

0 comments on commit ad46667

Please sign in to comment.