diff --git a/pkg/vendir/directory/directory.go b/pkg/vendir/directory/directory.go index be2f89cf..1a06f88d 100644 --- a/pkg/vendir/directory/directory.go +++ b/pkg/vendir/directory/directory.go @@ -119,7 +119,7 @@ func (d *Directory) Sync(syncOpts SyncOpts) (ctlconf.LockDirectory, error) { lockDirContents.Git = &lock case contents.Hg != nil: - hgSync := ctlhg.NewSync(*contents.Hg, NewInfoLog(d.ui), syncOpts.RefFetcher) + hgSync := ctlhg.NewSync(*contents.Hg, NewInfoLog(d.ui), syncOpts.RefFetcher, syncOpts.Cache) d.ui.PrintLinef("Fetching: %s + %s (hg from %s)", d.opts.Path, contents.Path, hgSync.Desc()) diff --git a/pkg/vendir/fetch/hg/hg.go b/pkg/vendir/fetch/hg/hg.go index 0bf11e4d..aa24ff40 100644 --- a/pkg/vendir/fetch/hg/hg.go +++ b/pkg/vendir/fetch/hg/hg.go @@ -5,6 +5,8 @@ package hg import ( "bytes" + "crypto/sha256" + "encoding/hex" "fmt" "io" "net/url" @@ -21,12 +23,28 @@ type Hg struct { opts ctlconf.DirectoryContentsHg infoLog io.Writer refFetcher ctlfetch.RefFetcher + authDir string + env []string + cacheID string } func NewHg(opts ctlconf.DirectoryContentsHg, - infoLog io.Writer, refFetcher ctlfetch.RefFetcher) *Hg { + infoLog io.Writer, refFetcher ctlfetch.RefFetcher, + tempArea ctlfetch.TempArea, +) (*Hg, error) { + t := Hg{opts, infoLog, refFetcher, "", nil, ""} + if err := t.setup(tempArea); err != nil { + return nil, err + } + return &t, nil +} - return &Hg{opts, infoLog, refFetcher} +// getCacheID returns a cache id for the repository +// It doesn't include the ref because we want to reuse a cache when only the ref +// is changed +// Basically we combine all data used to write the hgrc file +func (t *Hg) getCacheID() string { + return t.cacheID } //nolint:revive @@ -35,27 +53,50 @@ type HgInfo struct { ChangeSetTitle string } -func (t *Hg) Retrieve(dstPath string, tempArea ctlfetch.TempArea) (HgInfo, error) { - if len(t.opts.URL) == 0 { - return HgInfo{}, fmt.Errorf("Expected non-empty URL") +// cloneHasTargetRef returns true if the given clone 
contains the target +// ref, and this ref is a revision id (not a tag or a branch) +func (t *Hg) cloneHasTargetRef(dstPath string) bool { + out, _, err := t.run([]string{"id", "--id", "-r", t.opts.Ref}, dstPath) + if err != nil { + return false + } + out = strings.TrimSpace(out) + if strings.HasPrefix(t.opts.Ref, out) { + return true } + return false +} - err := t.fetch(dstPath, tempArea) - if err != nil { +func (t *Hg) clone(dstPath string) error { + if err := t.initClone(dstPath); err != nil { + return err + } + return t.syncClone(dstPath) +} + +func (t *Hg) syncClone(dstPath string) error { + if _, _, err := t.run([]string{"pull"}, dstPath); err != nil { + return err + } + return nil +} + +func (t *Hg) checkout(dstPath string) (HgInfo, error) { + if _, _, err := t.run([]string{"checkout", t.opts.Ref}, dstPath); err != nil { return HgInfo{}, err } info := HgInfo{} // use hg log to retrieve full cset sha - out, _, err := t.run([]string{"log", "-r", ".", "-T", "{node}"}, nil, dstPath) + out, _, err := t.run([]string{"log", "-r", ".", "-T", "{node}"}, dstPath) if err != nil { return HgInfo{}, err } info.SHA = strings.TrimSpace(out) - out, _, err = t.run([]string{"log", "-l", "1", "-T", "{desc|firstline|strip}", "-r", info.SHA}, nil, dstPath) + out, _, err = t.run([]string{"log", "-l", "1", "-T", "{desc|firstline|strip}", "-r", info.SHA}, dstPath) if err != nil { return HgInfo{}, err } @@ -65,7 +106,20 @@ func (t *Hg) Retrieve(dstPath string, tempArea ctlfetch.TempArea) (HgInfo, error return info, nil } -func (t *Hg) fetch(dstPath string, tempArea ctlfetch.TempArea) error { +func (t *Hg) Close() { + if t.authDir != "" { + os.RemoveAll(t.authDir) + t.authDir = "" + } +} + +func (t *Hg) setup(tempArea ctlfetch.TempArea) error { + if len(t.opts.URL) == 0 { + return fmt.Errorf("Expected non-empty URL") + } + + cacheID := t.opts.URL + authOpts, err := t.getAuthOpts() if err != nil { return err @@ -76,17 +130,12 @@ func (t *Hg) fetch(dstPath string, tempArea 
ctlfetch.TempArea) error { return err } - defer os.RemoveAll(authDir) + t.authDir = authDir - env := os.Environ() + t.env = os.Environ() hgURL := t.opts.URL - _, _, err = t.run([]string{"init"}, env, dstPath) - if err != nil { - return err - } - var hgRc string if t.opts.Evolve { @@ -147,39 +196,38 @@ hgauth.password = %s if err != nil { return fmt.Errorf("Writing %s: %s", hgRcPath, err) } - env = append(env, "HGRCPATH="+hgRcPath) + t.env = append(t.env, "HGRCPATH="+hgRcPath) + } + + sha := sha256.Sum256([]byte(cacheID)) + t.cacheID = hex.EncodeToString(sha[:]) + + return nil +} + +func (t *Hg) initClone(dstPath string) error { + hgURL := t.opts.URL + + if _, _, err := t.run([]string{"init"}, dstPath); err != nil { + return err } repoHgRcPath := filepath.Join(dstPath, ".hg", "hgrc") repoHgRc := fmt.Sprintf("[paths]\ndefault = %s\n", hgURL) - err = os.WriteFile(repoHgRcPath, []byte(repoHgRc), 0600) - if err != nil { + if err := os.WriteFile(repoHgRcPath, []byte(repoHgRc), 0600); err != nil { return fmt.Errorf("Writing %s: %s", repoHgRcPath, err) } - return t.runMultiple([][]string{ - {"pull"}, - {"checkout", t.opts.Ref}, - }, env, dstPath) -} - -func (t *Hg) runMultiple(argss [][]string, env []string, dstPath string) error { - for _, args := range argss { - _, _, err := t.run(args, env, dstPath) - if err != nil { - return err - } - } return nil } -func (t *Hg) run(args []string, env []string, dstPath string) (string, string, error) { +func (t *Hg) run(args []string, dstPath string) (string, string, error) { var stdoutBs, stderrBs bytes.Buffer cmd := exec.Command("hg", args...) 
- cmd.Env = env + cmd.Env = t.env cmd.Dir = dstPath cmd.Stdout = io.MultiWriter(t.infoLog, &stdoutBs) cmd.Stderr = io.MultiWriter(t.infoLog, &stderrBs) diff --git a/pkg/vendir/fetch/hg/sync.go b/pkg/vendir/fetch/hg/sync.go index abfb44fe..59c0282c 100644 --- a/pkg/vendir/fetch/hg/sync.go +++ b/pkg/vendir/fetch/hg/sync.go @@ -11,18 +11,20 @@ import ( ctlconf "carvel.dev/vendir/pkg/vendir/config" ctlfetch "carvel.dev/vendir/pkg/vendir/fetch" + ctlcache "carvel.dev/vendir/pkg/vendir/fetch/cache" ) type Sync struct { opts ctlconf.DirectoryContentsHg log io.Writer refFetcher ctlfetch.RefFetcher + cache ctlcache.Cache } func NewSync(opts ctlconf.DirectoryContentsHg, - log io.Writer, refFetcher ctlfetch.RefFetcher) Sync { + log io.Writer, refFetcher ctlfetch.RefFetcher, cache ctlcache.Cache) Sync { - return Sync{opts, log, refFetcher} + return Sync{opts, log, refFetcher, cache} } func (d Sync) Desc() string { @@ -44,11 +46,40 @@ func (d Sync) Sync(dstPath string, tempArea ctlfetch.TempArea) (ctlconf.LockDire defer os.RemoveAll(incomingTmpPath) - hg := NewHg(d.opts, d.log, d.refFetcher) + hg, err := NewHg(d.opts, d.log, d.refFetcher, tempArea) + if err != nil { + return hgLockConf, fmt.Errorf("Setting up hg: %w", err) + } + defer hg.Close() + + if cachePath, ok := d.cache.Has("hg", hg.getCacheID()); ok { + // fetch from cachedDir + if err := d.cache.CopyFrom("hg", hg.getCacheID(), incomingTmpPath); err != nil { + return hgLockConf, fmt.Errorf("Extracting cached hg clone: %w", err) + } + // Sync if needed + if !hg.cloneHasTargetRef(cachePath) { + if err := hg.syncClone(incomingTmpPath); err != nil { + return hgLockConf, fmt.Errorf("Syncing hg repository: %w", err) + } + if err := d.cache.Save("hg", hg.getCacheID(), incomingTmpPath); err != nil { + return hgLockConf, fmt.Errorf("Saving hg repository to cache: %w", err) + } + } + } else { + // fetch in the target directory + if err := hg.clone(incomingTmpPath); err != nil { + return hgLockConf, fmt.Errorf("Cloning hg 
repository: %w", err) + } + if err := d.cache.Save("hg", hg.getCacheID(), incomingTmpPath); err != nil { + return hgLockConf, fmt.Errorf("Saving hg repository to cache: %w", err) + } + } - info, err := hg.Retrieve(incomingTmpPath, tempArea) + // now checkout the wanted revision + info, err := hg.checkout(incomingTmpPath) if err != nil { - return hgLockConf, fmt.Errorf("Fetching hg repository: %s", err) + return hgLockConf, fmt.Errorf("Checking out hg repository: %s", err) } hgLockConf.SHA = info.SHA diff --git a/test/e2e/assets/hg-repos/README.md b/test/e2e/assets/hg-repos/README.md new file mode 100644 index 00000000..ce1330fe --- /dev/null +++ b/test/e2e/assets/hg-repos/README.md @@ -0,0 +1,6 @@ +``build.sh`` assembles a Mercurial repository and related artifacts, suitable for testing the hg +fetcher cache feature: + +- A base repository (repo) +- A bundle with an extra changeset +- A JSON file with the changeset ids we need in the test, and the bundle filename diff --git a/test/e2e/assets/hg-repos/asset.tgz b/test/e2e/assets/hg-repos/asset.tgz new file mode 100644 index 00000000..0f4cdca9 Binary files /dev/null and b/test/e2e/assets/hg-repos/asset.tgz differ diff --git a/test/e2e/assets/hg-repos/build.sh b/test/e2e/assets/hg-repos/build.sh new file mode 100755 index 00000000..bc9f2695 --- /dev/null +++ b/test/e2e/assets/hg-repos/build.sh @@ -0,0 +1,44 @@ +#!/bin/bash + +rm -rf build + +mkdir build +cd build + +mkdir repo +cd repo + +hg init . + +echo "content1" > file1.txt + +hg add file1.txt +hg commit -m "Added file1" +CSET1_ID=$(hg id --id) + +hg tag first-tag +hg phase -p + +hg topic "wip" +echo "content2" > file1.txt +hg commit -m "extra cset" +CSETX_ID=$(hg id --id) + +hg strip -r . + +BUNDLE=$(basename .hg/strip-backup/*-backup.hg) +mv .hg/strip-backup/$BUNDLE .. + +hg checkout 00000 + +cd .. + +cat > info.json <