diff --git a/go.mod b/go.mod index e667ef185..8ba4a9969 100644 --- a/go.mod +++ b/go.mod @@ -9,7 +9,7 @@ require ( github.com/cppforlife/go-patch v0.0.0-20240118020416-2147782e467b github.com/google/go-cmp v0.5.9 github.com/hashicorp/go-version v1.6.0 - github.com/k14s/difflib v0.0.0-20201117154628-0c031775bf57 + github.com/k14s/difflib v0.0.0-20240118055029-596a7a5585c3 github.com/k14s/ytt v0.36.0 github.com/mitchellh/go-wordwrap v1.0.1 github.com/spf13/cobra v1.7.0 diff --git a/go.sum b/go.sum index 7b9537a1a..78e15b093 100644 --- a/go.sum +++ b/go.sum @@ -169,8 +169,9 @@ github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHm github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU= github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= -github.com/k14s/difflib v0.0.0-20201117154628-0c031775bf57 h1:CwBRArr+BWBopnUJhDjJw86rPL/jGbEjfHWKzTasSqE= github.com/k14s/difflib v0.0.0-20201117154628-0c031775bf57/go.mod h1:B0xN2MiNBGWOWi9CcfAo9LBI8IU4J1utlbOIJCsmKr4= +github.com/k14s/difflib v0.0.0-20240118055029-596a7a5585c3 h1:q2ikACDbDDbyUcN9JkDcNMGhIx1EBRkctAsPZMr35qM= +github.com/k14s/difflib v0.0.0-20240118055029-596a7a5585c3/go.mod h1:B0xN2MiNBGWOWi9CcfAo9LBI8IU4J1utlbOIJCsmKr4= github.com/k14s/starlark-go v0.0.0-20200720175618-3a5c849cc368 h1:4bcRTTSx+LKSxMWibIwzHnDNmaN1x52oEpvnjCy+8vk= github.com/k14s/starlark-go v0.0.0-20200720175618-3a5c849cc368/go.mod h1:lKGj1op99m4GtQISxoD2t+K+WO/q2NzEPKvfXFQfbCA= github.com/k14s/ytt v0.36.0 h1:ERr7q+r3ziYJv91fvTx2b76d1MIo3SI/EsAS01WU+Zo= diff --git a/pkg/kapp/cmd/app/delete.go b/pkg/kapp/cmd/app/delete.go index 344b5a7fa..a0d974cc1 100644 --- a/pkg/kapp/cmd/app/delete.go +++ b/pkg/kapp/cmd/app/delete.go @@ -233,7 +233,7 @@ func (o *DeleteOptions) calculateAndPresentChanges(existingResources 
[]ctlres.Re ) { // Figure out changes for X existing resources -> 0 new resources - changeFactory := ctldiff.NewChangeFactory(nil, nil, nil) + changeFactory := ctldiff.NewChangeFactory(nil, nil, nil, ctldiff.ChangeOpts{AllowAnchoredDiff: o.DiffFlags.AnchoredDiff}) changeSetFactory := ctldiff.NewChangeSetFactory(o.DiffFlags.ChangeSetOpts, changeFactory) changes, err := changeSetFactory.New(existingResources, nil).Calculate() diff --git a/pkg/kapp/cmd/app/deploy.go b/pkg/kapp/cmd/app/deploy.go index e0141fd79..0eac63d6e 100644 --- a/pkg/kapp/cmd/app/deploy.go +++ b/pkg/kapp/cmd/app/deploy.go @@ -410,7 +410,7 @@ func (o *DeployOptions) calculateAndPresentChanges(existingResources, var clusterChangeSet ctlcap.ClusterChangeSet { // Figure out changes for X existing resources -> X new resources - changeFactory := ctldiff.NewChangeFactory(conf.RebaseMods(), conf.DiffAgainstLastAppliedFieldExclusionMods(), conf.DiffAgainstExistingFieldExclusionMods()) + changeFactory := ctldiff.NewChangeFactory(conf.RebaseMods(), conf.DiffAgainstLastAppliedFieldExclusionMods(), conf.DiffAgainstExistingFieldExclusionMods(), ctldiff.ChangeOpts{AllowAnchoredDiff: o.DiffFlags.AnchoredDiff}) changeSetFactory := ctldiff.NewChangeSetFactory(o.DiffFlags.ChangeSetOpts, changeFactory) err := ctldiff.NewRenewableResources(existingResources, newResources).Prepare() diff --git a/pkg/kapp/cmd/tools/diff.go b/pkg/kapp/cmd/tools/diff.go index 7e7e74e8f..74b777a1f 100644 --- a/pkg/kapp/cmd/tools/diff.go +++ b/pkg/kapp/cmd/tools/diff.go @@ -53,7 +53,7 @@ func (o *DiffOptions) Run() error { return err } - changeFactory := ctldiff.NewChangeFactory(nil, nil, nil) + changeFactory := ctldiff.NewChangeFactory(nil, nil, nil, ctldiff.ChangeOpts{AllowAnchoredDiff: o.DiffFlags.AnchoredDiff}) changes, err := ctldiff.NewChangeSet(existingResources, newResources, o.DiffFlags.ChangeSetOpts, changeFactory).Calculate() if err != nil { diff --git a/pkg/kapp/cmd/tools/diff_flags.go b/pkg/kapp/cmd/tools/diff_flags.go index 5ea8424a7..73889b7cb 100644 --- 
a/pkg/kapp/cmd/tools/diff_flags.go +++ b/pkg/kapp/cmd/tools/diff_flags.go @@ -17,6 +17,8 @@ type DiffFlags struct { Run bool ExitStatus bool UI bool + + AnchoredDiff bool } func (s *DiffFlags) SetWithPrefix(prefix string, cmd *cobra.Command) { @@ -39,4 +41,6 @@ func (s *DiffFlags) SetWithPrefix(prefix string, cmd *cobra.Command) { cmd.Flags().StringVar(&s.Filter, prefix+"filter", "", `Set changes filter (example: {"and":[{"ops":["update"]},{"existingResource":{"kinds":["Deployment"]}]})`) cmd.Flags().BoolVar(&s.ChangesYAML, prefix+"changes-yaml", false, "Print YAML to be applied") + + cmd.Flags().BoolVar(&s.AnchoredDiff, prefix+"anchored", false, "Allow using anchored diff for large resources") } diff --git a/pkg/kapp/config/default_test.go b/pkg/kapp/config/default_test.go index e8ace3f56..3b1cb0fdb 100644 --- a/pkg/kapp/config/default_test.go +++ b/pkg/kapp/config/default_test.go @@ -15,7 +15,7 @@ import ( func TestDefaultTemplateRules(t *testing.T) { _, defaultConfig, err := config.NewConfFromResourcesWithDefaults([]ctlres.Resource{}) require.NoError(t, err) - changeFactory := ctldiff.NewChangeFactory(defaultConfig.RebaseMods(), defaultConfig.DiffAgainstLastAppliedFieldExclusionMods(), defaultConfig.DiffAgainstExistingFieldExclusionMods()) + changeFactory := ctldiff.NewChangeFactory(defaultConfig.RebaseMods(), defaultConfig.DiffAgainstLastAppliedFieldExclusionMods(), defaultConfig.DiffAgainstExistingFieldExclusionMods(), ctldiff.ChangeOpts{AllowAnchoredDiff: false}) testCases := []struct { description string diff --git a/pkg/kapp/diff/change.go b/pkg/kapp/diff/change.go index f216f4de4..24ba26e40 100644 --- a/pkg/kapp/diff/change.go +++ b/pkg/kapp/diff/change.go @@ -45,11 +45,13 @@ type ChangeImpl struct { configurableTextDiff *ConfigurableTextDiff opsDiff *OpsDiff changeOpVal ChangeOp + + opts ChangeOpts } var _ Change = &ChangeImpl{} -func NewChange(existingRes, newRes, appliedRes, clusterOriginalRes ctlres.Resource) *ChangeImpl { +func NewChange(existingRes, newRes, appliedRes, 
clusterOriginalRes ctlres.Resource, opts ChangeOpts) *ChangeImpl { if existingRes == nil && newRes == nil { panic("Expected either existingRes or newRes be non-nil") } @@ -67,7 +69,7 @@ func NewChange(existingRes, newRes, appliedRes, clusterOriginalRes ctlres.Resour clusterOriginalRes = clusterOriginalRes.DeepCopy() } - return &ChangeImpl{existingRes: existingRes, newRes: newRes, appliedRes: appliedRes, clusterOriginalRes: clusterOriginalRes} + return &ChangeImpl{existingRes: existingRes, newRes: newRes, appliedRes: appliedRes, clusterOriginalRes: clusterOriginalRes, opts: opts} } func (d *ChangeImpl) NewOrExistingResource() ctlres.Resource { @@ -129,7 +131,7 @@ func (d *ChangeImpl) isIgnoredTransient() bool { func (d *ChangeImpl) ConfigurableTextDiff() *ConfigurableTextDiff { // diff is called very often, so memoize if d.configurableTextDiff == nil { - d.configurableTextDiff = NewConfigurableTextDiff(d.existingRes, d.newRes, d.IsIgnored()) + d.configurableTextDiff = NewConfigurableTextDiff(d.existingRes, d.newRes, d.IsIgnored(), d.opts) } return d.configurableTextDiff } diff --git a/pkg/kapp/diff/change_factory.go b/pkg/kapp/diff/change_factory.go index f4e4f2f90..022b7611b 100644 --- a/pkg/kapp/diff/change_factory.go +++ b/pkg/kapp/diff/change_factory.go @@ -11,12 +11,17 @@ type ChangeFactory struct { rebaseMods []ctlres.ResourceModWithMultiple diffAgainstLastAppliedFieldExclusionMods []ctlres.FieldRemoveMod diffAgainstExistingFieldExclusionRules []ctlres.FieldRemoveMod + opts ChangeOpts // forwarded to NewChange when the factory builds a Change +} + +type ChangeOpts struct { + AllowAnchoredDiff bool } func NewChangeFactory(rebaseMods []ctlres.ResourceModWithMultiple, - diffAgainstLastAppliedFieldExclusionMods []ctlres.FieldRemoveMod, diffAgainstExistingFieldExclusionRules []ctlres.FieldRemoveMod) ChangeFactory { + diffAgainstLastAppliedFieldExclusionMods []ctlres.FieldRemoveMod, diffAgainstExistingFieldExclusionRules []ctlres.FieldRemoveMod, opts ChangeOpts) ChangeFactory { - return ChangeFactory{rebaseMods, 
diffAgainstLastAppliedFieldExclusionMods, diffAgainstExistingFieldExclusionRules} + return ChangeFactory{rebaseMods, diffAgainstLastAppliedFieldExclusionMods, diffAgainstExistingFieldExclusionRules, opts} } func (f ChangeFactory) NewChangeAgainstLastApplied(existingRes, newRes ctlres.Resource) (Change, error) { @@ -59,7 +64,7 @@ func (f ChangeFactory) NewChangeAgainstLastApplied(existingRes, newRes ctlres.Re return nil, err } - return NewChange(existingRes, rebasedNewRes, newRes, existingResForRebasing), nil + return NewChange(existingRes, rebasedNewRes, newRes, existingResForRebasing, f.opts), nil } func (f ChangeFactory) NewExactChange(existingRes, newRes ctlres.Resource) (Change, error) { @@ -86,7 +91,7 @@ func (f ChangeFactory) NewExactChange(existingRes, newRes ctlres.Resource) (Chan return nil, err } - return NewChange(existingRes, rebasedNewRes, newRes, existingRes), nil + return NewChange(existingRes, rebasedNewRes, newRes, existingRes, f.opts), nil } func (f ChangeFactory) NewResourceWithHistory(resource ctlres.Resource) ResourceWithHistory { diff --git a/pkg/kapp/diff/change_set_test.go b/pkg/kapp/diff/change_set_test.go index f42340222..fde06c940 100644 --- a/pkg/kapp/diff/change_set_test.go +++ b/pkg/kapp/diff/change_set_test.go @@ -48,7 +48,7 @@ metadata: }, } - changeFactory := ctldiff.NewChangeFactory(mods, nil, nil) + changeFactory := ctldiff.NewChangeFactory(mods, nil, nil, ctldiff.ChangeOpts{AllowAnchoredDiff: false}) changeSet := ctldiff.NewChangeSet([]ctlres.Resource{existingRes}, []ctlres.Resource{newRes}, ctldiff.ChangeSetOpts{}, changeFactory) @@ -106,7 +106,7 @@ metadata: }, } - changeFactory := ctldiff.NewChangeFactory(mods, nil, nil) + changeFactory := ctldiff.NewChangeFactory(mods, nil, nil, ctldiff.ChangeOpts{AllowAnchoredDiff: false}) changeSet := ctldiff.NewChangeSet([]ctlres.Resource{existingRes}, []ctlres.Resource{newRes}, ctldiff.ChangeSetOpts{}, changeFactory) @@ -174,7 +174,7 @@ metadata: }, } - changeFactory := ctldiff.NewChangeFactory(rebaseMods, ignoreFieldsMods, 
nil) + changeFactory := ctldiff.NewChangeFactory(rebaseMods, ignoreFieldsMods, nil, ctldiff.ChangeOpts{AllowAnchoredDiff: false}) changeSet := ctldiff.NewChangeSet([]ctlres.Resource{existingRes}, []ctlres.Resource{newRes}, ctldiff.ChangeSetOpts{AgainstLastApplied: true}, changeFactory) @@ -246,7 +246,7 @@ metadata: }, } - changeFactory := ctldiff.NewChangeFactory(rebaseMods, ignoreFieldsMods, nil) + changeFactory := ctldiff.NewChangeFactory(rebaseMods, ignoreFieldsMods, nil, ctldiff.ChangeOpts{AllowAnchoredDiff: false}) changeSet := ctldiff.NewChangeSet([]ctlres.Resource{existingRes}, []ctlres.Resource{newRes}, ctldiff.ChangeSetOpts{AgainstLastApplied: true}, changeFactory) @@ -304,7 +304,7 @@ metadata: }, } - changeFactory := ctldiff.NewChangeFactory(mods, nil, nil) + changeFactory := ctldiff.NewChangeFactory(mods, nil, nil, ctldiff.ChangeOpts{AllowAnchoredDiff: false}) changeSet := ctldiff.NewChangeSet([]ctlres.Resource{existingRes}, []ctlres.Resource{newRes}, ctldiff.ChangeSetOpts{}, changeFactory) diff --git a/pkg/kapp/diff/change_set_with_versioned_rs.go b/pkg/kapp/diff/change_set_with_versioned_rs.go index c6587d0ba..b43391007 100644 --- a/pkg/kapp/diff/change_set_with_versioned_rs.go +++ b/pkg/kapp/diff/change_set_with_versioned_rs.go @@ -198,7 +198,7 @@ func (d ChangeSetWithVersionedRs) noopAndDeleteChanges( } func (d ChangeSetWithVersionedRs) newKeepChange(existingRes ctlres.Resource) Change { - return NewChangePrecalculated(existingRes, nil, nil, ChangeOpKeep, NewConfigurableTextDiff(existingRes, nil, true), OpsDiff{}) + return NewChangePrecalculated(existingRes, nil, nil, ChangeOpKeep, NewConfigurableTextDiff(existingRes, nil, true, ChangeOpts{AllowAnchoredDiff: false}), OpsDiff{}) } func (d ChangeSetWithVersionedRs) newNoopChange(existingRes ctlres.Resource) Change { diff --git a/pkg/kapp/diff/configurable_text_diff.go b/pkg/kapp/diff/configurable_text_diff.go index 575e99a72..b5442f438 100644 --- a/pkg/kapp/diff/configurable_text_diff.go +++ b/pkg/kapp/diff/configurable_text_diff.go @@ -16,10 +16,12 @@ type 
ConfigurableTextDiff struct { ignored bool memoizedTextDiff *TextDiff + + opts ChangeOpts } -func NewConfigurableTextDiff(existingRes, newRes ctlres.Resource, ignored bool) *ConfigurableTextDiff { - return &ConfigurableTextDiff{existingRes, newRes, ignored, nil} +func NewConfigurableTextDiff(existingRes, newRes ctlres.Resource, ignored bool, opts ChangeOpts) *ConfigurableTextDiff { + return &ConfigurableTextDiff{existingRes, newRes, ignored, nil, opts} } func (d ConfigurableTextDiff) Full() TextDiff { @@ -73,5 +75,5 @@ func (d ConfigurableTextDiff) calculate(existingRes, newRes ctlres.Resource) Tex newLines = existingLines // show as no changes } - return NewTextDiff(existingLines, newLines) + return NewTextDiff(existingLines, newLines, d.opts.AllowAnchoredDiff) } diff --git a/pkg/kapp/diff/text_diff.go b/pkg/kapp/diff/text_diff.go index bf79be7fc..753cd581a 100644 --- a/pkg/kapp/diff/text_diff.go +++ b/pkg/kapp/diff/text_diff.go @@ -15,7 +15,11 @@ type TextDiff struct { recs []difflib.DiffRecord } -func NewTextDiff(existingLines, newLines []string) TextDiff { +func NewTextDiff(existingLines, newLines []string, allowAnchoredDiff bool) TextDiff { + if allowAnchoredDiff && (len(existingLines) > 500 || len(newLines) > 500) { + // difflib.Diff is memory hungry, so past 500 lines on either side use difflib.AnchoredDiff + return TextDiff{difflib.AnchoredDiff(existingLines, newLines)} + } return TextDiff{difflib.Diff(existingLines, newLines)} } diff --git a/test/e2e/diff_test.go b/test/e2e/diff_test.go index c83279492..20e02e593 100644 --- a/test/e2e/diff_test.go +++ b/test/e2e/diff_test.go @@ -486,3 +486,125 @@ metadata: RunOpts{IntoNs: true, StdinReader: strings.NewReader(yaml1)}) }) } + +func TestAnchoredDiff(t *testing.T) { + env := BuildEnv(t) + logger := Logger{} + kapp := Kapp{t, env.Namespace, env.KappBinaryPath, logger} + + name := "test-anchored-diff" + cleanUp := func() { + kapp.Run([]string{"delete", "-a", name}) + } + + cleanUp() + defer cleanUp() + + yaml1 := `apiVersion: v1 +kind: 
ConfigMap +metadata: + name: cm-1 + annotations: + kbld.k14s.io/images: | + - origins: + - resolved: + tag: 9.5.5 + url: docker.io/grafana/grafana:9.5.5 + url: index.docker.io/grafana/grafana@sha256:6c6fe32401b6b14e1886e61a7bacd5cc4b6fbd0de1e58e985db0e48f99fe1be1 + - origins: + - resolved: + tag: 1.24.3 + url: quay.io/kiwigrid/k8s-sidecar:1.24.3 + url: quay.io/kiwigrid/k8s-sidecar@sha256:5af76eebbba79edf4f7471bf1c3d5f2b40858114730c92d95eafe5716abe1fe8 +data: + +` + + yaml2 := `apiVersion: v1 +kind: ConfigMap +metadata: + name: cm-1 + annotations: + kbld.k14s.io/images: | + - origins: + - resolved: + tag: 10.1.4 + url: docker.io/grafana/grafana:10.1.4 + url: index.docker.io/grafana/grafana@sha256:29f39e23705d3ef653fa84ca3c01731e0771f1fedbd69ecb99868270cdeb0572 + - origins: + - resolved: + tag: 1.25.1 + url: quay.io/kiwigrid/k8s-sidecar:1.25.1 + url: quay.io/kiwigrid/k8s-sidecar@sha256:415d07ee1027c3ff7af9e26e05e03ffd0ec0ccf9f619ac00ab24366efe4343bd +data: + +` + // Add keys so that number of lines in the yamls are > 500 + for i := 0; i <= 500; i++ { + line := fmt.Sprintf(" key%v: value%v\n", i, i) + yaml1 += line + yaml2 += line + } + + logger.Section("deploy initial", func() { + kapp.RunWithOpts([]string{"deploy", "-f", "-", "-a", name}, RunOpts{IntoNs: true, StdinReader: strings.NewReader(yaml1)}) + }) + + logger.Section("deploy without anchored diff", func() { + out, _ := kapp.RunWithOpts([]string{"deploy", "-f", "-", "-a", name, "-c", "--diff-run", "--diff-summary=false"}, RunOpts{IntoNs: true, StdinReader: strings.NewReader(yaml2)}) + expectedDiff := ` +@@ update configmap/cm-1 (v1) namespace: kapp-test @@ + ... 
+508,508 - resolved: +509 - tag: 9.5.5 +510 - url: docker.io/grafana/grafana:9.5.5 +511 - url: index.docker.io/grafana/grafana@sha256:6c6fe32401b6b14e1886e61a7bacd5cc4b6fbd0de1e58e985db0e48f99fe1be1 + 509 + tag: 10.1.4 + 510 + url: docker.io/grafana/grafana:10.1.4 + 511 + url: index.docker.io/grafana/grafana@sha256:29f39e23705d3ef653fa84ca3c01731e0771f1fedbd69ecb99868270cdeb0572 +512,512 - origins: +513,513 - resolved: +514 - tag: 1.24.3 +515 - url: quay.io/kiwigrid/k8s-sidecar:1.24.3 +516 - url: quay.io/kiwigrid/k8s-sidecar@sha256:5af76eebbba79edf4f7471bf1c3d5f2b40858114730c92d95eafe5716abe1fe8 + 514 + tag: 1.25.1 + 515 + url: quay.io/kiwigrid/k8s-sidecar:1.25.1 + 516 + url: quay.io/kiwigrid/k8s-sidecar@sha256:415d07ee1027c3ff7af9e26e05e03ffd0ec0ccf9f619ac00ab24366efe4343bd +517,517 creationTimestamp: "2006-01-02T15:04:05Z07:00" +518,518 labels: + +Succeeded +` + require.Equal(t, expectedDiff, replaceTimestampWithDfaultValue(replaceTarget(out))) + }) + + logger.Section("deploy with anchored diff", func() { + out, _ := kapp.RunWithOpts([]string{"deploy", "-f", "-", "-a", name, "-c", "--diff-run", "--diff-summary=false", "--diff-anchored"}, RunOpts{IntoNs: true, StdinReader: strings.NewReader(yaml2)}) + expectedDiff := ` +@@ update configmap/cm-1 (v1) namespace: kapp-test @@ + ... 
+508,508 - resolved: +509 - tag: 9.5.5 +510 - url: docker.io/grafana/grafana:9.5.5 +511 - url: index.docker.io/grafana/grafana@sha256:6c6fe32401b6b14e1886e61a7bacd5cc4b6fbd0de1e58e985db0e48f99fe1be1 +512 - - origins: +513 - - resolved: +514 - tag: 1.24.3 +515 - url: quay.io/kiwigrid/k8s-sidecar:1.24.3 +516 - url: quay.io/kiwigrid/k8s-sidecar@sha256:5af76eebbba79edf4f7471bf1c3d5f2b40858114730c92d95eafe5716abe1fe8 + 509 + tag: 10.1.4 + 510 + url: docker.io/grafana/grafana:10.1.4 + 511 + url: index.docker.io/grafana/grafana@sha256:29f39e23705d3ef653fa84ca3c01731e0771f1fedbd69ecb99868270cdeb0572 + 512 + - origins: + 513 + - resolved: + 514 + tag: 1.25.1 + 515 + url: quay.io/kiwigrid/k8s-sidecar:1.25.1 + 516 + url: quay.io/kiwigrid/k8s-sidecar@sha256:415d07ee1027c3ff7af9e26e05e03ffd0ec0ccf9f619ac00ab24366efe4343bd +517,517 creationTimestamp: "2006-01-02T15:04:05Z07:00" +518,518 labels: + +Succeeded +` + require.Equal(t, expectedDiff, replaceTimestampWithDfaultValue(replaceTarget(out))) + }) +} diff --git a/vendor/github.com/k14s/difflib/difflib.go b/vendor/github.com/k14s/difflib/difflib.go index fe7d56a68..1fe57228b 100644 --- a/vendor/github.com/k14s/difflib/difflib.go +++ b/vendor/github.com/k14s/difflib/difflib.go @@ -20,17 +20,18 @@ import ( "bytes" "fmt" "math" + "sort" "strings" ) // DeltaType describes the relationship of elements in two // sequences. The following table provides a summary: // -// Constant Code Meaning -// ---------- ------ --------------------------------------- -// Common " " The element occurs in both sequences. -// LeftOnly "-" The element is unique to sequence 1. -// RightOnly "+" The element is unique to sequence 2. +// Constant Code Meaning +// ---------- ------ --------------------------------------- +// Common " " The element occurs in both sequences. +// LeftOnly "-" The element is unique to sequence 1. +// RightOnly "+" The element is unique to sequence 2. 
type DeltaType int const ( @@ -53,9 +54,9 @@ func (t DeltaType) String() string { } type DiffRecord struct { - Payload string - Delta DeltaType - LineLeft int + Payload string + Delta DeltaType + LineLeft int LineRight int } @@ -80,7 +81,7 @@ func Diff(seq1, seq2 []string) (diff []DiffRecord) { diff = append(diff, diffRes...) for i, content := range seq1[len(seq1)-end:] { - diff = append(diff, DiffRecord{content, Common, len(seq1)-end+i, len(seq2)-end+i}) + diff = append(diff, DiffRecord{content, Common, len(seq1) - end + i, len(seq2) - end + i}) } return } @@ -211,16 +212,248 @@ func compute(seq1, seq2 []string, startLine int) (diff []DiffRecord) { i, j := len(seq1), len(seq2) for i > 0 || j > 0 { if i > 0 && matrix[i][j] == matrix[i-1][j] { - diff = append(diff, DiffRecord{seq1[len(seq1)-i], LeftOnly, startLine+len(seq1)-i, startLine+len(seq2)-j}) + diff = append(diff, DiffRecord{seq1[len(seq1)-i], LeftOnly, startLine + len(seq1) - i, startLine + len(seq2) - j}) i-- } else if j > 0 && matrix[i][j] == matrix[i][j-1] { - diff = append(diff, DiffRecord{seq2[len(seq2)-j], RightOnly, startLine+len(seq1)-i, startLine+len(seq2)-j}) + diff = append(diff, DiffRecord{seq2[len(seq2)-j], RightOnly, startLine + len(seq1) - i, startLine + len(seq2) - j}) j-- } else if i > 0 && j > 0 { - diff = append(diff, DiffRecord{seq1[len(seq1)-i], Common, startLine+len(seq1)-i, startLine+len(seq2)-j}) + diff = append(diff, DiffRecord{seq1[len(seq1)-i], Common, startLine + len(seq1) - i, startLine + len(seq2) - j}) i-- j-- } } return } + +// A pair is a pair of values tracked for both the x and y side of a diff. +// It is typically a pair of line indexes. +type pair struct{ x, y int } + +// Diff returns an anchored diff of the two texts old and new +// in the “unified diff” format. If old and new are identical, +// Diff returns a nil slice (no output). 
+// +// Unix diff implementations typically look for a diff with +// the smallest number of lines inserted and removed, +// which can in the worst case take time quadratic in the +// number of lines in the texts. As a result, many implementations +// either can be made to run for a long time or cut off the search +// after a predetermined amount of work. +// +// In contrast, this implementation looks for a diff with the +// smallest number of “unique” lines inserted and removed, +// where unique means a line that appears just once in both old and new. +// We call this an “anchored diff” because the unique lines anchor +// the chosen matching regions. An anchored diff is usually clearer +// than a standard diff, because the algorithm does not try to +// reuse unrelated blank lines or closing braces. +// The algorithm also guarantees to run in O(n log n) time +// instead of the standard O(n²) time. +// +// Some systems call this approach a “patience diff,” named for +// the “patience sorting” algorithm, itself named for a solitaire card game. +// We avoid that name for two reasons. First, the name has been used +// for a few different variants of the algorithm, so it is imprecise. +// Second, the name is frequently interpreted as meaning that you have +// to wait longer (to be patient) for the diff, meaning that it is a slower algorithm, +// when in fact the algorithm is faster than the standard one. +func AnchoredDiff(seq1, seq2 []string) []DiffRecord { + diff := []DiffRecord{} + equalDiff := []DiffRecord{} + + // Loop over matches to consider, + // expanding each match to include surrounding lines, + // and then printing diff chunks. + // To avoid setup/teardown cases outside the loop, + // tgs returns a leading {0,0} and trailing {len(x), len(y)} pair + // in the sequence of matches. 
+ var ( + done pair // printed up to x[:done.x] and y[:done.y] + chunk pair // start lines of current chunk + count pair // number of lines from each side in current chunk + ctext []struct{} // lines for current chunk + ) + for _, m := range tgs(seq1, seq2) { + if m.x < done.x { + // Already handled scanning forward from earlier match. + continue + } + + // Expand matching lines as far possible, + // establishing that x[start.x:end.x] == y[start.y:end.y]. + // Note that on the first (or last) iteration we may (or definitely do) + // have an empty match: start.x==end.x and start.y==end.y. + start := m + for start.x > done.x && start.y > done.y && seq1[start.x-1] == seq2[start.y-1] { + start.x-- + start.y-- + } + end := m + for end.x < len(seq1) && end.y < len(seq2) && seq1[end.x] == seq2[end.y] { + equalDiff = append(equalDiff, DiffRecord{seq1[end.x], Common, end.x, end.y}) + end.x++ + end.y++ + } + + // If both sequences are identical, then add 'common' diff for all lines + if start.x == 0 && start.y == 0 && end.x == len(seq1) && end.y == len(seq2) { + diff = append(diff, equalDiff...) + } + + // Emit the mismatched lines before start into this chunk. + // (No effect on first sentinel iteration, when start = {0,0}.) + for _, s := range seq1[done.x:start.x] { + diff = append(diff, DiffRecord{s, LeftOnly, chunk.x + count.x, chunk.y + count.y}) + ctext = append(ctext, struct{}{}) + count.x++ + } + for _, s := range seq2[done.y:start.y] { + diff = append(diff, DiffRecord{s, RightOnly, chunk.x + count.x, chunk.y + count.y}) + ctext = append(ctext, struct{}{}) + count.y++ + } + + // If we're not at EOF and have too few common lines, + // the chunk includes all the common lines and continues. 
+ const C = 30 // maximum number of context lines + if (end.x < len(seq1) || end.y < len(seq2)) && + (end.x-start.x < C || (len(ctext) > 0 && end.x-start.x < 2*C)) { + for _, s := range seq1[start.x:end.x] { + ctext = append(ctext, struct{}{}) + diff = append(diff, DiffRecord{s, Common, chunk.x + count.x, chunk.y + count.y}) + count.x++ + count.y++ + } + done = end + continue + } + + // End chunk with common lines for context. + if len(ctext) > 0 { + n := end.x - start.x + if n > C { + n = C + } + for _, s := range seq1[start.x : start.x+n] { + ctext = append(ctext, struct{}{}) + diff = append(diff, DiffRecord{s, Common, chunk.x + count.x, chunk.y + count.y}) + count.x++ + count.y++ + } + done = pair{start.x + n, start.y + n} + + // Format and emit chunk. + // Convert line numbers to 1-indexed. + // Special case: empty file shows up as 0,0 not 1,0. + if count.x > 0 { + chunk.x++ + } + if count.y > 0 { + chunk.y++ + } + count.x = 0 + count.y = 0 + ctext = ctext[:0] + } + + // If we reached EOF, we're done. + if end.x >= len(seq1) && end.y >= len(seq2) { + break + } + + // Otherwise start a new chunk. + chunk = pair{end.x - C, end.y - C} + for _, s := range seq1[chunk.x:end.x] { + ctext = append(ctext, struct{}{}) + diff = append(diff, DiffRecord{s, Common, chunk.x + count.x, chunk.y + count.y}) + count.x++ + count.y++ + } + done = end + } + + return diff +} + +// tgs returns the pairs of indexes of the longest common subsequence +// of unique lines in x and y, where a unique line is one that appears +// once in x and once in y. +// +// The longest common subsequence algorithm is as described in +// Thomas G. Szymanski, “A Special Case of the Maximal Common +// Subsequence Problem,” Princeton TR #170 (January 1975), +// available at https://research.swtch.com/tgs170.pdf. +func tgs(x, y []string) []pair { + // Count the number of times each string appears in a and b. 
+ // We only care about 0, 1, many, counted as 0, -1, -2 + // for the x side and 0, -4, -8 for the y side. + // Using negative numbers now lets us distinguish positive line numbers later. + m := make(map[string]int) + for _, s := range x { + if c := m[s]; c > -2 { + m[s] = c - 1 + } + } + for _, s := range y { + if c := m[s]; c > -8 { + m[s] = c - 4 + } + } + + // Now unique strings can be identified by m[s] = -1+-4. + // + // Gather the indexes of those strings in x and y, building: + // xi[i] = increasing indexes of unique strings in x. + // yi[i] = increasing indexes of unique strings in y. + // inv[i] = index j such that x[xi[i]] = y[yi[j]]. + var xi, yi, inv []int + for i, s := range y { + if m[s] == -1+-4 { + m[s] = len(yi) + yi = append(yi, i) + } + } + for i, s := range x { + if j, ok := m[s]; ok && j >= 0 { + xi = append(xi, i) + inv = append(inv, j) + } + } + + // Apply Algorithm A from Szymanski's paper. + // In those terms, A = J = inv and B = [0, n). + // We add sentinel pairs {0,0}, and {len(x),len(y)} + // to the returned sequence, to help the processing loop. 
+ J := inv + n := len(xi) + T := make([]int, n) + L := make([]int, n) + for i := range T { + T[i] = n + 1 + } + for i := 0; i < n; i++ { + k := sort.Search(n, func(k int) bool { + return T[k] >= J[i] + }) + T[k] = J[i] + L[i] = k + 1 + } + k := 0 + for _, v := range L { + if k < v { + k = v + } + } + seq := make([]pair, 2+k) + seq[1+k] = pair{len(x), len(y)} // sentinel at end + lastj := n + for i := n - 1; i >= 0; i-- { + if L[i] == k && J[i] < lastj { + seq[k] = pair{xi[i], yi[J[i]]} + k-- + } + } + seq[0] = pair{0, 0} // sentinel at start + return seq +} diff --git a/vendor/modules.txt b/vendor/modules.txt index 5951c85fb..08cea1c27 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -80,7 +80,7 @@ github.com/josharian/intern # github.com/json-iterator/go v1.1.12 ## explicit; go 1.12 github.com/json-iterator/go -# github.com/k14s/difflib v0.0.0-20201117154628-0c031775bf57 +# github.com/k14s/difflib v0.0.0-20240118055029-596a7a5585c3 ## explicit github.com/k14s/difflib # github.com/k14s/starlark-go v0.0.0-20200720175618-3a5c849cc368