From 0b8bd0890dddcd26b601904a762eb1988b600282 Mon Sep 17 00:00:00 2001 From: "mergify[bot]" <37929162+mergify[bot]@users.noreply.github.com> Date: Tue, 17 Dec 2024 13:18:21 +0000 Subject: [PATCH] Use xxhash instead of sha256 for hashing AST nodes (#6192) (#6252) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Conflicts: # internal/pkg/agent/transpiler/ast.go (cherry picked from commit 9c1311073b0b3fd3b725dd4e30082573d0169280) # Conflicts: # internal/pkg/agent/transpiler/ast.go # Conflicts: # internal/pkg/agent/transpiler/ast.go Co-authored-by: Mikołaj Świątek --- NOTICE.txt | 64 ++++++------- ...1733238171-use-xxhash-for-ast-hashing.yaml | 32 +++++++ go.mod | 2 +- internal/pkg/agent/transpiler/ast.go | 93 ++++++++++++++++++- internal/pkg/agent/transpiler/utils.go | 9 +- 5 files changed, 162 insertions(+), 38 deletions(-) create mode 100644 changelog/fragments/1733238171-use-xxhash-for-ast-hashing.yaml diff --git a/NOTICE.txt b/NOTICE.txt index f8785d7cdfc..97179a781f8 100644 --- a/NOTICE.txt +++ b/NOTICE.txt @@ -204,6 +204,38 @@ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +-------------------------------------------------------------------------------- +Dependency : github.com/cespare/xxhash/v2 +Version: v2.3.0 +Licence type (autodetected): MIT +-------------------------------------------------------------------------------- + +Contents of probable licence file $GOMODCACHE/github.com/cespare/xxhash/v2@v2.3.0/LICENSE.txt: + +Copyright (c) 2016 Caleb Spare + +MIT License + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + -------------------------------------------------------------------------------- Dependency : github.com/docker/docker Version: v27.3.1+incompatible @@ -33560,38 +33592,6 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. --------------------------------------------------------------------------------- -Dependency : github.com/cespare/xxhash/v2 -Version: v2.3.0 -Licence type (autodetected): MIT --------------------------------------------------------------------------------- - -Contents of probable licence file $GOMODCACHE/github.com/cespare/xxhash/v2@v2.3.0/LICENSE.txt: - -Copyright (c) 2016 Caleb Spare - -MIT License - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice shall be -included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - -------------------------------------------------------------------------------- Dependency : github.com/chai2010/gettext-go Version: v1.0.2 diff --git a/changelog/fragments/1733238171-use-xxhash-for-ast-hashing.yaml b/changelog/fragments/1733238171-use-xxhash-for-ast-hashing.yaml new file mode 100644 index 00000000000..a6655e8cda6 --- /dev/null +++ b/changelog/fragments/1733238171-use-xxhash-for-ast-hashing.yaml @@ -0,0 +1,32 @@ +# Kind can be one of: +# - breaking-change: a change to previously-documented behavior +# - deprecation: functionality that is being removed in a later release +# - bug-fix: fixes a problem in a previous version +# - enhancement: extends functionality but does not break or fix existing behavior +# - feature: new functionality +# - known-issue: problems that we are aware of in a given version +# - security: impacts on the security of a product or a user’s deployment. +# - upgrade: important information for someone upgrading from a prior version +# - other: does not fit into any of the other categories +kind: enhancement + +# Change summary; a 80ish characters long description of the change. +summary: Use xxHash for hashing AST nodes + +# Long description; in case the summary is not enough to describe the change +# this field accommodate a description without length limits. +# NOTE: This field will be rendered only for breaking-change and known-issue kinds at the moment. +#description: + +# Affected component; usually one of "elastic-agent", "fleet-server", "filebeat", "metricbeat", "auditbeat", "all", etc. +component: elastic-agent + +# PR URL; optional; the PR number that added the changeset. +# If not present is automatically filled by the tooling finding the PR where this changelog fragment has been added. +# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number. +# Please provide it if you are adding a fragment for a different PR. +#pr: https://github.com/owner/repo/1234 + +# Issue URL; optional; the GitHub issue related to this changeset (either closes or is part of). +# If not present is automatically filled by the tooling with the issue linked to the PR number. +#issue: https://github.com/owner/repo/1234 diff --git a/go.mod b/go.mod index d78cba71635..d98f4eb4c0f 100644 --- a/go.mod +++ b/go.mod @@ -9,6 +9,7 @@ require ( github.com/blakesmith/ar v0.0.0-20150311145944-8bd4349a67f2 github.com/cavaliergopher/rpm v1.2.0 github.com/cenkalti/backoff/v4 v4.3.0 + github.com/cespare/xxhash/v2 v2.3.0 github.com/docker/docker v27.3.1+incompatible github.com/docker/go-units v0.5.0 github.com/dolmen-go/contextio v0.0.0-20200217195037-68fc5150bcd5 @@ -221,7 +222,6 @@ require ( github.com/aws/smithy-go v1.20.4 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/bmatcuk/doublestar/v4 v4.7.1 // indirect - github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/chai2010/gettext-go v1.0.2 // indirect github.com/cloudfoundry-community/go-cfclient v0.0.0-20190808214049-35bcce23fc5f // indirect github.com/cloudfoundry/noaa v2.1.0+incompatible // indirect diff --git a/internal/pkg/agent/transpiler/ast.go b/internal/pkg/agent/transpiler/ast.go index 341c63ebc48..f792a27d572 100644 --- a/internal/pkg/agent/transpiler/ast.go +++ b/internal/pkg/agent/transpiler/ast.go @@ -5,7 +5,6 @@ package transpiler import ( - "bytes" "crypto/sha256" "encoding/base64" "fmt" @@ -14,6 +13,8 @@ import ( "strconv" "strings" + "github.com/cespare/xxhash/v2" + "github.com/elastic/elastic-agent/internal/pkg/eql" ) @@ -58,6 +59,9 @@ type Node interface { // Hash compute a sha256 hash of the current node and recursively call any children. Hash() []byte + // Hash64With recursively computes the given hash for the Node and its children + Hash64With(h *xxhash.Digest) error + // Apply apply the current vars, returning the new value for the node. This does not modify the original Node. Apply(*Vars) (Node, error) @@ -163,6 +167,16 @@ func (d *Dict) Hash() []byte { return h.Sum(nil) } +// Hash64With recursively computes the given hash for the Node and its children +func (d *Dict) Hash64With(h *xxhash.Digest) error { + for _, v := range d.value { + if err := v.Hash64With(h); err != nil { + return err + } + } + return nil +} + // Apply applies the vars to all the nodes in the dictionary. This does not modify the original dictionary. func (d *Dict) Apply(vars *Vars) (Node, error) { nodes := make([]Node, 0, len(d.value)) @@ -279,6 +293,17 @@ func (k *Key) Hash() []byte { return h.Sum(nil) } +// Hash64With recursively computes the given hash for the Node and its children +func (k *Key) Hash64With(h *xxhash.Digest) error { + if _, err := h.WriteString(k.name); err != nil { + return err + } + if k.value != nil { + return k.value.Hash64With(h) + } + return nil +} + // Apply applies the vars to the value. This does not modify the original node. func (k *Key) Apply(vars *Vars) (Node, error) { if k.value == nil { @@ -361,6 +386,16 @@ func (l *List) Hash() []byte { return h.Sum(nil) } +// Hash64With recursively computes the given hash for the Node and its children +func (l *List) Hash64With(h *xxhash.Digest) error { + for _, v := range l.value { + if err := v.Hash64With(h); err != nil { + return err + } + } + return nil +} + // Find takes an index and return the values at that index. func (l *List) Find(idx string) (Node, bool) { i, err := strconv.Atoi(idx) @@ -481,6 +516,12 @@ func (s *StrVal) Hash() []byte { return []byte(s.value) } +// Hash64With recursively computes the given hash for the Node and its children +func (s *StrVal) Hash64With(h *xxhash.Digest) error { + _, err := h.WriteString(s.value) + return err +} + // Apply applies the vars to the string value. This does not modify the original string. func (s *StrVal) Apply(vars *Vars) (Node, error) { return vars.Replace(s.value) @@ -542,6 +583,12 @@ func (s *IntVal) Hash() []byte { return []byte(s.String()) } +// Hash64With recursively computes the given hash for the Node and its children +func (s *IntVal) Hash64With(h *xxhash.Digest) error { + _, err := h.WriteString(s.String()) + return err +} + // Processors returns any linked processors that are now connected because of Apply. func (s *IntVal) Processors() Processors { return s.processors @@ -593,6 +640,12 @@ func (s *UIntVal) Hash() []byte { return []byte(s.String()) } +// Hash64With recursively computes the given hash for the Node and its children +func (s *UIntVal) Hash64With(h *xxhash.Digest) error { + _, err := h.WriteString(s.String()) + return err +} + // Apply does nothing. func (s *UIntVal) Apply(_ *Vars) (Node, error) { return s, nil @@ -647,7 +700,18 @@ func (s *FloatVal) ShallowClone() Node { // Hash return a string representation of the value, we try to return the minimal precision we can. func (s *FloatVal) Hash() []byte { - return []byte(strconv.FormatFloat(s.value, 'f', -1, 64)) + return []byte(s.hashString()) +} + +// Hash64With recursively computes the given hash for the Node and its children +func (s *FloatVal) Hash64With(h *xxhash.Digest) error { + _, err := h.WriteString(s.hashString()) + return err +} + +// hashString returns a string representation of s suitable for hashing. +func (s *FloatVal) hashString() string { + return strconv.FormatFloat(s.value, 'f', -1, 64) } // Apply does nothing. @@ -712,6 +776,18 @@ func (s *BoolVal) Hash() []byte { return falseVal } +// Hash64With recursively computes the given hash for the Node and its children +func (s *BoolVal) Hash64With(h *xxhash.Digest) error { + var encodedBool []byte + if s.value { + encodedBool = trueVal + } else { + encodedBool = falseVal + } + _, err := h.Write(encodedBool) + return err +} + // Apply does nothing. func (s *BoolVal) Apply(_ *Vars) (Node, error) { return s, nil @@ -827,6 +903,11 @@ func (a *AST) Hash() []byte { return a.root.Hash() } +// Hash64With recursively computes the given hash for the Node and its children +func (a *AST) Hash64With(h *xxhash.Digest) error { + return a.root.Hash64With(h) +} + // HashStr return the calculated hash as a base64 url encoded string. func (a *AST) HashStr() string { return base64.URLEncoding.EncodeToString(a.root.Hash()) @@ -837,7 +918,13 @@ func (a *AST) Equal(other *AST) bool { if a.root == nil || other.root == nil { return a.root == other.root } - return bytes.Equal(a.Hash(), other.Hash()) + hasher := xxhash.New() + _ = a.Hash64With(hasher) + thisHash := hasher.Sum64() + hasher.Reset() + _ = other.Hash64With(hasher) + otherHash := hasher.Sum64() + return thisHash == otherHash } // Lookup looks for a value from the AST. diff --git a/internal/pkg/agent/transpiler/utils.go b/internal/pkg/agent/transpiler/utils.go index e726fd996a5..15edd3bf1ae 100644 --- a/internal/pkg/agent/transpiler/utils.go +++ b/internal/pkg/agent/transpiler/utils.go @@ -7,6 +7,8 @@ package transpiler import ( "errors" "fmt" + + "github.com/cespare/xxhash/v2" ) const ( @@ -23,7 +25,8 @@ func RenderInputs(inputs Node, varsArray []*Vars) (Node, error) { return nil, fmt.Errorf("inputs must be an array") } var nodes []varIDMap - nodesMap := map[string]*Dict{} + nodesMap := map[uint64]*Dict{} + hasher := xxhash.New() for _, vars := range varsArray { for _, node := range l.Value().([]Node) { dict, ok := node.(*Dict) @@ -56,7 +59,9 @@ func RenderInputs(inputs Node, varsArray []*Vars) (Node, error) { continue } } - hash := string(dict.Hash()) + hasher.Reset() + _ = dict.Hash64With(hasher) + hash := hasher.Sum64() _, exists := nodesMap[hash] if !exists { nodesMap[hash] = dict