From d6f4312f0a8db90e458b573c8920b888f27d3dec Mon Sep 17 00:00:00 2001 From: Ismail Khoffi Date: Fri, 12 Feb 2021 17:01:45 +0100 Subject: [PATCH 1/3] restructure code to expose the internal hasher: - expose NmtHasher API s.t. it can be used to construct and use NMT Hashsers externally (e.g. for IPLD multihashers) - Add two functions Sha256Namespace8FlaggedLeaf, Sha256Namespace8FlaggedInner whose signature matches regular hash functions that simplify the above even further --- hasher.go | 156 ++++++++++++++++++++++ internal/hasher_test.go => hasher_test.go | 2 +- internal/hasher.go | 121 ----------------- internal/nmt_hasher.go | 24 ---- nmt.go | 9 +- nmt_test.go | 10 -- proof.go | 8 +- 7 files changed, 162 insertions(+), 168 deletions(-) create mode 100644 hasher.go rename internal/hasher_test.go => hasher_test.go (99%) delete mode 100644 internal/hasher.go delete mode 100644 internal/nmt_hasher.go diff --git a/hasher.go b/hasher.go new file mode 100644 index 0000000..6976265 --- /dev/null +++ b/hasher.go @@ -0,0 +1,156 @@ +package nmt + +import ( + "bytes" + "crypto/sha256" + "hash" + + "github.com/lazyledger/nmt/namespace" +) + +const ( + LeafPrefix = 0 + NodePrefix = 1 + + DefaultNamespaceIDLen = 8 +) + +// defaultHasher uses sha256 as a base-hasher, 8 bytes +// for the namespace IDs and ignores the maximum possible namespace. +var defaultHasher = NewNmtHasher(sha256.New(), DefaultNamespaceIDLen, true) + +// Sha256Namespace8FlaggedLeaf uses sha256 as a base-hasher, 8 bytes +// for the namespace IDs and ignores the maximum possible namespace. +// +// Sha256Namespace8FlaggedLeaf(namespacedData) results in: +// ns(rawData) || ns(rawData) || sha256(LeafPrefix || rawData), +// where rawData is the leaf's data minus the namespace.ID prefix +// (namely namespacedData[NamespaceLen:]). +// +// Note that for the input len(namespacedData) >= DefaultNamespaceIDLen has to hold. +// If the input does not fulfil this, we will panic. +// The output will be of length 2*DefaultNamespaceIDLen+sha256.Size = 48 bytes. +func Sha256Namespace8FlaggedLeaf(namespacedData []byte) []byte { + return defaultHasher.HashLeaf(namespacedData) +} + +// Sha256Namespace8FlaggedInner hashes inner nodes to: +// minNID || maxNID || sha256(NodePrefix || leftRight), where leftRight consists of the full +// left and right child node bytes, including their respective min and max namespace IDs. +// Hence, the input has to be of size: +// 48 = 32 + 8 + 8 = sha256.Size + 2*DefaultNamespaceIDLen bytes. +// If the input does not fulfil this, we will panic. +// The output will also be of length 2*DefaultNamespaceIDLen+sha256.Size = 48 bytes. +func Sha256Namespace8FlaggedInner(leftRight []byte) []byte { + const flagLen = DefaultNamespaceIDLen * 2 + sha256Len := defaultHasher.Size() + left := leftRight[flagLen:+sha256Len] + right := leftRight[flagLen+sha256Len:] + + return defaultHasher.HashNode(left, right) +} + +type Hasher struct { + hash.Hash + NamespaceLen namespace.IDSize + + ignoreMaxNs bool + precomputedMaxNs namespace.ID +} + +func (n *Hasher) IsMaxNamespaceIDIgnored() bool { + return n.ignoreMaxNs +} + +func (n *Hasher) NamespaceSize() namespace.IDSize { + return n.NamespaceLen +} + +func NewNmtHasher(baseHasher hash.Hash, nidLen namespace.IDSize, ignoreMaxNamespace bool) *Hasher { + return &Hasher{ + Hash: baseHasher, + NamespaceLen: nidLen, + ignoreMaxNs: ignoreMaxNamespace, + precomputedMaxNs: bytes.Repeat([]byte{0xFF}, int(nidLen)), + } +} + +func (n *Hasher) EmptyRoot() []byte { + emptyNs := bytes.Repeat([]byte{0}, int(n.NamespaceLen)) + h := n.Sum(nil) + digest := append(append(emptyNs, emptyNs...), h...) + + return digest +} + +// HashLeaf hashes leaves to: +// ns(rawData) || ns(rawData) || hash(leafPrefix || rawData), where raw data is the leaf's +// data minus the namespaceID (namely leaf[NamespaceLen:]). +// Hence, the input length has to be greater or equal to the +// size of the underlying namespace.ID. +// +//Note that for leaves minNs = maxNs = ns(leaf) = leaf[:NamespaceLen]. +//nolint:errcheck +func (n *Hasher) HashLeaf(leaf []byte) []byte { + h := n.Hash + h.Reset() + + nID := leaf[:n.NamespaceLen] + data := leaf[n.NamespaceLen:] + res := append(append(make([]byte, 0), nID...), nID...) + data = append([]byte{LeafPrefix}, data...) + h.Write(data) + return h.Sum(res) +} + +// HashNode hashes inner nodes to: +// minNID || maxNID || hash(NodePrefix || left || right), where left and right are the full +// left and right child node bytes, including their respective min and max namespace IDs: +// left = left.Min() || left.Max() || l.Hash(). +func (n *Hasher) HashNode(l, r []byte) []byte { + h := n.Hash + h.Reset() + + // the actual hash result of the children got extended (or flagged) by their + // children's minNs || maxNs; hence the flagLen = 2 * NamespaceLen: + flagLen := 2 * n.NamespaceLen + leftMinNs, leftMaxNs := l[:n.NamespaceLen], l[n.NamespaceLen:flagLen] + rightMinNs, rightMaxNs := r[:n.NamespaceLen], r[n.NamespaceLen:flagLen] + + minNs := min(leftMinNs, rightMinNs) + var maxNs []byte + if n.ignoreMaxNs && n.precomputedMaxNs.Equal(leftMinNs) { + maxNs = n.precomputedMaxNs + } else if n.ignoreMaxNs && n.precomputedMaxNs.Equal(rightMinNs) { + maxNs = leftMaxNs + } else { + maxNs = max(leftMaxNs, rightMaxNs) + } + + res := append(append(make([]byte, 0), minNs...), maxNs...) + + // Note this seems a little faster than calling several Write()s on the + // underlying Hash function (see: https://github.com/google/trillian/pull/1503): + data := append(append(append( + make([]byte, 0, 1+len(l)+len(r)), + NodePrefix), + l...), + r...) + //nolint:errcheck + h.Write(data) + return h.Sum(res) +} + +func max(ns []byte, ns2 []byte) []byte { + if bytes.Compare(ns, ns2) >= 0 { + return ns + } + return ns2 +} + +func min(ns []byte, ns2 []byte) []byte { + if bytes.Compare(ns, ns2) <= 0 { + return ns + } + return ns2 +} diff --git a/internal/hasher_test.go b/hasher_test.go similarity index 99% rename from internal/hasher_test.go rename to hasher_test.go index e324f9e..3b1fe9c 100644 --- a/internal/hasher_test.go +++ b/hasher_test.go @@ -1,4 +1,4 @@ -package internal +package nmt import ( "crypto" diff --git a/internal/hasher.go b/internal/hasher.go deleted file mode 100644 index 0476662..0000000 --- a/internal/hasher.go +++ /dev/null @@ -1,121 +0,0 @@ -package internal - -import ( - "bytes" - "hash" - - "github.com/lazyledger/nmt/namespace" -) - -const ( - LeafPrefix = 0 - NodePrefix = 1 -) - -type DefaultNmtHasher struct { - hash.Hash - NamespaceLen namespace.IDSize - - ignoreMaxNs bool - cachedMaxNs namespace.ID -} - -func (n *DefaultNmtHasher) IsMaxNamespaceIDIgnored() bool { - return n.ignoreMaxNs -} - -func (n *DefaultNmtHasher) NamespaceSize() namespace.IDSize { - return n.NamespaceLen -} - -func NewNmtHasher(baseHasher hash.Hash, nidLen namespace.IDSize, ignoreMaxNamespace bool) *DefaultNmtHasher { - return &DefaultNmtHasher{ - Hash: baseHasher, - NamespaceLen: nidLen, - ignoreMaxNs: ignoreMaxNamespace, - cachedMaxNs: bytes.Repeat([]byte{0xFF}, int(nidLen)), - } -} - -func (n *DefaultNmtHasher) EmptyRoot() []byte { - emptyNs := bytes.Repeat([]byte{0}, int(n.NamespaceLen)) - h := n.Sum(nil) - digest := append(append(emptyNs, emptyNs...), h...) - - return digest -} - -// HashLeaf hashes leaves to: -// ns(rawData) || ns(rawData) || hash(leafPrefix || rawData), where raw data is the leaf's -// data minus the namespaceID (namely leaf[NamespaceLen:]). -// Note that here minNs = maxNs = ns(leaf) = leaf[:NamespaceLen]. -//nolint:errcheck -func (n *DefaultNmtHasher) HashLeaf(leaf []byte) []byte { - h := n.Hash - h.Reset() - // TODO this makes assumptions on how the passed in leaf data looks like - // instead, this should use the Data / NamespaceID methods: - // nID := leaf.NamespaceID() - // data := leaf.Data() - // But this will requires further refactoring. - // It does not matter too much right now as all this is hidden - // in the internal package. - nID := leaf[:n.NamespaceLen] - data := leaf[n.NamespaceLen:] - res := append(append(make([]byte, 0), nID...), nID...) - data = append([]byte{LeafPrefix}, data...) - h.Write(data) - return h.Sum(res) -} - -// HashNode hashes inner nodes to: -// minNID || maxNID || hash(NodePrefix || left || right), where left and right are the full -// left and right child node bytes, including their respective min and max namespace IDs: -// left = left.Min() || left.Max() || l.Hash(). -func (n *DefaultNmtHasher) HashNode(l, r []byte) []byte { - h := n.Hash - h.Reset() - - // the actual hash result of the children got extended (or flagged) by their - // children's minNs || maxNs; hence the flagLen = 2 * NamespaceLen: - flagLen := 2 * n.NamespaceLen - leftMinNs, leftMaxNs := l[:n.NamespaceLen], l[n.NamespaceLen:flagLen] - rightMinNs, rightMaxNs := r[:n.NamespaceLen], r[n.NamespaceLen:flagLen] - - minNs := min(leftMinNs, rightMinNs) - var maxNs []byte - if n.ignoreMaxNs && n.cachedMaxNs.Equal(leftMinNs) { - maxNs = n.cachedMaxNs - } else if n.ignoreMaxNs && n.cachedMaxNs.Equal(rightMinNs) { - maxNs = leftMaxNs - } else { - maxNs = max(leftMaxNs, rightMaxNs) - } - - res := append(append(make([]byte, 0), minNs...), maxNs...) - - // Note this seems a little faster than calling several Write()s on the - // underlying Hash function (see: https://github.com/google/trillian/pull/1503): - data := append(append(append( - make([]byte, 0, 1+len(l)+len(r)), - NodePrefix), - l...), - r...) - //nolint:errcheck - h.Write(data) - return h.Sum(res) -} - -func max(ns []byte, ns2 []byte) []byte { - if bytes.Compare(ns, ns2) >= 0 { - return ns - } - return ns2 -} - -func min(ns []byte, ns2 []byte) []byte { - if bytes.Compare(ns, ns2) <= 0 { - return ns - } - return ns2 -} diff --git a/internal/nmt_hasher.go b/internal/nmt_hasher.go deleted file mode 100644 index 91be267..0000000 --- a/internal/nmt_hasher.go +++ /dev/null @@ -1,24 +0,0 @@ -package internal - -import "github.com/lazyledger/nmt/namespace" - -// NmtHasher provides the functions needed to compute an NMT. -// NmtHasher provides the functions needed to compute an NMT. -type NmtHasher interface { - // EmptyRoot returns the namespaced root for a no-leaves Namespaced Merkle - // tree. - EmptyRoot() []byte - - // HashLeaf defines how a leaf is hashed. - HashLeaf(leaf []byte) []byte - - // HashNode defines how a inner node is hashed. - HashNode(l, r []byte) []byte - - // Size returns the size of the underlying hasher. - Size() int - // Return the size of the namespace - NamespaceSize() namespace.IDSize - // Returns if the NmtHasher ignores the max namespace. - IsMaxNamespaceIDIgnored() bool -} diff --git a/nmt.go b/nmt.go index 1744923..14a513c 100644 --- a/nmt.go +++ b/nmt.go @@ -13,11 +13,6 @@ import ( "github.com/lazyledger/nmt/namespace" ) -const ( - LeafPrefix = internal.LeafPrefix - NodePrefix = internal.NodePrefix -) - var ( ErrMismatchedNamespaceSize = errors.New("mismatching namespace sizes") ErrInvalidPushOrder = errors.New("pushed data has to be lexicographically ordered by namespace IDs") @@ -74,7 +69,7 @@ func NodeVisitor(nodeVisitorFn NodeVisitorFn) Option { } type NamespacedMerkleTree struct { - treeHasher internal.NmtHasher + treeHasher *Hasher visit NodeVisitorFn // just cache stuff until we pass in a store and keep all nodes in there @@ -106,7 +101,7 @@ func New(h hash.Hash, setters ...Option) *NamespacedMerkleTree { for _, setter := range setters { setter(opts) } - treeHasher := internal.NewNmtHasher(h, opts.NamespaceIDSize, opts.IgnoreMaxNamespace) + treeHasher := NewNmtHasher(h, opts.NamespaceIDSize, opts.IgnoreMaxNamespace) return &NamespacedMerkleTree{ treeHasher: treeHasher, visit: opts.NodeVisitor, diff --git a/nmt_test.go b/nmt_test.go index 355b514..3920e0a 100644 --- a/nmt_test.go +++ b/nmt_test.go @@ -662,16 +662,6 @@ func repeat(data []namespace.PrefixedData, num int) []namespace.PrefixedData { return res } -func sum(hash crypto.Hash, data ...[]byte) []byte { - h := hash.New() - for _, d := range data { - //nolint:errcheck - h.Write(d) - } - - return h.Sum(nil) -} - func generateRandNamespacedRawData(total int, nidSize int, leafSize int) [][]byte { data := make([][]byte, total) for i := 0; i < total; i++ { diff --git a/proof.go b/proof.go index 79f1a4e..921d3a5 100644 --- a/proof.go +++ b/proof.go @@ -7,8 +7,6 @@ import ( "math/bits" "github.com/lazyledger/merkletree" - - "github.com/lazyledger/nmt/internal" "github.com/lazyledger/nmt/namespace" ) @@ -99,7 +97,7 @@ func NewAbsenceProof(proofStart, proofEnd int, proofNodes [][]byte, leafHash []b // the provided data in the tree. Additionally, it verifies that the namespace // is complete and no leaf of that namespace was left out in the proof. func (proof Proof) VerifyNamespace(h hash.Hash, nID namespace.ID, data [][]byte, root namespace.IntervalDigest) bool { - nth := internal.NewNmtHasher(h, nID.Size(), proof.isMaxNamespaceIDIgnored) + nth := NewNmtHasher(h, nID.Size(), proof.isMaxNamespaceIDIgnored) if nID.Size() != root.Min().Size() || nID.Size() != root.Max().Size() { // conflicting namespace sizes return false @@ -139,7 +137,7 @@ func (proof Proof) VerifyNamespace(h hash.Hash, nID namespace.ID, data [][]byte, return proof.verifyLeafHashes(nth, true, nID, gotLeafHashes, root) } -func (proof Proof) verifyLeafHashes(nth internal.NmtHasher, verifyCompleteness bool, nID namespace.ID, gotLeafHashes [][]byte, root namespace.IntervalDigest) bool { +func (proof Proof) verifyLeafHashes(nth *Hasher, verifyCompleteness bool, nID namespace.ID, gotLeafHashes [][]byte, root namespace.IntervalDigest) bool { // The code below is almost identical to NebulousLabs' // merkletree.VerifyMultiRangeProof. // @@ -207,7 +205,7 @@ func (proof Proof) verifyLeafHashes(nth internal.NmtHasher, verifyCompleteness b } func (proof Proof) VerifyInclusion(h hash.Hash, nid namespace.ID, data []byte, root namespace.IntervalDigest) bool { - nth := internal.NewNmtHasher(h, nid.Size(), proof.isMaxNamespaceIDIgnored) + nth := NewNmtHasher(h, nid.Size(), proof.isMaxNamespaceIDIgnored) leafData := append(nid, data...) return proof.verifyLeafHashes(nth, false, nid, [][]byte{nth.HashLeaf(leafData)}, root) } From 6cb03eeda1732e1ee8add1d17e4032646f34be11 Mon Sep 17 00:00:00 2001 From: Ismail Khoffi Date: Sat, 13 Feb 2021 21:58:48 +0100 Subject: [PATCH 2/3] fix bug/typo and minor doc improvements --- hasher.go | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/hasher.go b/hasher.go index 6976265..9a4aa9a 100644 --- a/hasher.go +++ b/hasher.go @@ -27,7 +27,10 @@ var defaultHasher = NewNmtHasher(sha256.New(), DefaultNamespaceIDLen, true) // where rawData is the leaf's data minus the namespace.ID prefix // (namely namespacedData[NamespaceLen:]). // -// Note that for the input len(namespacedData) >= DefaultNamespaceIDLen has to hold. +// Note that different from other cryptographic hash functions, this here +// makes assumptions on the input: +// len(namespacedData) >= DefaultNamespaceIDLen has to hold, +// as the first DefaultNamespaceIDLen bytes are interpreted as the namespace ID). // If the input does not fulfil this, we will panic. // The output will be of length 2*DefaultNamespaceIDLen+sha256.Size = 48 bytes. func Sha256Namespace8FlaggedLeaf(namespacedData []byte) []byte { @@ -44,7 +47,7 @@ func Sha256Namespace8FlaggedLeaf(namespacedData []byte) []byte { func Sha256Namespace8FlaggedInner(leftRight []byte) []byte { const flagLen = DefaultNamespaceIDLen * 2 sha256Len := defaultHasher.Size() - left := leftRight[flagLen:+sha256Len] + left := leftRight[:flagLen+sha256Len] right := leftRight[flagLen+sha256Len:] return defaultHasher.HashNode(left, right) From b2f848efad53bcf7b85a5de2254ac51cfdba30a0 Mon Sep 17 00:00:00 2001 From: Ismail Khoffi Date: Sat, 13 Feb 2021 23:31:26 +0100 Subject: [PATCH 3/3] Some sanity checks as tests --- hasher_test.go | 51 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/hasher_test.go b/hasher_test.go index 3b1fe9c..37c93a5 100644 --- a/hasher_test.go +++ b/hasher_test.go @@ -99,6 +99,57 @@ func Test_namespacedTreeHasher_HashNode(t *testing.T) { } } +func TestSha256Namespace8FlaggedLeaf(t *testing.T) { + tests := []struct { + name string + data []byte + wantPanic bool + wantLen int + }{ + {"input too short: panic", []byte("smaller"), true, 0}, + {"input 8 byte: Ok", []byte("8bytesss"), false, 48}, + {"input greater 8 byte: Ok", []byte("8bytesssSomeNotSoRandData"), false, 48}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if tt.wantPanic { + shouldPanic(t, func() { + Sha256Namespace8FlaggedLeaf(tt.data) + }) + } else if got := Sha256Namespace8FlaggedLeaf(tt.data); len(got) != tt.wantLen { + t.Errorf("len(Sha256Namespace8FlaggedLeaf()) = %v, want %v", got, tt.wantLen) + } + }) + } +} + +func TestSha256Namespace8FlaggedInner(t *testing.T) { + nilHash := sha256.Sum256(nil) + nid1 := []byte("nid01234") + nid2 := []byte("nid12345") + tests := []struct { + name string + data []byte + wantPanic bool + wantLen int + }{ + {"input smaller 48: panic", []byte("smaller48"), true, 0}, + {"input still too small: panic", append(append(nid1, nid2...), []byte("data")...), true, 0}, + {"valid input: ok", append(append(append(nid1, nilHash[:]...), nid2...), nilHash[:]...), false, 48}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if tt.wantPanic { + shouldPanic(t, func() { + Sha256Namespace8FlaggedInner(tt.data) + }) + } else if got := Sha256Namespace8FlaggedInner(tt.data); len(got) != tt.wantLen { + t.Errorf("len(Sha256Namespace8FlaggedLeaf()) = %v, want %v", got, tt.wantLen) + } + }) + } +} + func sum(hash crypto.Hash, data ...[]byte) []byte { h := hash.New() for _, d := range data {