From d6f4312f0a8db90e458b573c8920b888f27d3dec Mon Sep 17 00:00:00 2001
From: Ismail Khoffi <Ismail.Khoffi@gmail.com>
Date: Fri, 12 Feb 2021 17:01:45 +0100
Subject: [PATCH 1/3] restructure code to expose the internal hasher:

 - expose the NmtHasher API so that it can be used to construct and use NMT hashers externally (e.g. for IPLD multihashers)
 - add two functions, Sha256Namespace8FlaggedLeaf and Sha256Namespace8FlaggedInner, whose signatures match those of regular hash functions, which simplifies the above even further
---
 hasher.go                                 | 156 ++++++++++++++++++++++
 internal/hasher_test.go => hasher_test.go |   2 +-
 internal/hasher.go                        | 121 -----------------
 internal/nmt_hasher.go                    |  24 ----
 nmt.go                                    |   9 +-
 nmt_test.go                               |  10 --
 proof.go                                  |   8 +-
 7 files changed, 162 insertions(+), 168 deletions(-)
 create mode 100644 hasher.go
 rename internal/hasher_test.go => hasher_test.go (99%)
 delete mode 100644 internal/hasher.go
 delete mode 100644 internal/nmt_hasher.go

diff --git a/hasher.go b/hasher.go
new file mode 100644
index 0000000..6976265
--- /dev/null
+++ b/hasher.go
@@ -0,0 +1,156 @@
+package nmt
+
+import (
+	"bytes"
+	"crypto/sha256"
+	"hash"
+
+	"github.com/lazyledger/nmt/namespace"
+)
+
+const (
+	LeafPrefix = 0
+	NodePrefix = 1
+
+	DefaultNamespaceIDLen = 8
+)
+
+// defaultHasher uses sha256 as a base-hasher, 8 bytes
+// for the namespace IDs and ignores the maximum possible namespace.
+var defaultHasher = NewNmtHasher(sha256.New(), DefaultNamespaceIDLen, true)
+
+// Sha256Namespace8FlaggedLeaf uses sha256 as a base-hasher, 8 bytes
+// for the namespace IDs and ignores the maximum possible namespace.
+//
+// Sha256Namespace8FlaggedLeaf(namespacedData) results in:
+// ns(rawData) || ns(rawData) || sha256(LeafPrefix || rawData),
+// where rawData is the leaf's data minus the namespace.ID prefix
+// (namely namespacedData[NamespaceLen:]).
+//
+// Note that for the input len(namespacedData) >= DefaultNamespaceIDLen has to hold.
+// If the input does not fulfil this, we will panic.
+// The output will be of length 2*DefaultNamespaceIDLen+sha256.Size = 48 bytes.
+func Sha256Namespace8FlaggedLeaf(namespacedData []byte) []byte {
+	return defaultHasher.HashLeaf(namespacedData)
+}
+
+// Sha256Namespace8FlaggedInner hashes inner nodes to:
+// minNID || maxNID || sha256(NodePrefix || leftRight), where leftRight consists of the full
+// left and right child node bytes, including their respective min and max namespace IDs.
+// Hence, the input is expected to be the concatenation of two such nodes, each of size
+// 48 = 32 + 8 + 8 = sha256.Size + 2*DefaultNamespaceIDLen bytes (i.e. 96 bytes in total).
+// If the input does not fulfil this, we will panic.
+// The output will also be of length 2*DefaultNamespaceIDLen+sha256.Size = 48 bytes.
+func Sha256Namespace8FlaggedInner(leftRight []byte) []byte {
+	const flagLen = DefaultNamespaceIDLen * 2
+	sha256Len := defaultHasher.Size()
+	left := leftRight[flagLen:+sha256Len]
+	right := leftRight[flagLen+sha256Len:]
+
+	return defaultHasher.HashNode(left, right)
+}
+
+type Hasher struct {
+	hash.Hash
+	NamespaceLen namespace.IDSize
+
+	ignoreMaxNs      bool
+	precomputedMaxNs namespace.ID
+}
+
+func (n *Hasher) IsMaxNamespaceIDIgnored() bool {
+	return n.ignoreMaxNs
+}
+
+func (n *Hasher) NamespaceSize() namespace.IDSize {
+	return n.NamespaceLen
+}
+
+func NewNmtHasher(baseHasher hash.Hash, nidLen namespace.IDSize, ignoreMaxNamespace bool) *Hasher {
+	return &Hasher{
+		Hash:             baseHasher,
+		NamespaceLen:     nidLen,
+		ignoreMaxNs:      ignoreMaxNamespace,
+		precomputedMaxNs: bytes.Repeat([]byte{0xFF}, int(nidLen)),
+	}
+}
+
+func (n *Hasher) EmptyRoot() []byte {
+	emptyNs := bytes.Repeat([]byte{0}, int(n.NamespaceLen))
+	h := n.Sum(nil)
+	digest := append(append(emptyNs, emptyNs...), h...)
+
+	return digest
+}
+
+// HashLeaf hashes leaves to:
+// ns(rawData) || ns(rawData) || hash(leafPrefix || rawData), where raw data is the leaf's
+// data minus the namespaceID (namely leaf[NamespaceLen:]).
+// Hence, the input length has to be greater or equal to the
+// size of the underlying namespace.ID.
+//
+// Note that for leaves minNs = maxNs = ns(leaf) = leaf[:NamespaceLen].
+//nolint:errcheck
+func (n *Hasher) HashLeaf(leaf []byte) []byte {
+	h := n.Hash
+	h.Reset()
+
+	nID := leaf[:n.NamespaceLen]
+	data := leaf[n.NamespaceLen:]
+	res := append(append(make([]byte, 0), nID...), nID...)
+	data = append([]byte{LeafPrefix}, data...)
+	h.Write(data)
+	return h.Sum(res)
+}
+
+// HashNode hashes inner nodes to:
+// minNID || maxNID || hash(NodePrefix || left || right), where left and right are the full
+// left and right child node bytes, including their respective min and max namespace IDs:
+// left = left.Min() || left.Max() || l.Hash().
+func (n *Hasher) HashNode(l, r []byte) []byte {
+	h := n.Hash
+	h.Reset()
+
+	// the actual hash result of the children got extended (or flagged) by their
+	// children's minNs || maxNs; hence the flagLen = 2 * NamespaceLen:
+	flagLen := 2 * n.NamespaceLen
+	leftMinNs, leftMaxNs := l[:n.NamespaceLen], l[n.NamespaceLen:flagLen]
+	rightMinNs, rightMaxNs := r[:n.NamespaceLen], r[n.NamespaceLen:flagLen]
+
+	minNs := min(leftMinNs, rightMinNs)
+	var maxNs []byte
+	if n.ignoreMaxNs && n.precomputedMaxNs.Equal(leftMinNs) {
+		maxNs = n.precomputedMaxNs
+	} else if n.ignoreMaxNs && n.precomputedMaxNs.Equal(rightMinNs) {
+		maxNs = leftMaxNs
+	} else {
+		maxNs = max(leftMaxNs, rightMaxNs)
+	}
+
+	res := append(append(make([]byte, 0), minNs...), maxNs...)
+
+	// Note this seems a little faster than calling several Write()s on the
+	// underlying Hash function (see: https://github.com/google/trillian/pull/1503):
+	data := append(append(append(
+		make([]byte, 0, 1+len(l)+len(r)),
+		NodePrefix),
+		l...),
+		r...)
+	//nolint:errcheck
+	h.Write(data)
+	return h.Sum(res)
+}
+
+func max(ns []byte, ns2 []byte) []byte {
+	if bytes.Compare(ns, ns2) >= 0 {
+		return ns
+	}
+	return ns2
+}
+
+func min(ns []byte, ns2 []byte) []byte {
+	if bytes.Compare(ns, ns2) <= 0 {
+		return ns
+	}
+	return ns2
+}
diff --git a/internal/hasher_test.go b/hasher_test.go
similarity index 99%
rename from internal/hasher_test.go
rename to hasher_test.go
index e324f9e..3b1fe9c 100644
--- a/internal/hasher_test.go
+++ b/hasher_test.go
@@ -1,4 +1,4 @@
-package internal
+package nmt
 
 import (
 	"crypto"
diff --git a/internal/hasher.go b/internal/hasher.go
deleted file mode 100644
index 0476662..0000000
--- a/internal/hasher.go
+++ /dev/null
@@ -1,121 +0,0 @@
-package internal
-
-import (
-	"bytes"
-	"hash"
-
-	"github.com/lazyledger/nmt/namespace"
-)
-
-const (
-	LeafPrefix = 0
-	NodePrefix = 1
-)
-
-type DefaultNmtHasher struct {
-	hash.Hash
-	NamespaceLen namespace.IDSize
-
-	ignoreMaxNs bool
-	cachedMaxNs namespace.ID
-}
-
-func (n *DefaultNmtHasher) IsMaxNamespaceIDIgnored() bool {
-	return n.ignoreMaxNs
-}
-
-func (n *DefaultNmtHasher) NamespaceSize() namespace.IDSize {
-	return n.NamespaceLen
-}
-
-func NewNmtHasher(baseHasher hash.Hash, nidLen namespace.IDSize, ignoreMaxNamespace bool) *DefaultNmtHasher {
-	return &DefaultNmtHasher{
-		Hash:         baseHasher,
-		NamespaceLen: nidLen,
-		ignoreMaxNs:  ignoreMaxNamespace,
-		cachedMaxNs:  bytes.Repeat([]byte{0xFF}, int(nidLen)),
-	}
-}
-
-func (n *DefaultNmtHasher) EmptyRoot() []byte {
-	emptyNs := bytes.Repeat([]byte{0}, int(n.NamespaceLen))
-	h := n.Sum(nil)
-	digest := append(append(emptyNs, emptyNs...), h...)
-
-	return digest
-}
-
-// HashLeaf hashes leaves to:
-// ns(rawData) || ns(rawData) || hash(leafPrefix || rawData), where raw data is the leaf's
-// data minus the namespaceID (namely leaf[NamespaceLen:]).
-// Note that here minNs = maxNs = ns(leaf) = leaf[:NamespaceLen].
-//nolint:errcheck
-func (n *DefaultNmtHasher) HashLeaf(leaf []byte) []byte {
-	h := n.Hash
-	h.Reset()
-	// TODO this makes assumptions on how the passed in leaf data looks like
-	// instead, this should use the Data / NamespaceID methods:
-	// nID := leaf.NamespaceID()
-	// data := leaf.Data()
-	// But this will requires further refactoring.
-	// It does not matter too much right now as all this is hidden
-	// in the internal package.
-	nID := leaf[:n.NamespaceLen]
-	data := leaf[n.NamespaceLen:]
-	res := append(append(make([]byte, 0), nID...), nID...)
-	data = append([]byte{LeafPrefix}, data...)
-	h.Write(data)
-	return h.Sum(res)
-}
-
-// HashNode hashes inner nodes to:
-// minNID || maxNID || hash(NodePrefix || left || right), where left and right are the full
-// left and right child node bytes, including their respective min and max namespace IDs:
-// left = left.Min() || left.Max() || l.Hash().
-func (n *DefaultNmtHasher) HashNode(l, r []byte) []byte {
-	h := n.Hash
-	h.Reset()
-
-	// the actual hash result of the children got extended (or flagged) by their
-	// children's minNs || maxNs; hence the flagLen = 2 * NamespaceLen:
-	flagLen := 2 * n.NamespaceLen
-	leftMinNs, leftMaxNs := l[:n.NamespaceLen], l[n.NamespaceLen:flagLen]
-	rightMinNs, rightMaxNs := r[:n.NamespaceLen], r[n.NamespaceLen:flagLen]
-
-	minNs := min(leftMinNs, rightMinNs)
-	var maxNs []byte
-	if n.ignoreMaxNs && n.cachedMaxNs.Equal(leftMinNs) {
-		maxNs = n.cachedMaxNs
-	} else if n.ignoreMaxNs && n.cachedMaxNs.Equal(rightMinNs) {
-		maxNs = leftMaxNs
-	} else {
-		maxNs = max(leftMaxNs, rightMaxNs)
-	}
-
-	res := append(append(make([]byte, 0), minNs...), maxNs...)
-
-	// Note this seems a little faster than calling several Write()s on the
-	// underlying Hash function (see: https://github.com/google/trillian/pull/1503):
-	data := append(append(append(
-		make([]byte, 0, 1+len(l)+len(r)),
-		NodePrefix),
-		l...),
-		r...)
-	//nolint:errcheck
-	h.Write(data)
-	return h.Sum(res)
-}
-
-func max(ns []byte, ns2 []byte) []byte {
-	if bytes.Compare(ns, ns2) >= 0 {
-		return ns
-	}
-	return ns2
-}
-
-func min(ns []byte, ns2 []byte) []byte {
-	if bytes.Compare(ns, ns2) <= 0 {
-		return ns
-	}
-	return ns2
-}
diff --git a/internal/nmt_hasher.go b/internal/nmt_hasher.go
deleted file mode 100644
index 91be267..0000000
--- a/internal/nmt_hasher.go
+++ /dev/null
@@ -1,24 +0,0 @@
-package internal
-
-import "github.com/lazyledger/nmt/namespace"
-
-// NmtHasher provides the functions needed to compute an NMT.
-// NmtHasher provides the functions needed to compute an NMT.
-type NmtHasher interface {
-	// EmptyRoot returns the namespaced root for a no-leaves Namespaced Merkle
-	// tree.
-	EmptyRoot() []byte
-
-	// HashLeaf defines how a leaf is hashed.
-	HashLeaf(leaf []byte) []byte
-
-	// HashNode defines how a inner node is hashed.
-	HashNode(l, r []byte) []byte
-
-	// Size returns the size of the underlying hasher.
-	Size() int
-	// Return the size of the namespace
-	NamespaceSize() namespace.IDSize
-	// Returns if the NmtHasher ignores the max namespace.
-	IsMaxNamespaceIDIgnored() bool
-}
diff --git a/nmt.go b/nmt.go
index 1744923..14a513c 100644
--- a/nmt.go
+++ b/nmt.go
@@ -13,11 +13,6 @@ import (
 	"github.com/lazyledger/nmt/namespace"
 )
 
-const (
-	LeafPrefix = internal.LeafPrefix
-	NodePrefix = internal.NodePrefix
-)
-
 var (
 	ErrMismatchedNamespaceSize = errors.New("mismatching namespace sizes")
 	ErrInvalidPushOrder        = errors.New("pushed data has to be lexicographically ordered by namespace IDs")
@@ -74,7 +69,7 @@ func NodeVisitor(nodeVisitorFn NodeVisitorFn) Option {
 }
 
 type NamespacedMerkleTree struct {
-	treeHasher internal.NmtHasher
+	treeHasher *Hasher
 	visit      NodeVisitorFn
 
 	// just cache stuff until we pass in a store and keep all nodes in there
@@ -106,7 +101,7 @@ func New(h hash.Hash, setters ...Option) *NamespacedMerkleTree {
 	for _, setter := range setters {
 		setter(opts)
 	}
-	treeHasher := internal.NewNmtHasher(h, opts.NamespaceIDSize, opts.IgnoreMaxNamespace)
+	treeHasher := NewNmtHasher(h, opts.NamespaceIDSize, opts.IgnoreMaxNamespace)
 	return &NamespacedMerkleTree{
 		treeHasher:      treeHasher,
 		visit:           opts.NodeVisitor,
diff --git a/nmt_test.go b/nmt_test.go
index 355b514..3920e0a 100644
--- a/nmt_test.go
+++ b/nmt_test.go
@@ -662,16 +662,6 @@ func repeat(data []namespace.PrefixedData, num int) []namespace.PrefixedData {
 	return res
 }
 
-func sum(hash crypto.Hash, data ...[]byte) []byte {
-	h := hash.New()
-	for _, d := range data {
-		//nolint:errcheck
-		h.Write(d)
-	}
-
-	return h.Sum(nil)
-}
-
 func generateRandNamespacedRawData(total int, nidSize int, leafSize int) [][]byte {
 	data := make([][]byte, total)
 	for i := 0; i < total; i++ {
diff --git a/proof.go b/proof.go
index 79f1a4e..921d3a5 100644
--- a/proof.go
+++ b/proof.go
@@ -7,8 +7,6 @@ import (
 	"math/bits"
 
 	"github.com/lazyledger/merkletree"
-
-	"github.com/lazyledger/nmt/internal"
 	"github.com/lazyledger/nmt/namespace"
 )
 
@@ -99,7 +97,7 @@ func NewAbsenceProof(proofStart, proofEnd int, proofNodes [][]byte, leafHash []b
 // the provided data in the tree. Additionally, it verifies that the namespace
 // is complete and no leaf of that namespace was left out in the proof.
 func (proof Proof) VerifyNamespace(h hash.Hash, nID namespace.ID, data [][]byte, root namespace.IntervalDigest) bool {
-	nth := internal.NewNmtHasher(h, nID.Size(), proof.isMaxNamespaceIDIgnored)
+	nth := NewNmtHasher(h, nID.Size(), proof.isMaxNamespaceIDIgnored)
 	if nID.Size() != root.Min().Size() || nID.Size() != root.Max().Size() {
 		// conflicting namespace sizes
 		return false
@@ -139,7 +137,7 @@ func (proof Proof) VerifyNamespace(h hash.Hash, nID namespace.ID, data [][]byte,
 	return proof.verifyLeafHashes(nth, true, nID, gotLeafHashes, root)
 }
 
-func (proof Proof) verifyLeafHashes(nth internal.NmtHasher, verifyCompleteness bool, nID namespace.ID, gotLeafHashes [][]byte, root namespace.IntervalDigest) bool {
+func (proof Proof) verifyLeafHashes(nth *Hasher, verifyCompleteness bool, nID namespace.ID, gotLeafHashes [][]byte, root namespace.IntervalDigest) bool {
 	// The code below is almost identical to NebulousLabs'
 	// merkletree.VerifyMultiRangeProof.
 	//
@@ -207,7 +205,7 @@ func (proof Proof) verifyLeafHashes(nth internal.NmtHasher, verifyCompleteness b
 }
 
 func (proof Proof) VerifyInclusion(h hash.Hash, nid namespace.ID, data []byte, root namespace.IntervalDigest) bool {
-	nth := internal.NewNmtHasher(h, nid.Size(), proof.isMaxNamespaceIDIgnored)
+	nth := NewNmtHasher(h, nid.Size(), proof.isMaxNamespaceIDIgnored)
 	leafData := append(nid, data...)
 	return proof.verifyLeafHashes(nth, false, nid, [][]byte{nth.HashLeaf(leafData)}, root)
 }

From 6cb03eeda1732e1ee8add1d17e4032646f34be11 Mon Sep 17 00:00:00 2001
From: Ismail Khoffi <Ismail.Khoffi@gmail.com>
Date: Sat, 13 Feb 2021 21:58:48 +0100
Subject: [PATCH 2/3] fix bug/typo and minor doc improvements

---
 hasher.go | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/hasher.go b/hasher.go
index 6976265..9a4aa9a 100644
--- a/hasher.go
+++ b/hasher.go
@@ -27,7 +27,10 @@ var defaultHasher = NewNmtHasher(sha256.New(), DefaultNamespaceIDLen, true)
 // where rawData is the leaf's data minus the namespace.ID prefix
 // (namely namespacedData[NamespaceLen:]).
 //
-// Note that for the input len(namespacedData) >= DefaultNamespaceIDLen has to hold.
+// Note that, unlike other cryptographic hash functions, this one
+// makes assumptions about its input:
+// len(namespacedData) >= DefaultNamespaceIDLen has to hold,
+// as the first DefaultNamespaceIDLen bytes are interpreted as the namespace ID.
 // If the input does not fulfil this, we will panic.
 // The output will be of length 2*DefaultNamespaceIDLen+sha256.Size = 48 bytes.
 func Sha256Namespace8FlaggedLeaf(namespacedData []byte) []byte {
@@ -44,7 +47,7 @@ func Sha256Namespace8FlaggedLeaf(namespacedData []byte) []byte {
 func Sha256Namespace8FlaggedInner(leftRight []byte) []byte {
 	const flagLen = DefaultNamespaceIDLen * 2
 	sha256Len := defaultHasher.Size()
-	left := leftRight[flagLen:+sha256Len]
+	left := leftRight[:flagLen+sha256Len]
 	right := leftRight[flagLen+sha256Len:]
 
 	return defaultHasher.HashNode(left, right)

From b2f848efad53bcf7b85a5de2254ac51cfdba30a0 Mon Sep 17 00:00:00 2001
From: Ismail Khoffi <Ismail.Khoffi@gmail.com>
Date: Sat, 13 Feb 2021 23:31:26 +0100
Subject: [PATCH 3/3] Some sanity checks as tests

---
 hasher_test.go | 51 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)

diff --git a/hasher_test.go b/hasher_test.go
index 3b1fe9c..37c93a5 100644
--- a/hasher_test.go
+++ b/hasher_test.go
@@ -99,6 +99,57 @@ func Test_namespacedTreeHasher_HashNode(t *testing.T) {
 	}
 }
 
+func TestSha256Namespace8FlaggedLeaf(t *testing.T) {
+	tests := []struct {
+		name      string
+		data      []byte
+		wantPanic bool
+		wantLen   int
+	}{
+		{"input too short: panic", []byte("smaller"), true, 0},
+		{"input 8 byte: Ok", []byte("8bytesss"), false, 48},
+		{"input greater 8 byte: Ok", []byte("8bytesssSomeNotSoRandData"), false, 48},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			if tt.wantPanic {
+				shouldPanic(t, func() {
+					Sha256Namespace8FlaggedLeaf(tt.data)
+				})
+			} else if got := Sha256Namespace8FlaggedLeaf(tt.data); len(got) != tt.wantLen {
+				t.Errorf("len(Sha256Namespace8FlaggedLeaf()) = %v, want %v", len(got), tt.wantLen)
+			}
+		})
+	}
+}
+
+func TestSha256Namespace8FlaggedInner(t *testing.T) {
+	nilHash := sha256.Sum256(nil)
+	nid1 := []byte("nid01234")
+	nid2 := []byte("nid12345")
+	tests := []struct {
+		name      string
+		data      []byte
+		wantPanic bool
+		wantLen   int
+	}{
+		{"input smaller 48: panic", []byte("smaller48"), true, 0},
+		{"input still too small: panic", append(append(nid1, nid2...), []byte("data")...), true, 0},
+		{"valid input: ok", append(append(append(nid1, nilHash[:]...), nid2...), nilHash[:]...), false, 48},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			if tt.wantPanic {
+				shouldPanic(t, func() {
+					Sha256Namespace8FlaggedInner(tt.data)
+				})
+			} else if got := Sha256Namespace8FlaggedInner(tt.data); len(got) != tt.wantLen {
+				t.Errorf("len(Sha256Namespace8FlaggedInner()) = %v, want %v", len(got), tt.wantLen)
+			}
+		})
+	}
+}
+
 func sum(hash crypto.Hash, data ...[]byte) []byte {
 	h := hash.New()
 	for _, d := range data {