From 0648f4157d3bce9c5a9c3e0793c32f12a4f4223f Mon Sep 17 00:00:00 2001 From: Ismail Khoffi Date: Wed, 4 Nov 2020 18:13:20 +0100 Subject: [PATCH 1/7] Basic DA functionality (#83) * move Messages field to the end of Block.Data * Add some constants for share computation and the NMT: - also a bunch of todos regarding shares computation * First (compiling) stab on creating shares * Test with Evidence and fix bug discovered by test * remove resolved todos * introduce split method * Introduce LenDelimitedMarshaler interface and some reformatting * Introduce TxLenDelimitedMarshaler * add some test cases * fix some comments * fix some comments & linter * Add reserved namespaces to params * Move ll-specific consts into a separate file (consts.go) * Add MarshalDelimited to HexBytes * Add tail-padding shares * Add ComputeShares method on Data to compute all shares * Fix compute the next square num and not the next power of two * lints * Unexport MakeShares function: - it's likely to change and it doesn't have to be part of the public API * lints 2 * First stab on computing row/column roots * fix rebase glitches: - move DA related constants out of params.go * refactor MakeBlock to take in interm. state roots and messages * refactor state.MakeBlock too * Add todos LenDelimitedMarshaler and extract appendShares logic * Simplify shares computation: remove LenDelimitedMarshaler abstraction * actually use DA header to compute the DataRoot everywhere (will lead to failing tests for sure) * WIP: Update block related core data structures in protobuf too * WIP: fix zero shares edge-case and get rid of Block.Data.hash (use dataAvailabilityHeader.Hash() instead) * Fixed tests, only 3 failing tests to go: TestReapMaxBytesMaxGas, TestTxFilter, TestMempoolFilters * Fix TestTxFilter: - the size of the wrapping Data{} proto message increased a few bytes * Fix Message proto and `DataFromProto` * Fix last 2 remaining tests related to the increased block/block.Data size * Use infectious lib instead of leopard * proto-lint: snake_case * some lints and minor changes * linter * panic if pushing to tree fails, extend Data.ToProto() * revert renaming in comment * add todo about refactoring as soon as the rsmt2d allows the user to choose the merkle tree --- internal/consensus/replay_test.go | 54 +++++++++++++++++++++++++++++++ state/helpers_test.go | 4 +++ state/tx_filter_test.go | 2 +- types/block_test.go | 12 +++---- 4 files changed, 65 insertions(+), 7 deletions(-) diff --git a/internal/consensus/replay_test.go b/internal/consensus/replay_test.go index 4d1c9c6b26..b8b1f74522 100644 --- a/internal/consensus/replay_test.go +++ b/internal/consensus/replay_test.go @@ -1001,6 +1001,60 @@ func TestHandshakePanicsIfAppReturnsWrongAppHash(t *testing.T) { } } +func makeBlocks(n int, state *sm.State, privVal types.PrivValidator) []*types.Block { + blocks := make([]*types.Block, 0) + + var ( + prevBlock *types.Block + prevBlockMeta *types.BlockMeta + ) + + appHeight := byte(0x01) + for i := 0; i < n; i++ { + height := int64(i + 1) + + block, parts := makeBlock(*state, prevBlock, prevBlockMeta, privVal, height) + blocks = append(blocks, block) + + prevBlock = block + prevBlockMeta = types.NewBlockMeta(block, parts) + + // update state + state.AppHash = []byte{appHeight} + appHeight++ + state.LastBlockHeight = height + } + + return blocks +} + +func makeBlock(state sm.State, lastBlock *types.Block, lastBlockMeta *types.BlockMeta, + privVal types.PrivValidator, height int64) (*types.Block, *types.PartSet) { + + lastCommit := 
types.NewCommit(height-1, 0, types.BlockID{}, nil) + if height > 1 { + vote, _ := types.MakeVote( + lastBlock.Header.Height, + lastBlockMeta.BlockID, + state.Validators, + privVal, + lastBlock.Header.ChainID, + time.Now()) + lastCommit = types.NewCommit(vote.Height, vote.Round, + lastBlockMeta.BlockID, []types.CommitSig{vote.CommitSig()}) + } + + return state.MakeBlock( + height, + []types.Tx{}, + nil, + nil, + nil, + lastCommit, + state.Validators.GetProposer().Address, + ) +} + type badApp struct { abci.BaseApplication numBlocks byte diff --git a/state/helpers_test.go b/state/helpers_test.go index d28982b2c0..92b19379ba 100644 --- a/state/helpers_test.go +++ b/state/helpers_test.go @@ -22,6 +22,10 @@ import ( "github.com/tendermint/tendermint/types" ) +const ( + nTxsPerBlock = 10 +) + type paramsChangeTestCase struct { height int64 params types.ConsensusParams diff --git a/state/tx_filter_test.go b/state/tx_filter_test.go index c79ee1241e..4092fcbc2c 100644 --- a/state/tx_filter_test.go +++ b/state/tx_filter_test.go @@ -13,7 +13,7 @@ import ( func TestTxFilter(t *testing.T) { genDoc := randomGenesisDoc() - genDoc.ConsensusParams.Block.MaxBytes = 3000 + genDoc.ConsensusParams.Block.MaxBytes = 3040 genDoc.ConsensusParams.Evidence.MaxBytes = 1500 // Max size of Txs is much smaller than size of block, diff --git a/types/block_test.go b/types/block_test.go index d02f374d71..371730b5d0 100644 --- a/types/block_test.go +++ b/types/block_test.go @@ -460,10 +460,10 @@ func TestBlockMaxDataBytes(t *testing.T) { 0: {-10, 1, 0, true, 0}, 1: {10, 1, 0, true, 0}, 2: {841, 1, 0, true, 0}, - 3: {842, 1, 0, false, 0}, - 4: {843, 1, 0, false, 1}, - 5: {954, 2, 0, false, 1}, - 6: {1053, 2, 100, false, 0}, + 3: {845, 1, 0, false, 0}, + 4: {846, 1, 0, false, 1}, + 5: {956, 2, 0, false, 1}, + 6: {1056, 2, 100, false, 0}, } for i, tc := range testCases { @@ -491,8 +491,8 @@ func TestBlockMaxDataBytesNoEvidence(t *testing.T) { 0: {-10, 1, true, 0}, 1: {10, 1, true, 0}, 2: {841, 1, true, 0}, - 3: {842, 1, false, 0}, - 4: {843, 1, false, 1}, + 3: {845, 1, false, 0}, + 4: {846, 1, false, 1}, } for i, tc := range testCases { From c093b422f5561e9dc78fe4a1de4d0829e1f65435 Mon Sep 17 00:00:00 2001 From: evan-forbes Date: Mon, 20 Sep 2021 18:26:35 -0500 Subject: [PATCH 2/7] clean up some unused test helper functions --- internal/consensus/replay_test.go | 54 ------------------------------- state/tx_filter_test.go | 2 +- 2 files changed, 1 insertion(+), 55 deletions(-) diff --git a/internal/consensus/replay_test.go b/internal/consensus/replay_test.go index b8b1f74522..4d1c9c6b26 100644 --- a/internal/consensus/replay_test.go +++ b/internal/consensus/replay_test.go @@ -1001,60 +1001,6 @@ func TestHandshakePanicsIfAppReturnsWrongAppHash(t *testing.T) { } } -func makeBlocks(n int, state *sm.State, privVal types.PrivValidator) []*types.Block { - blocks := make([]*types.Block, 0) - - var ( - prevBlock *types.Block - prevBlockMeta *types.BlockMeta - ) - - appHeight := byte(0x01) - for i := 0; i < n; i++ { - height := int64(i + 1) - - block, parts := makeBlock(*state, prevBlock, prevBlockMeta, privVal, height) - blocks = append(blocks, block) - - prevBlock = block - prevBlockMeta = types.NewBlockMeta(block, parts) - - // update state - state.AppHash = []byte{appHeight} - appHeight++ - state.LastBlockHeight = height - } - - return blocks -} - -func makeBlock(state sm.State, lastBlock *types.Block, lastBlockMeta *types.BlockMeta, - privVal types.PrivValidator, height int64) (*types.Block, *types.PartSet) { - - lastCommit := 
types.NewCommit(height-1, 0, types.BlockID{}, nil) - if height > 1 { - vote, _ := types.MakeVote( - lastBlock.Header.Height, - lastBlockMeta.BlockID, - state.Validators, - privVal, - lastBlock.Header.ChainID, - time.Now()) - lastCommit = types.NewCommit(vote.Height, vote.Round, - lastBlockMeta.BlockID, []types.CommitSig{vote.CommitSig()}) - } - - return state.MakeBlock( - height, - []types.Tx{}, - nil, - nil, - nil, - lastCommit, - state.Validators.GetProposer().Address, - ) -} - type badApp struct { abci.BaseApplication numBlocks byte diff --git a/state/tx_filter_test.go b/state/tx_filter_test.go index 4092fcbc2c..c79ee1241e 100644 --- a/state/tx_filter_test.go +++ b/state/tx_filter_test.go @@ -13,7 +13,7 @@ import ( func TestTxFilter(t *testing.T) { genDoc := randomGenesisDoc() - genDoc.ConsensusParams.Block.MaxBytes = 3040 + genDoc.ConsensusParams.Block.MaxBytes = 3000 genDoc.ConsensusParams.Evidence.MaxBytes = 1500 // Max size of Txs is much smaller than size of block, From a1e69d1e7aafa1c7223fd57de1f8c561738c2c71 Mon Sep 17 00:00:00 2001 From: evan-forbes Date: Mon, 20 Sep 2021 18:30:39 -0500 Subject: [PATCH 3/7] linter --- state/helpers_test.go | 4 ---- 1 file changed, 4 deletions(-) diff --git a/state/helpers_test.go b/state/helpers_test.go index 92b19379ba..d28982b2c0 100644 --- a/state/helpers_test.go +++ b/state/helpers_test.go @@ -22,10 +22,6 @@ import ( "github.com/tendermint/tendermint/types" ) -const ( - nTxsPerBlock = 10 -) - type paramsChangeTestCase struct { height int64 params types.ConsensusParams From cb58051a6bdb4f6866f8379ccf9c4cab7e78296f Mon Sep 17 00:00:00 2001 From: evan-forbes Date: Mon, 20 Sep 2021 18:40:57 -0500 Subject: [PATCH 4/7] still debugging the exact right number of bytes for max data... --- types/block_test.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/types/block_test.go b/types/block_test.go index 371730b5d0..d02f374d71 100644 --- a/types/block_test.go +++ b/types/block_test.go @@ -460,10 +460,10 @@ func TestBlockMaxDataBytes(t *testing.T) { 0: {-10, 1, 0, true, 0}, 1: {10, 1, 0, true, 0}, 2: {841, 1, 0, true, 0}, - 3: {845, 1, 0, false, 0}, - 4: {846, 1, 0, false, 1}, - 5: {956, 2, 0, false, 1}, - 6: {1056, 2, 100, false, 0}, + 3: {842, 1, 0, false, 0}, + 4: {843, 1, 0, false, 1}, + 5: {954, 2, 0, false, 1}, + 6: {1053, 2, 100, false, 0}, } for i, tc := range testCases { @@ -491,8 +491,8 @@ func TestBlockMaxDataBytesNoEvidence(t *testing.T) { 0: {-10, 1, true, 0}, 1: {10, 1, true, 0}, 2: {841, 1, true, 0}, - 3: {845, 1, false, 0}, - 4: {846, 1, false, 1}, + 3: {842, 1, false, 0}, + 4: {843, 1, false, 1}, } for i, tc := range testCases { From 801cfc8ec4bdcbf48759296da270838b01d25bd5 Mon Sep 17 00:00:00 2001 From: John Adler Date: Thu, 25 Mar 2021 11:49:22 -0400 Subject: [PATCH 5/7] Implement spec-compliant share splitting (#246) * Export block data compute shares. * Refactor to use ShareSize constant directly. * Change message splitting to prefix namespace ID. * Implement chunking for contiguous. * Add termination condition. * Rename append contiguous to split contiguous. * Update test for small tx. * Add test for two contiguous. * Make tx and msg adjusted share sizes exported constants. * Panic on hopefully-unreachable condition instead of silently skipping. * Update hardcoded response for block format. 
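* Resulting share layout: message shares are the namespace ID followed by the message data; contiguous shares (txs, intermediate state roots, evidence) are the namespace ID, a reserved start byte, then the data; all shares are zero-padded to ShareSize.
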
Co-authored-by: Ismail Khoffi --- types/block.go | 12 +++--- types/shares.go | 80 +++++++++++++++++++++++++++++++++------ types/shares_test.go | 90 ++++++++++++++++++++++++++++++++------------ types/tx.go | 4 +- 4 files changed, 142 insertions(+), 44 deletions(-) diff --git a/types/block.go b/types/block.go index 3736c3e1e6..f961fa34e2 100644 --- a/types/block.go +++ b/types/block.go @@ -1120,26 +1120,26 @@ type IntermediateStateRoots struct { RawRootsList []tmbytes.HexBytes `json:"intermediate_roots"` } -func (roots IntermediateStateRoots) splitIntoShares(shareSize int) NamespacedShares { +func (roots IntermediateStateRoots) splitIntoShares() NamespacedShares { shares := make([]NamespacedShare, 0) for _, root := range roots.RawRootsList { rawData, err := root.MarshalDelimited() if err != nil { panic(fmt.Sprintf("app returned intermediate state root that can not be encoded %#v", root)) } - shares = appendToShares(shares, consts.IntermediateStateRootsNamespaceID, rawData, shareSize) + shares = appendToShares(shares, consts.IntermediateStateRootsNamespaceID, rawData) } return shares } -func (msgs Messages) splitIntoShares(shareSize int) NamespacedShares { +func (msgs Messages) splitIntoShares() NamespacedShares { shares := make([]NamespacedShare, 0) for _, m := range msgs.MessagesList { rawData, err := m.MarshalDelimited() if err != nil { panic(fmt.Sprintf("app accepted a Message that can not be encoded %#v", m)) } - shares = appendToShares(shares, m.NamespaceID, rawData, shareSize) + shares = appendToShares(shares, m.NamespaceID, rawData) } return shares } @@ -1346,7 +1346,7 @@ func (data *EvidenceData) FromProto(eviData *tmproto.EvidenceList) error { return nil } -func (data *EvidenceData) splitIntoShares(shareSize int) NamespacedShares { +func (data *EvidenceData) splitIntoShares() NamespacedShares { shares := make([]NamespacedShare, 0) for _, ev := range data.Evidence { var rawData []byte @@ -1367,7 +1367,7 @@ func (data *EvidenceData) splitIntoShares(shareSize int) NamespacedShares { if err != nil { panic(fmt.Sprintf("evidence included in evidence pool that can not be encoded %#v, err: %v", ev, err)) } - shares = appendToShares(shares, consts.EvidenceNamespaceID, rawData, shareSize) + shares = appendToShares(shares, consts.EvidenceNamespaceID, rawData) } return shares } diff --git a/types/shares.go b/types/shares.go index a88677334a..55238a1db0 100644 --- a/types/shares.go +++ b/types/shares.go @@ -59,28 +59,51 @@ func (m Message) MarshalDelimited() ([]byte, error) { return append(lenBuf[:n], m.Data...), nil } -func appendToShares(shares []NamespacedShare, nid namespace.ID, rawData []byte, shareSize int) []NamespacedShare { - if len(rawData) < shareSize { - rawShare := rawData - paddedShare := zeroPadIfNecessary(rawShare, shareSize) +// appendToShares appends raw data as shares. +// Used for messages. +func appendToShares(shares []NamespacedShare, nid namespace.ID, rawData []byte) []NamespacedShare { + if len(rawData) < consts.MsgShareSize { + rawShare := []byte(append(nid, rawData...)) + paddedShare := zeroPadIfNecessary(rawShare, consts.ShareSize) + share := NamespacedShare{paddedShare, nid} + shares = append(shares, share) + } else { // len(rawData) >= MsgShareSize + shares = append(shares, split(rawData, nid)...) + } + return shares +} + +// splitContiguous splits multiple raw data contiguously as shares. +// Used for transactions, intermediate state roots, and evidence. 
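+// Each resulting share is laid out as the namespace ID, a single reserved
+// byte recording the offset (within the full share) at which the second data
+// segment begins, or zero if the share holds only one segment, followed by
+// the chunk data; the whole share is zero-padded to consts.ShareSize.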
+func splitContiguous(nid namespace.ID, rawDatas [][]byte) []NamespacedShare { + shares := make([]NamespacedShare, 0) + // Index into the outer slice of rawDatas + outerIndex := 0 + // Index into the inner slice of rawDatas + innerIndex := 0 + for outerIndex < len(rawDatas) { + var rawData []byte + startIndex := 0 + rawData, outerIndex, innerIndex, startIndex = getNextChunk(rawDatas, outerIndex, innerIndex, consts.TxShareSize) + rawShare := []byte(append(append(nid, byte(startIndex)), rawData...)) + paddedShare := zeroPadIfNecessary(rawShare, consts.ShareSize) share := NamespacedShare{paddedShare, nid} shares = append(shares, share) - } else { // len(rawData) >= shareSize - shares = append(shares, split(rawData, shareSize, nid)...) } return shares } // TODO(ismail): implement corresponding merge method for clients requesting // shares for a particular namespace -func split(rawData []byte, shareSize int, nid namespace.ID) []NamespacedShare { +func split(rawData []byte, nid namespace.ID) []NamespacedShare { shares := make([]NamespacedShare, 0) - firstRawShare := rawData[:shareSize] + firstRawShare := []byte(append(nid, rawData[:consts.MsgShareSize]...)) shares = append(shares, NamespacedShare{firstRawShare, nid}) - rawData = rawData[shareSize:] + rawData = rawData[consts.MsgShareSize:] for len(rawData) > 0 { - shareSizeOrLen := min(shareSize, len(rawData)) - paddedShare := zeroPadIfNecessary(rawData[:shareSizeOrLen], shareSize) + shareSizeOrLen := min(consts.MsgShareSize, len(rawData)) + rawShare := []byte(append(nid, rawData[:shareSizeOrLen]...)) + paddedShare := zeroPadIfNecessary(rawShare, consts.ShareSize) share := NamespacedShare{paddedShare, nid} shares = append(shares, share) rawData = rawData[shareSizeOrLen:] @@ -88,6 +111,41 @@ func split(rawData []byte, shareSize int, nid namespace.ID) []NamespacedShare { return shares } +// getNextChunk gets the next chunk for contiguous shares +// Precondition: none of the slices in rawDatas is zero-length +// This precondition should always hold at this point since zero-length txs are simply invalid. +func getNextChunk(rawDatas [][]byte, outerIndex int, innerIndex int, width int) ([]byte, int, int, int) { + rawData := make([]byte, 0, width) + startIndex := 0 + firstBytesToFetch := 0 + + curIndex := 0 + for curIndex < width && outerIndex < len(rawDatas) { + bytesToFetch := min(len(rawDatas[outerIndex])-innerIndex, width-curIndex) + if bytesToFetch == 0 { + panic("zero-length contiguous share data is invalid") + } + if curIndex == 0 { + firstBytesToFetch = bytesToFetch + } + // If we've already placed some data in this chunk, that means + // a new data segment begins + if curIndex != 0 { + // Offset by the fixed reserved bytes at the beginning of the share + startIndex = firstBytesToFetch + consts.NamespaceSize + consts.ShareReservedBytes + } + rawData = append(rawData, rawDatas[outerIndex][innerIndex:innerIndex+bytesToFetch]...) 
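+		// advance the inner cursor; once the current element of rawDatas is
+		// exhausted, reset the cursor and move on to the next element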
+ innerIndex += bytesToFetch + if innerIndex >= len(rawDatas[outerIndex]) { + innerIndex = 0 + outerIndex++ + } + curIndex += bytesToFetch + } + + return rawData, outerIndex, innerIndex, startIndex +} + func GenerateTailPaddingShares(n int, shareWidth int) NamespacedShares { shares := make([]NamespacedShare, n) for i := 0; i < n; i++ { diff --git a/types/shares_test.go b/types/shares_test.go index a180401071..ee18db6863 100644 --- a/types/shares_test.go +++ b/types/shares_test.go @@ -11,13 +11,12 @@ import ( ) type splitter interface { - splitIntoShares(shareSize int) NamespacedShares + splitIntoShares() NamespacedShares } func TestMakeShares(t *testing.T) { reservedTxNamespaceID := append(bytes.Repeat([]byte{0}, 7), 1) reservedEvidenceNamespaceID := append(bytes.Repeat([]byte{0}, 7), 3) - // resveredIntermediateStateRootsNamespaceID := append(bytes.Repeat([]byte{0}, 7), 2) val := NewMockPV() blockID := makeBlockID([]byte("blockhash"), 1000, []byte("partshash")) blockID2 := makeBlockID([]byte("blockhash2"), 1000, []byte("partshash")) @@ -38,12 +37,11 @@ func TestMakeShares(t *testing.T) { } msg1Marshaled, _ := msg1.MarshalDelimited() if err != nil { - t.Fatalf("Could not encode evidence: %v, error: %v", testEvidence, err) + t.Fatalf("Could not encode evidence: %v, error: %v\n", testEvidence, err) } type args struct { - data splitter - shareSize int + data splitter } tests := []struct { name string @@ -55,50 +53,91 @@ func TestMakeShares(t *testing.T) { data: &EvidenceData{ Evidence: []Evidence{testEvidence}, }, - shareSize: consts.ShareSize, }, NamespacedShares{NamespacedShare{ - Share: testEvidenceBytes[:consts.ShareSize], - ID: reservedEvidenceNamespaceID, + Share: append( + append(reservedEvidenceNamespaceID, byte(0)), + testEvidenceBytes[:consts.TxShareSize]..., + ), + ID: reservedEvidenceNamespaceID, }, NamespacedShare{ - Share: zeroPadIfNecessary(testEvidenceBytes[consts.ShareSize:], consts.ShareSize), - ID: reservedEvidenceNamespaceID, + Share: append( + append(reservedEvidenceNamespaceID, byte(0)), + zeroPadIfNecessary(testEvidenceBytes[consts.TxShareSize:], consts.TxShareSize)..., + ), + ID: reservedEvidenceNamespaceID, }}, }, {"small LL Tx", args{ - data: Txs{smolTx}, - shareSize: consts.ShareSize, + data: Txs{smolTx}, }, NamespacedShares{ NamespacedShare{ - Share: zeroPadIfNecessary(smolTxLenDelimited, consts.ShareSize), - ID: reservedTxNamespaceID, + Share: append( + append(reservedTxNamespaceID, byte(0)), + zeroPadIfNecessary(smolTxLenDelimited, consts.TxShareSize)..., + ), + ID: reservedTxNamespaceID, }, }, }, {"one large LL Tx", args{ - data: Txs{largeTx}, - shareSize: consts.ShareSize, + data: Txs{largeTx}, }, NamespacedShares{ NamespacedShare{ - Share: Share(largeTxLenDelimited[:consts.ShareSize]), - ID: reservedTxNamespaceID, + Share: append( + append(reservedTxNamespaceID, byte(0)), + largeTxLenDelimited[:consts.TxShareSize]..., + ), + ID: reservedTxNamespaceID, }, NamespacedShare{ - Share: zeroPadIfNecessary(largeTxLenDelimited[consts.ShareSize:], consts.ShareSize), - ID: reservedTxNamespaceID, + Share: append( + append(reservedTxNamespaceID, byte(0)), + zeroPadIfNecessary(largeTxLenDelimited[consts.TxShareSize:], consts.TxShareSize)..., + ), + ID: reservedTxNamespaceID, + }, + }, + }, + {"large then small LL Tx", + args{ + data: Txs{largeTx, smolTx}, + }, + NamespacedShares{ + NamespacedShare{ + Share: append( + append(reservedTxNamespaceID, byte(0)), + largeTxLenDelimited[:consts.TxShareSize]..., + ), + ID: reservedTxNamespaceID, + }, + NamespacedShare{ + Share: 
append( + append(reservedTxNamespaceID, byte(len(largeTxLenDelimited)-consts.TxShareSize+consts.NamespaceSize+consts.ShareReservedBytes)), + zeroPadIfNecessary( + append(largeTxLenDelimited[consts.TxShareSize:], smolTxLenDelimited...), + consts.TxShareSize, + )..., + ), + ID: reservedTxNamespaceID, }, }, }, {"ll-app message", args{ - data: Messages{[]Message{msg1}}, - shareSize: consts.ShareSize, + data: Messages{[]Message{msg1}}, }, NamespacedShares{ - NamespacedShare{zeroPadIfNecessary(msg1Marshaled, consts.ShareSize), msg1.NamespaceID}, + NamespacedShare{ + Share: append( + []byte(msg1.NamespaceID), + zeroPadIfNecessary(msg1Marshaled, consts.MsgShareSize)..., + ), + ID: msg1.NamespaceID, + }, }, }, } @@ -106,8 +145,9 @@ func TestMakeShares(t *testing.T) { tt := tt // stupid scopelint :-/ i := i t.Run(tt.name, func(t *testing.T) { - if got := tt.args.data.splitIntoShares(tt.args.shareSize); !reflect.DeepEqual(got, tt.want) { - t.Errorf("%v: makeShares() = \n%v\nwant\n%v", i, got, tt.want) + got := tt.args.data.splitIntoShares() + if !reflect.DeepEqual(got, tt.want) { + t.Errorf("%v: makeShares() = \n%+v\nwant\n%+v\n", i, got, tt.want) } }) } diff --git a/types/tx.go b/types/tx.go index 1cfdcf5880..5abae88832 100644 --- a/types/tx.go +++ b/types/tx.go @@ -80,14 +80,14 @@ func (txs Txs) Proof(i int) TxProof { } } -func (txs Txs) splitIntoShares(shareSize int) NamespacedShares { +func (txs Txs) splitIntoShares() NamespacedShares { shares := make([]NamespacedShare, 0) for _, tx := range txs { rawData, err := tx.MarshalDelimited() if err != nil { panic(fmt.Sprintf("included Tx in mem-pool that can not be encoded %v", tx)) } - shares = appendToShares(shares, consts.TxNamespaceID, rawData, shareSize) + shares = appendToShares(shares, consts.TxNamespaceID, rawData) } return shares } From eeba8088e8f5a8354b9edaec6b1f8c7141f07897 Mon Sep 17 00:00:00 2001 From: Evan Forbes <42654277+evan-forbes@users.noreply.github.com> Date: Fri, 26 Mar 2021 12:48:48 -0500 Subject: [PATCH 6/7] fix overwrite bug (#251) * fix overwrite bug and stop splitting shares of size MsgShareSize * remove ineffectual code * review feedback: better docs Co-authored-by: Ismail Khoffi * remove uneeded copy and only fix the source of the bug Co-authored-by: Ismail Khoffi * fix overwrite bug while also being consistent with using NamespacedShares * update to the latest rsmt2d for the nmt wrapper Co-authored-by: Ismail Khoffi --- types/shares.go | 8 ++++--- types/shares_test.go | 53 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+), 3 deletions(-) diff --git a/types/shares.go b/types/shares.go index 55238a1db0..a96ff36969 100644 --- a/types/shares.go +++ b/types/shares.go @@ -62,12 +62,12 @@ func (m Message) MarshalDelimited() ([]byte, error) { // appendToShares appends raw data as shares. // Used for messages. func appendToShares(shares []NamespacedShare, nid namespace.ID, rawData []byte) []NamespacedShare { - if len(rawData) < consts.MsgShareSize { + if len(rawData) <= consts.MsgShareSize { rawShare := []byte(append(nid, rawData...)) paddedShare := zeroPadIfNecessary(rawShare, consts.ShareSize) share := NamespacedShare{paddedShare, nid} shares = append(shares, share) - } else { // len(rawData) >= MsgShareSize + } else { // len(rawData) > MsgShareSize shares = append(shares, split(rawData, nid)...) 
} return shares @@ -102,7 +102,9 @@ func split(rawData []byte, nid namespace.ID) []NamespacedShare { rawData = rawData[consts.MsgShareSize:] for len(rawData) > 0 { shareSizeOrLen := min(consts.MsgShareSize, len(rawData)) - rawShare := []byte(append(nid, rawData[:shareSizeOrLen]...)) + rawShare := make([]byte, consts.NamespaceSize) + copy(rawShare, nid) + rawShare = append(rawShare, rawData[:shareSizeOrLen]...) paddedShare := zeroPadIfNecessary(rawShare, consts.ShareSize) share := NamespacedShare{paddedShare, nid} shares = append(shares, share) diff --git a/types/shares_test.go b/types/shares_test.go index ee18db6863..5fed4814ee 100644 --- a/types/shares_test.go +++ b/types/shares_test.go @@ -2,10 +2,13 @@ package types import ( "bytes" + "crypto/rand" "reflect" + "sort" "testing" "github.com/celestiaorg/nmt/namespace" + "github.com/stretchr/testify/assert" "github.com/tendermint/tendermint/internal/libs/protoio" "github.com/tendermint/tendermint/pkg/consts" ) @@ -176,3 +179,53 @@ func Test_zeroPadIfNecessary(t *testing.T) { }) } } + +func Test_appendToSharesOverwrite(t *testing.T) { + var shares NamespacedShares + + // generate some arbitrary namespaced shares first share that must be split + newShare := generateRandomNamespacedShares(1, consts.MsgShareSize+1)[0] + + // make a copy of the portion of the share to check if it's overwritten later + extraCopy := make([]byte, consts.MsgShareSize) + copy(extraCopy, newShare.Share[:consts.MsgShareSize]) + + // use appendToShares to add our new share + appendToShares(shares, newShare.ID, newShare.Share) + + // check if the original share data has been overwritten. + assert.Equal(t, extraCopy, []byte(newShare.Share[:consts.MsgShareSize])) +} + +func generateRandomNamespacedShares(count, leafSize int) []NamespacedShare { + shares := generateRandNamespacedRawData(count, consts.NamespaceSize, leafSize) + nsShares := make(NamespacedShares, count) + for i, s := range shares { + nsShares[i] = NamespacedShare{ + Share: s[consts.NamespaceSize:], + ID: s[:consts.NamespaceSize], + } + } + return nsShares +} + +func generateRandNamespacedRawData(total, nidSize, leafSize int) [][]byte { + data := make([][]byte, total) + for i := 0; i < total; i++ { + nid := make([]byte, nidSize) + rand.Read(nid) + data[i] = nid + } + sortByteArrays(data) + for i := 0; i < total; i++ { + d := make([]byte, leafSize) + rand.Read(d) + data[i] = append(data[i], d...) 
+ } + + return data +} + +func sortByteArrays(src [][]byte) { + sort.Slice(src, func(i, j int) bool { return bytes.Compare(src[i], src[j]) < 0 }) +} From 66f720dffe0adc9b40a6fa52f4649bb23303a9ff Mon Sep 17 00:00:00 2001 From: Evan Forbes <42654277+evan-forbes@users.noreply.github.com> Date: Mon, 5 Apr 2021 22:51:00 -0500 Subject: [PATCH 7/7] Spec compliant merge shares (#261) * start spec compliant share merging * refactor and finish unit testing * whoops * linter gods * fix initial changes and use constants * use constant * more polish * docs fix* review feedback: docs and out of range panic protection * review feedback: add panic protection from empty input * use constant instead of recalculating `ShareSize`* don't redeclare existing var* be more explicit with returned nil* use constant instead of recalculating `ShareSize`* review feedback: use consistent capitalization * stop accepting reserved namespaces as normal messages * use a descriptive var name for message length * linter and comparison fix * reorg tests, add test for parse delimiter, DataFromBlock and fix evidence marshal bug * catch error for linter * update test MakeShares to include length delimiters for the SHARE_RESERVED_BYTE * minor iteration change * refactor share splitting to fix bug * fix all bugs with third and final refactor * fix conflict * revert unnecessary changes * review feedback: better docs* reivew feedback: add comment for safeLen * review feedback: remove unnecessay comments * review feedback: split up share merging and splitting into their own files * review feedback: more descriptive var names * fix accidental change * add some constant docs * spelling error Co-authored-by: Hlib Kanunnikov Co-authored-by: John Adler Co-authored-by: Ismail Khoffi --- pkg/consts/consts.go | 3 + types/block.go | 100 +++++++--- types/share_merging.go | 333 ++++++++++++++++++++++++++++++++ types/share_splitting.go | 148 +++++++++++++++ types/shares.go | 122 ------------ types/shares_test.go | 397 +++++++++++++++++++++++++++++++++++---- types/tx.go | 9 +- 7 files changed, 932 insertions(+), 180 deletions(-) create mode 100644 types/share_merging.go create mode 100644 types/share_splitting.go diff --git a/pkg/consts/consts.go b/pkg/consts/consts.go index 703fa0a8bf..c7d9025fb2 100644 --- a/pkg/consts/consts.go +++ b/pkg/consts/consts.go @@ -4,6 +4,7 @@ import ( "crypto/sha256" "github.com/celestiaorg/nmt/namespace" + "github.com/celestiaorg/rsmt2d" ) // This contains all constants of: @@ -61,4 +62,6 @@ var ( // NewBaseHashFunc change accordingly if another hash.Hash should be used as a base hasher in the NMT: NewBaseHashFunc = sha256.New + + DefaultCodec = rsmt2d.NewRSGF8Codec ) diff --git a/types/block.go b/types/block.go index f961fa34e2..7bfa26ee1a 100644 --- a/types/block.go +++ b/types/block.go @@ -4,6 +4,7 @@ import ( "bytes" "errors" "fmt" + "math" "strings" "time" @@ -1112,6 +1113,69 @@ func (data *Data) Hash() tmbytes.HexBytes { return data.hash } +// ComputeShares splits block data into shares of an original data square and +// returns them along with an amount of non-redundant shares. 
The shares +// returned are padded to complete a square size that is a power of two +func (data *Data) ComputeShares() (NamespacedShares, int) { + // TODO(ismail): splitting into shares should depend on the block size and layout + // see: https://github.com/celestiaorg/celestia-specs/blob/master/specs/block_proposer.md#laying-out-transactions-and-messages + + // reserved shares: + txShares := data.Txs.SplitIntoShares() + intermRootsShares := data.IntermediateStateRoots.SplitIntoShares() + evidenceShares := data.Evidence.SplitIntoShares() + + // application data shares from messages: + msgShares := data.Messages.SplitIntoShares() + curLen := len(txShares) + len(intermRootsShares) + len(evidenceShares) + len(msgShares) + + // find the number of shares needed to create a square that has a power of + // two width + wantLen := paddedLen(curLen) + + // ensure that the min square size is used + if wantLen < consts.MinSharecount { + wantLen = consts.MinSharecount + } + + tailShares := TailPaddingShares(wantLen - curLen) + + return append(append(append(append( + txShares, + intermRootsShares...), + evidenceShares...), + msgShares...), + tailShares...), curLen +} + +// paddedLen calculates the number of shares needed to make a power of 2 square +// given the current number of shares +func paddedLen(length int) int { + width := uint32(math.Ceil(math.Sqrt(float64(length)))) + width = nextHighestPowerOf2(width) + return int(width * width) +} + +// nextPowerOf2 returns the next highest power of 2 unless the input is a power +// of two, in which case it returns the input +func nextHighestPowerOf2(v uint32) uint32 { + if v == 0 { + return 0 + } + + // find the next highest power using bit mashing + v-- + v |= v >> 1 + v |= v >> 2 + v |= v >> 4 + v |= v >> 8 + v |= v >> 16 + v++ + + // return the next highest power + return v +} + type Messages struct { MessagesList []Message `json:"msgs"` } @@ -1120,19 +1184,20 @@ type IntermediateStateRoots struct { RawRootsList []tmbytes.HexBytes `json:"intermediate_roots"` } -func (roots IntermediateStateRoots) splitIntoShares() NamespacedShares { - shares := make([]NamespacedShare, 0) +func (roots IntermediateStateRoots) SplitIntoShares() NamespacedShares { + rawDatas := make([][]byte, 0, len(roots.RawRootsList)) for _, root := range roots.RawRootsList { rawData, err := root.MarshalDelimited() if err != nil { panic(fmt.Sprintf("app returned intermediate state root that can not be encoded %#v", root)) } - shares = appendToShares(shares, consts.IntermediateStateRootsNamespaceID, rawData) + rawDatas = append(rawDatas, rawData) } + shares := splitContiguous(consts.IntermediateStateRootsNamespaceID, rawDatas) return shares } -func (msgs Messages) splitIntoShares() NamespacedShares { +func (msgs Messages) SplitIntoShares() NamespacedShares { shares := make([]NamespacedShare, 0) for _, m := range msgs.MessagesList { rawData, err := m.MarshalDelimited() @@ -1346,29 +1411,20 @@ func (data *EvidenceData) FromProto(eviData *tmproto.EvidenceList) error { return nil } -func (data *EvidenceData) splitIntoShares() NamespacedShares { - shares := make([]NamespacedShare, 0) +func (data *EvidenceData) SplitIntoShares() NamespacedShares { + rawDatas := make([][]byte, 0, len(data.Evidence)) for _, ev := range data.Evidence { - var rawData []byte - var err error - switch cev := ev.(type) { - case *DuplicateVoteEvidence: - rawData, err = protoio.MarshalDelimited(cev.ToProto()) - case *LightClientAttackEvidence: - pcev, iErr := cev.ToProto() - if iErr != nil { - err = iErr - break - } - 
rawData, err = protoio.MarshalDelimited(pcev) - default: - panic(fmt.Sprintf("unknown evidence included in evidence pool (don't know how to encode this) %#v", ev)) + pev, err := EvidenceToProto(ev) + if err != nil { + panic("failure to convert evidence to equivalent proto type") } + rawData, err := protoio.MarshalDelimited(pev) if err != nil { - panic(fmt.Sprintf("evidence included in evidence pool that can not be encoded %#v, err: %v", ev, err)) + panic(err) } - shares = appendToShares(shares, consts.EvidenceNamespaceID, rawData) + rawDatas = append(rawDatas, rawData) } + shares := splitContiguous(consts.EvidenceNamespaceID, rawDatas) return shares } diff --git a/types/share_merging.go b/types/share_merging.go new file mode 100644 index 0000000000..f54bbd32a9 --- /dev/null +++ b/types/share_merging.go @@ -0,0 +1,333 @@ +package types + +import ( + "bytes" + "encoding/binary" + "errors" + + "github.com/celestiaorg/rsmt2d" + "github.com/gogo/protobuf/proto" + tmbytes "github.com/tendermint/tendermint/libs/bytes" + "github.com/tendermint/tendermint/pkg/consts" + tmproto "github.com/tendermint/tendermint/proto/tendermint/types" +) + +// DataFromSquare extracts block data from an extended data square. +func DataFromSquare(eds *rsmt2d.ExtendedDataSquare) (Data, error) { + originalWidth := eds.Width() / 2 + + // sort block data shares by namespace + var ( + sortedTxShares [][]byte + sortedISRShares [][]byte + sortedEvdShares [][]byte + sortedMsgShares [][]byte + ) + + // iterate over each row index + for x := uint(0); x < originalWidth; x++ { + // iterate over each share in the original data square + row := eds.Row(x) + + for _, share := range row[:originalWidth] { + // sort the data of that share types via namespace + nid := share[:consts.NamespaceSize] + switch { + case bytes.Equal(consts.TxNamespaceID, nid): + sortedTxShares = append(sortedTxShares, share) + + case bytes.Equal(consts.IntermediateStateRootsNamespaceID, nid): + sortedISRShares = append(sortedISRShares, share) + + case bytes.Equal(consts.EvidenceNamespaceID, nid): + sortedEvdShares = append(sortedEvdShares, share) + + case bytes.Equal(consts.TailPaddingNamespaceID, nid): + continue + + // ignore unused but reserved namespaces + case bytes.Compare(nid, consts.MaxReservedNamespace) < 1: + continue + + // every other namespaceID should be a message + default: + sortedMsgShares = append(sortedMsgShares, share) + } + } + } + + // pass the raw share data to their respective parsers + txs, err := parseTxs(sortedTxShares) + if err != nil { + return Data{}, err + } + + isrs, err := parseISRs(sortedISRShares) + if err != nil { + return Data{}, err + } + + evd, err := parseEvd(sortedEvdShares) + if err != nil { + return Data{}, err + } + + msgs, err := parseMsgs(sortedMsgShares) + if err != nil { + return Data{}, err + } + + return Data{ + Txs: txs, + IntermediateStateRoots: isrs, + Evidence: evd, + Messages: msgs, + }, nil +} + +// parseTxs collects all of the transactions from the shares provided +func parseTxs(shares [][]byte) (Txs, error) { + // parse the sharse + rawTxs, err := processContiguousShares(shares) + if err != nil { + return nil, err + } + + // convert to the Tx type + txs := make(Txs, len(rawTxs)) + for i := 0; i < len(txs); i++ { + txs[i] = Tx(rawTxs[i]) + } + + return txs, nil +} + +// parseISRs collects all the intermediate state roots from the shares provided +func parseISRs(shares [][]byte) (IntermediateStateRoots, error) { + rawISRs, err := processContiguousShares(shares) + if err != nil { + return 
IntermediateStateRoots{}, err + } + + ISRs := make([]tmbytes.HexBytes, len(rawISRs)) + for i := 0; i < len(ISRs); i++ { + ISRs[i] = rawISRs[i] + } + + return IntermediateStateRoots{RawRootsList: ISRs}, nil +} + +// parseEvd collects all evidence from the shares provided. +func parseEvd(shares [][]byte) (EvidenceData, error) { + // the raw data returned does not have length delimiters or namespaces and + // is ready to be unmarshaled + rawEvd, err := processContiguousShares(shares) + if err != nil { + return EvidenceData{}, err + } + + evdList := make(EvidenceList, len(rawEvd)) + + // parse into protobuf bytes + for i := 0; i < len(rawEvd); i++ { + // unmarshal the evidence + var protoEvd tmproto.Evidence + err := proto.Unmarshal(rawEvd[i], &protoEvd) + if err != nil { + return EvidenceData{}, err + } + evd, err := EvidenceFromProto(&protoEvd) + if err != nil { + return EvidenceData{}, err + } + + evdList[i] = evd + } + + return EvidenceData{Evidence: evdList}, nil +} + +// parseMsgs collects all messages from the shares provided +func parseMsgs(shares [][]byte) (Messages, error) { + msgList, err := parseMsgShares(shares) + if err != nil { + return Messages{}, err + } + + return Messages{ + MessagesList: msgList, + }, nil +} + +// processContiguousShares takes raw shares and extracts out transactions, +// intermediate state roots, or evidence. The returned [][]byte do have +// namespaces or length delimiters and are ready to be unmarshalled +func processContiguousShares(shares [][]byte) (txs [][]byte, err error) { + if len(shares) == 0 { + return nil, nil + } + + ss := newShareStack(shares) + return ss.resolve() +} + +// shareStack hold variables for peel +type shareStack struct { + shares [][]byte + txLen uint64 + txs [][]byte + cursor int +} + +func newShareStack(shares [][]byte) *shareStack { + return &shareStack{shares: shares} +} + +func (ss *shareStack) resolve() ([][]byte, error) { + if len(ss.shares) == 0 { + return nil, nil + } + err := ss.peel(ss.shares[0][consts.NamespaceSize+consts.ShareReservedBytes:], true) + return ss.txs, err +} + +// peel recursively parses each chunk of data (either a transaction, +// intermediate state root, or evidence) and adds it to the underlying slice of data. +func (ss *shareStack) peel(share []byte, delimited bool) (err error) { + if delimited { + var txLen uint64 + share, txLen, err = parseDelimiter(share) + if err != nil { + return err + } + if txLen == 0 { + return nil + } + ss.txLen = txLen + } + // safeLen describes the point in the share where it can be safely split. If + // split beyond this point, it is possible to break apart a length + // delimiter, which will result in incorrect share merging + safeLen := len(share) - binary.MaxVarintLen64 + if safeLen < 0 { + safeLen = 0 + } + if ss.txLen <= uint64(safeLen) { + ss.txs = append(ss.txs, share[:ss.txLen]) + share = share[ss.txLen:] + return ss.peel(share, true) + } + // add the next share to the current share to continue merging if possible + if len(ss.shares) > ss.cursor+1 { + ss.cursor++ + share := append(share, ss.shares[ss.cursor][consts.NamespaceSize+consts.ShareReservedBytes:]...) + return ss.peel(share, false) + } + // collect any remaining data + if ss.txLen <= uint64(len(share)) { + ss.txs = append(ss.txs, share[:ss.txLen]) + share = share[ss.txLen:] + return ss.peel(share, true) + } + return errors.New("failure to parse block data: transaction length exceeded data length") +} + +// parseMsgShares iterates through raw shares and separates the contiguous chunks +// of data. 
It is only used for Messages, i.e. shares with a non-reserved namespace. +func parseMsgShares(shares [][]byte) ([]Message, error) { + if len(shares) == 0 { + return nil, nil + } + + // set the first nid and current share + nid := shares[0][:consts.NamespaceSize] + currentShare := shares[0][consts.NamespaceSize:] + // find and remove the msg len delimiter + currentShare, msgLen, err := parseDelimiter(currentShare) + if err != nil { + return nil, err + } + + var msgs []Message + for cursor := uint64(0); cursor < uint64(len(shares)); { + var msg Message + currentShare, nid, cursor, msgLen, msg, err = nextMsg( + shares, + currentShare, + nid, + cursor, + msgLen, + ) + if err != nil { + return nil, err + } + if msg.Data != nil { + msgs = append(msgs, msg) + } + } + + return msgs, nil +} + +func nextMsg( + shares [][]byte, + current, + nid []byte, + cursor, + msgLen uint64, +) ([]byte, []byte, uint64, uint64, Message, error) { + switch { + // the message uses all of the current share data and at least some of the + // next share + case msgLen > uint64(len(current)): + // add the next share to the current one and try again + cursor++ + current = append(current, shares[cursor][consts.NamespaceSize:]...) + return nextMsg(shares, current, nid, cursor, msgLen) + + // the msg we're looking for is contained in the current share + case msgLen <= uint64(len(current)): + msg := Message{nid, current[:msgLen]} + cursor++ + + // call it a day if the work is done + if cursor >= uint64(len(shares)) { + return nil, nil, cursor, 0, msg, nil + } + + nextNid := shares[cursor][:consts.NamespaceSize] + next, msgLen, err := parseDelimiter(shares[cursor][consts.NamespaceSize:]) + return next, nextNid, cursor, msgLen, msg, err + } + // this code is unreachable but the compiler doesn't know that + return nil, nil, 0, 0, Message{}, nil +} + +// parseDelimiter finds and returns the length delimiter of the message provided +// while also removing the delimiter bytes from the input +func parseDelimiter(input []byte) ([]byte, uint64, error) { + if len(input) == 0 { + return input, 0, nil + } + + l := binary.MaxVarintLen64 + if len(input) < binary.MaxVarintLen64 { + l = len(input) + } + + delimiter := zeroPadIfNecessary(input[:l], binary.MaxVarintLen64) + + // read the length of the message + r := bytes.NewBuffer(delimiter) + msgLen, err := binary.ReadUvarint(r) + if err != nil { + return nil, 0, err + } + + // calculate the number of bytes used by the delimiter + lenBuf := make([]byte, binary.MaxVarintLen64) + n := binary.PutUvarint(lenBuf, msgLen) + + // return the input without the length delimiter + return input[n:], msgLen, nil +} diff --git a/types/share_splitting.go b/types/share_splitting.go new file mode 100644 index 0000000000..08c4aba511 --- /dev/null +++ b/types/share_splitting.go @@ -0,0 +1,148 @@ +package types + +import ( + "bytes" + + "github.com/celestiaorg/nmt/namespace" + "github.com/tendermint/tendermint/pkg/consts" +) + +// appendToShares appends raw data as shares. +// Used for messages. +func appendToShares(shares []NamespacedShare, nid namespace.ID, rawData []byte) []NamespacedShare { + if len(rawData) <= consts.MsgShareSize { + rawShare := append(append( + make([]byte, 0, len(nid)+len(rawData)), + nid...), + rawData..., + ) + paddedShare := zeroPadIfNecessary(rawShare, consts.ShareSize) + share := NamespacedShare{paddedShare, nid} + shares = append(shares, share) + } else { // len(rawData) > MsgShareSize + shares = append(shares, splitMessage(rawData, nid)...) 
+ } + return shares +} + +// splitMessage breaks the data in a message into the minimum number of +// namespaced shares +func splitMessage(rawData []byte, nid namespace.ID) []NamespacedShare { + shares := make([]NamespacedShare, 0) + firstRawShare := append(append( + make([]byte, 0, consts.ShareSize), + nid...), + rawData[:consts.MsgShareSize]..., + ) + shares = append(shares, NamespacedShare{firstRawShare, nid}) + rawData = rawData[consts.MsgShareSize:] + for len(rawData) > 0 { + shareSizeOrLen := min(consts.MsgShareSize, len(rawData)) + rawShare := append(append( + make([]byte, 0, consts.ShareSize), + nid...), + rawData[:shareSizeOrLen]..., + ) + paddedShare := zeroPadIfNecessary(rawShare, consts.ShareSize) + share := NamespacedShare{paddedShare, nid} + shares = append(shares, share) + rawData = rawData[shareSizeOrLen:] + } + return shares +} + +// splitContiguous splits multiple raw data contiguously as shares. +// Used for transactions, intermediate state roots, and evidence. +func splitContiguous(nid namespace.ID, rawDatas [][]byte) []NamespacedShare { + shares := make([]NamespacedShare, 0) + // Index into the outer slice of rawDatas + outerIndex := 0 + // Index into the inner slice of rawDatas + innerIndex := 0 + for outerIndex < len(rawDatas) { + var rawData []byte + startIndex := 0 + rawData, outerIndex, innerIndex, startIndex = getNextChunk(rawDatas, outerIndex, innerIndex, consts.TxShareSize) + rawShare := append(append(append( + make([]byte, 0, len(nid)+1+len(rawData)), + nid...), + byte(startIndex)), + rawData...) + paddedShare := zeroPadIfNecessary(rawShare, consts.ShareSize) + share := NamespacedShare{paddedShare, nid} + shares = append(shares, share) + } + return shares +} + +// getNextChunk gets the next chunk for contiguous shares +// Precondition: none of the slices in rawDatas is zero-length +// This precondition should always hold at this point since zero-length txs are simply invalid. +func getNextChunk(rawDatas [][]byte, outerIndex int, innerIndex int, width int) ([]byte, int, int, int) { + rawData := make([]byte, 0, width) + startIndex := 0 + firstBytesToFetch := 0 + + curIndex := 0 + for curIndex < width && outerIndex < len(rawDatas) { + bytesToFetch := min(len(rawDatas[outerIndex])-innerIndex, width-curIndex) + if bytesToFetch == 0 { + panic("zero-length contiguous share data is invalid") + } + if curIndex == 0 { + firstBytesToFetch = bytesToFetch + } + // If we've already placed some data in this chunk, that means + // a new data segment begins + if curIndex != 0 { + // Offset by the fixed reserved bytes at the beginning of the share + startIndex = firstBytesToFetch + consts.NamespaceSize + consts.ShareReservedBytes + } + rawData = append(rawData, rawDatas[outerIndex][innerIndex:innerIndex+bytesToFetch]...) 
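+		// move the cursor forward; when the current rawDatas element is used
+		// up, reset the inner cursor and advance to the next element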
+ innerIndex += bytesToFetch + if innerIndex >= len(rawDatas[outerIndex]) { + innerIndex = 0 + outerIndex++ + } + curIndex += bytesToFetch + } + + return rawData, outerIndex, innerIndex, startIndex +} + +// tail is filler for all tail padded shares +// it is allocated once and used everywhere +var tailPaddingShare = append( + append(make([]byte, 0, consts.ShareSize), consts.TailPaddingNamespaceID...), + bytes.Repeat([]byte{0}, consts.ShareSize-consts.NamespaceSize)..., +) + +func TailPaddingShares(n int) NamespacedShares { + shares := make([]NamespacedShare, n) + for i := 0; i < n; i++ { + shares[i] = NamespacedShare{ + Share: tailPaddingShare, + ID: consts.TailPaddingNamespaceID, + } + } + return shares +} + +func min(a, b int) int { + if a <= b { + return a + } + return b +} + +func zeroPadIfNecessary(share []byte, width int) []byte { + oldLen := len(share) + if oldLen < width { + missingBytes := width - oldLen + padByte := []byte{0} + padding := bytes.Repeat(padByte, missingBytes) + share = append(share, padding...) + return share + } + return share +} diff --git a/types/shares.go b/types/shares.go index a96ff36969..30a191183d 100644 --- a/types/shares.go +++ b/types/shares.go @@ -1,19 +1,15 @@ package types import ( - "bytes" "encoding/binary" "github.com/celestiaorg/nmt/namespace" - "github.com/tendermint/tendermint/pkg/consts" ) // Share contains the raw share data without the corresponding namespace. type Share []byte // NamespacedShare extends a Share with the corresponding namespace. -// It implements the namespace.Data interface and hence can be used -// for pushing the shares to the namespaced Merkle tree. type NamespacedShare struct { Share ID namespace.ID @@ -45,7 +41,6 @@ func (tx Tx) MarshalDelimited() ([]byte, error) { lenBuf := make([]byte, binary.MaxVarintLen64) length := uint64(len(tx)) n := binary.PutUvarint(lenBuf, length) - return append(lenBuf[:n], tx...), nil } @@ -55,122 +50,5 @@ func (m Message) MarshalDelimited() ([]byte, error) { lenBuf := make([]byte, binary.MaxVarintLen64) length := uint64(len(m.Data)) n := binary.PutUvarint(lenBuf, length) - return append(lenBuf[:n], m.Data...), nil } - -// appendToShares appends raw data as shares. -// Used for messages. -func appendToShares(shares []NamespacedShare, nid namespace.ID, rawData []byte) []NamespacedShare { - if len(rawData) <= consts.MsgShareSize { - rawShare := []byte(append(nid, rawData...)) - paddedShare := zeroPadIfNecessary(rawShare, consts.ShareSize) - share := NamespacedShare{paddedShare, nid} - shares = append(shares, share) - } else { // len(rawData) > MsgShareSize - shares = append(shares, split(rawData, nid)...) - } - return shares -} - -// splitContiguous splits multiple raw data contiguously as shares. -// Used for transactions, intermediate state roots, and evidence. 
-func splitContiguous(nid namespace.ID, rawDatas [][]byte) []NamespacedShare { - shares := make([]NamespacedShare, 0) - // Index into the outer slice of rawDatas - outerIndex := 0 - // Index into the inner slice of rawDatas - innerIndex := 0 - for outerIndex < len(rawDatas) { - var rawData []byte - startIndex := 0 - rawData, outerIndex, innerIndex, startIndex = getNextChunk(rawDatas, outerIndex, innerIndex, consts.TxShareSize) - rawShare := []byte(append(append(nid, byte(startIndex)), rawData...)) - paddedShare := zeroPadIfNecessary(rawShare, consts.ShareSize) - share := NamespacedShare{paddedShare, nid} - shares = append(shares, share) - } - return shares -} - -// TODO(ismail): implement corresponding merge method for clients requesting -// shares for a particular namespace -func split(rawData []byte, nid namespace.ID) []NamespacedShare { - shares := make([]NamespacedShare, 0) - firstRawShare := []byte(append(nid, rawData[:consts.MsgShareSize]...)) - shares = append(shares, NamespacedShare{firstRawShare, nid}) - rawData = rawData[consts.MsgShareSize:] - for len(rawData) > 0 { - shareSizeOrLen := min(consts.MsgShareSize, len(rawData)) - rawShare := make([]byte, consts.NamespaceSize) - copy(rawShare, nid) - rawShare = append(rawShare, rawData[:shareSizeOrLen]...) - paddedShare := zeroPadIfNecessary(rawShare, consts.ShareSize) - share := NamespacedShare{paddedShare, nid} - shares = append(shares, share) - rawData = rawData[shareSizeOrLen:] - } - return shares -} - -// getNextChunk gets the next chunk for contiguous shares -// Precondition: none of the slices in rawDatas is zero-length -// This precondition should always hold at this point since zero-length txs are simply invalid. -func getNextChunk(rawDatas [][]byte, outerIndex int, innerIndex int, width int) ([]byte, int, int, int) { - rawData := make([]byte, 0, width) - startIndex := 0 - firstBytesToFetch := 0 - - curIndex := 0 - for curIndex < width && outerIndex < len(rawDatas) { - bytesToFetch := min(len(rawDatas[outerIndex])-innerIndex, width-curIndex) - if bytesToFetch == 0 { - panic("zero-length contiguous share data is invalid") - } - if curIndex == 0 { - firstBytesToFetch = bytesToFetch - } - // If we've already placed some data in this chunk, that means - // a new data segment begins - if curIndex != 0 { - // Offset by the fixed reserved bytes at the beginning of the share - startIndex = firstBytesToFetch + consts.NamespaceSize + consts.ShareReservedBytes - } - rawData = append(rawData, rawDatas[outerIndex][innerIndex:innerIndex+bytesToFetch]...) - innerIndex += bytesToFetch - if innerIndex >= len(rawDatas[outerIndex]) { - innerIndex = 0 - outerIndex++ - } - curIndex += bytesToFetch - } - - return rawData, outerIndex, innerIndex, startIndex -} - -func GenerateTailPaddingShares(n int, shareWidth int) NamespacedShares { - shares := make([]NamespacedShare, n) - for i := 0; i < n; i++ { - shares[i] = NamespacedShare{bytes.Repeat([]byte{0}, shareWidth), consts.TailPaddingNamespaceID} - } - return shares -} - -func min(a, b int) int { - if a <= b { - return a - } - return b -} - -func zeroPadIfNecessary(share []byte, width int) []byte { - oldLen := len(share) - if oldLen < width { - missingBytes := width - oldLen - padByte := []byte{0} - padding := bytes.Repeat(padByte, missingBytes) - share = append(share, padding...) 
- return share - } - return share -} diff --git a/types/shares_test.go b/types/shares_test.go index 5fed4814ee..ddf7c29b07 100644 --- a/types/shares_test.go +++ b/types/shares_test.go @@ -2,19 +2,25 @@ package types import ( "bytes" - "crypto/rand" + "context" + "fmt" + "math" + "math/rand" "reflect" "sort" "testing" + "time" "github.com/celestiaorg/nmt/namespace" + "github.com/celestiaorg/rsmt2d" "github.com/stretchr/testify/assert" "github.com/tendermint/tendermint/internal/libs/protoio" + tmbytes "github.com/tendermint/tendermint/libs/bytes" "github.com/tendermint/tendermint/pkg/consts" ) -type splitter interface { - splitIntoShares() NamespacedShares +type Splitter interface { + SplitIntoShares() NamespacedShares } func TestMakeShares(t *testing.T) { @@ -29,7 +35,11 @@ func TestMakeShares(t *testing.T) { VoteA: vote1, VoteB: vote2, } - testEvidenceBytes, err := protoio.MarshalDelimited(testEvidence.ToProto()) + protoTestEvidence, err := EvidenceToProto(testEvidence) + if err != nil { + t.Error(err) + } + testEvidenceBytes, err := protoio.MarshalDelimited(protoTestEvidence) largeTx := Tx(bytes.Repeat([]byte("large Tx"), 50)) largeTxLenDelimited, _ := largeTx.MarshalDelimited() smolTx := Tx("small Tx") @@ -44,31 +54,36 @@ func TestMakeShares(t *testing.T) { } type args struct { - data splitter + data Splitter } tests := []struct { name string args args want NamespacedShares }{ - {"evidence", - args{ + { + name: "evidence", + args: args{ data: &EvidenceData{ Evidence: []Evidence{testEvidence}, }, - }, NamespacedShares{NamespacedShare{ - Share: append( - append(reservedEvidenceNamespaceID, byte(0)), - testEvidenceBytes[:consts.TxShareSize]..., - ), - ID: reservedEvidenceNamespaceID, - }, NamespacedShare{ - Share: append( - append(reservedEvidenceNamespaceID, byte(0)), - zeroPadIfNecessary(testEvidenceBytes[consts.TxShareSize:], consts.TxShareSize)..., - ), - ID: reservedEvidenceNamespaceID, - }}, + }, + want: NamespacedShares{ + NamespacedShare{ + Share: append( + append(reservedEvidenceNamespaceID, byte(0)), + testEvidenceBytes[:consts.TxShareSize]..., + ), + ID: reservedEvidenceNamespaceID, + }, + NamespacedShare{ + Share: append( + append(reservedEvidenceNamespaceID, byte(0)), + zeroPadIfNecessary(testEvidenceBytes[consts.TxShareSize:], consts.TxShareSize)..., + ), + ID: reservedEvidenceNamespaceID, + }, + }, }, {"small LL Tx", args{ @@ -119,7 +134,10 @@ func TestMakeShares(t *testing.T) { }, NamespacedShare{ Share: append( - append(reservedTxNamespaceID, byte(len(largeTxLenDelimited)-consts.TxShareSize+consts.NamespaceSize+consts.ShareReservedBytes)), + append( + reservedTxNamespaceID, + byte(len(largeTxLenDelimited)-consts.TxShareSize+consts.NamespaceSize+consts.ShareReservedBytes), + ), zeroPadIfNecessary( append(largeTxLenDelimited[consts.TxShareSize:], smolTxLenDelimited...), consts.TxShareSize, @@ -148,7 +166,7 @@ func TestMakeShares(t *testing.T) { tt := tt // stupid scopelint :-/ i := i t.Run(tt.name, func(t *testing.T) { - got := tt.args.data.splitIntoShares() + got := tt.args.data.SplitIntoShares() if !reflect.DeepEqual(got, tt.want) { t.Errorf("%v: makeShares() = \n%+v\nwant\n%+v\n", i, got, tt.want) } @@ -197,27 +215,342 @@ func Test_appendToSharesOverwrite(t *testing.T) { assert.Equal(t, extraCopy, []byte(newShare.Share[:consts.MsgShareSize])) } -func generateRandomNamespacedShares(count, leafSize int) []NamespacedShare { - shares := generateRandNamespacedRawData(count, consts.NamespaceSize, leafSize) - nsShares := make(NamespacedShares, count) +func TestDataFromSquare(t 
*testing.T) { + type test struct { + name string + txCount int + isrCount int + evdCount int + msgCount int + maxSize int // max size of each tx or msg + } + + tests := []test{ + {"one of each random small size", 1, 1, 1, 1, 40}, + {"one of each random large size", 1, 1, 1, 1, 400}, + {"many of each random large size", 10, 10, 10, 10, 40}, + {"many of each random large size", 10, 10, 10, 10, 400}, + {"only transactions", 10, 0, 0, 0, 400}, + {"only intermediate state roots", 0, 10, 0, 0, 400}, + {"only evidence", 0, 0, 10, 0, 400}, + {"only messages", 0, 0, 0, 10, 400}, + } + + for _, tc := range tests { + tc := tc + + t.Run(tc.name, func(t *testing.T) { + // generate random data + data := generateRandomBlockData( + tc.txCount, + tc.isrCount, + tc.evdCount, + tc.msgCount, + tc.maxSize, + ) + + shares, _ := data.ComputeShares() + rawShares := shares.RawShares() + + eds, err := rsmt2d.ComputeExtendedDataSquare(rawShares, rsmt2d.NewRSGF8Codec(), rsmt2d.NewDefaultTree) + if err != nil { + t.Error(err) + } + + res, err := DataFromSquare(eds) + if err != nil { + t.Fatal(err) + } + + // we have to compare the evidence by string because the the + // timestamps differ not by actual time represented, but by + // internals see https://github.com/stretchr/testify/issues/666 + for i := 0; i < len(data.Evidence.Evidence); i++ { + inputEvidence := data.Evidence.Evidence[i].(*DuplicateVoteEvidence) + resultEvidence := res.Evidence.Evidence[i].(*DuplicateVoteEvidence) + assert.Equal(t, inputEvidence.String(), resultEvidence.String()) + } + + // compare the original to the result w/o the evidence + data.Evidence = EvidenceData{} + res.Evidence = EvidenceData{} + + assert.Equal(t, data, res) + }) + } +} + +func TestFuzz_DataFromSquare(t *testing.T) { + t.Skip() + // run random shares through processContiguousShares for a minute + ctx, cancel := context.WithTimeout(context.Background(), time.Minute) + defer cancel() + for { + select { + case <-ctx.Done(): + return + default: + TestDataFromSquare(t) + } + } +} + +func Test_processContiguousShares(t *testing.T) { + // exactTxShareSize is the length of tx that will fit exactly into a single + // share, accounting for namespace id and the length delimiter prepended to + // each tx + const exactTxShareSize = consts.TxShareSize - 1 + + type test struct { + name string + txSize int + txCount int + } + + // each test is ran twice, once using txSize as an exact size, and again + // using it as a cap for randomly sized txs + tests := []test{ + {"single small tx", 10, 1}, + {"many small txs", 10, 10}, + {"single big tx", 1000, 1}, + {"many big txs", 1000, 10}, + {"single exact size tx", exactTxShareSize, 1}, + {"many exact size txs", exactTxShareSize, 10}, + } + + for _, tc := range tests { + tc := tc + + // run the tests with identically sized txs + t.Run(fmt.Sprintf("%s idendically sized ", tc.name), func(t *testing.T) { + txs := generateRandomContiguousShares(tc.txCount, tc.txSize) + + shares := txs.SplitIntoShares() + + parsedTxs, err := processContiguousShares(shares.RawShares()) + if err != nil { + t.Error(err) + } + + // check that the data parsed is identical + for i := 0; i < len(txs); i++ { + assert.Equal(t, []byte(txs[i]), parsedTxs[i]) + } + }) + + // run the same tests using randomly sized txs with caps of tc.txSize + t.Run(fmt.Sprintf("%s randomly sized", tc.name), func(t *testing.T) { + txs := generateRandomlySizedContiguousShares(tc.txCount, tc.txSize) + + shares := txs.SplitIntoShares() + + parsedTxs, err := processContiguousShares(shares.RawShares()) + 
+			if err != nil {
+				t.Error(err)
+			}
+
+			// check that the data parsed is identical to the original
+			for i := 0; i < len(txs); i++ {
+				assert.Equal(t, []byte(txs[i]), parsedTxs[i])
+			}
+		})
+	}
+}
+
+func TestFuzz_processContiguousShares(t *testing.T) {
+	t.Skip()
+	// run random shares through processContiguousShares for a minute
+	ctx, cancel := context.WithTimeout(context.Background(), time.Minute)
+	defer cancel()
+	for {
+		select {
+		case <-ctx.Done():
+			return
+		default:
+			Test_processContiguousShares(t)
+		}
+	}
+}
+
+func Test_parseMsgShares(t *testing.T) {
+	// exactMsgShareSize is the length of a message that will fit exactly into
+	// a single share, accounting for namespace id and the length delimiter
+	// prepended to each message
+	const exactMsgShareSize = consts.MsgShareSize - 2
+
+	type test struct {
+		name     string
+		msgSize  int
+		msgCount int
+	}
+
+	// each test is run twice, once using msgSize as an exact size, and again
+	// using it as a cap for randomly sized leaves
+	tests := []test{
+		{"single small msg", 1, 1},
+		{"many small msgs", 4, 10},
+		{"single big msg", 1000, 1},
+		{"many big msgs", 1000, 10},
+		{"single exact size msg", exactMsgShareSize, 1},
+		{"many exact size msgs", exactMsgShareSize, 10},
+	}
+
+	for _, tc := range tests {
+		tc := tc
+
+		// run the tests with identically sized messages
+		t.Run(fmt.Sprintf("%s identically sized", tc.name), func(t *testing.T) {
+			rawmsgs := make([]Message, tc.msgCount)
+			for i := 0; i < tc.msgCount; i++ {
+				rawmsgs[i] = generateRandomMessage(tc.msgSize)
+			}
+			msgs := Messages{MessagesList: rawmsgs}
+
+			shares := msgs.SplitIntoShares()
+
+			parsedMsgs, err := parseMsgShares(shares.RawShares())
+			if err != nil {
+				t.Error(err)
+			}
+
+			// check that the namespaces and data are the same
+			for i := 0; i < len(msgs.MessagesList); i++ {
+				assert.Equal(t, msgs.MessagesList[i].NamespaceID, parsedMsgs[i].NamespaceID)
+				assert.Equal(t, msgs.MessagesList[i].Data, parsedMsgs[i].Data)
+			}
+		})
+
+		// run the same tests using randomly sized messages with caps of tc.msgSize
+		t.Run(fmt.Sprintf("%s randomly sized", tc.name), func(t *testing.T) {
+			msgs := generateRandomlySizedMessages(tc.msgCount, tc.msgSize)
+			shares := msgs.SplitIntoShares()
+
+			parsedMsgs, err := parseMsgShares(shares.RawShares())
+			if err != nil {
+				t.Error(err)
+			}
+
+			// check that the namespaces and data are the same
+			for i := 0; i < len(msgs.MessagesList); i++ {
+				assert.Equal(t, msgs.MessagesList[i].NamespaceID, parsedMsgs[i].NamespaceID)
+				assert.Equal(t, msgs.MessagesList[i].Data, parsedMsgs[i].Data)
+			}
+		})
+	}
+}
+
+func Test_parseDelimiter(t *testing.T) {
+	for i := uint64(0); i < 100; i++ {
+		tx := generateRandomContiguousShares(1, int(i))[0]
+		input, err := tx.MarshalDelimited()
+		if err != nil {
+			panic(err)
+		}
+		res, txLen, err := parseDelimiter(input)
+		if err != nil {
+			panic(err)
+		}
+		assert.Equal(t, i, txLen)
+		assert.Equal(t, []byte(tx), res)
+	}
+}
+
+// generateRandomBlockData returns randomly generated block data for testing purposes
+func generateRandomBlockData(txCount, isrCount, evdCount, msgCount, maxSize int) Data {
+	var out Data
+	out.Txs = generateRandomlySizedContiguousShares(txCount, maxSize)
+	out.IntermediateStateRoots = generateRandomISR(isrCount)
+	out.Evidence = generateIdenticalEvidence(evdCount)
+	out.Messages = generateRandomlySizedMessages(msgCount, maxSize)
+	return out
+}
+
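+// generateRandomlySizedContiguousShares returns count random txs, each with a
+// random length of at least 1 and at most max-1 bytes.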
+func generateRandomlySizedContiguousShares(count, max int) Txs {
+	txs := make(Txs, count)
+	for i := 0; i < count; i++ {
+		size := rand.Intn(max)
+		if size == 0 {
+			size = 1
+		}
+		txs[i] = generateRandomContiguousShares(1, size)[0]
+	}
+	return txs
+}
+
+func generateRandomContiguousShares(count, size int) Txs {
+	txs := make(Txs, count)
+	for i := 0; i < count; i++ {
+		tx := make([]byte, size)
+		_, err := rand.Read(tx)
+		if err != nil {
+			panic(err)
+		}
+		txs[i] = tx
+	}
+	return txs
+}
+
+func generateRandomISR(count int) IntermediateStateRoots {
+	roots := make([]tmbytes.HexBytes, count)
+	for i := 0; i < count; i++ {
+		roots[i] = tmbytes.HexBytes(generateRandomContiguousShares(1, 32)[0])
+	}
+	return IntermediateStateRoots{RawRootsList: roots}
+}
+
+func generateIdenticalEvidence(count int) EvidenceData {
+	evidence := make([]Evidence, count)
+	for i := 0; i < count; i++ {
+		ev := NewMockDuplicateVoteEvidence(math.MaxInt64, time.Now(), "chainID")
+		evidence[i] = ev
+	}
+	return EvidenceData{Evidence: evidence}
+}
+
+func generateRandomlySizedMessages(count, maxMsgSize int) Messages {
+	msgs := make([]Message, count)
+	for i := 0; i < count; i++ {
+		msgs[i] = generateRandomMessage(rand.Intn(maxMsgSize))
+	}
+
+	// this is just to let us use assert.Equal
+	if count == 0 {
+		msgs = nil
+	}
+
+	return Messages{MessagesList: msgs}
+}
+
+func generateRandomMessage(size int) Message {
+	share := generateRandomNamespacedShares(1, size)[0]
+	msg := Message{
+		NamespaceID: share.NamespaceID(),
+		Data:        share.Data(),
+	}
+	return msg
+}
+
+func generateRandomNamespacedShares(count, msgSize int) NamespacedShares {
+	shares := generateRandNamespacedRawData(uint32(count), consts.NamespaceSize, uint32(msgSize))
+	msgs := make([]Message, count)
 	for i, s := range shares {
-		nsShares[i] = NamespacedShare{
-			Share: s[consts.NamespaceSize:],
-			ID:    s[:consts.NamespaceSize],
+		msgs[i] = Message{
+			Data:        s[consts.NamespaceSize:],
+			NamespaceID: s[:consts.NamespaceSize],
 		}
 	}
-	return nsShares
+	return Messages{MessagesList: msgs}.SplitIntoShares()
 }
 
-func generateRandNamespacedRawData(total, nidSize, leafSize int) [][]byte {
+func generateRandNamespacedRawData(total, nidSize, leafSize uint32) [][]byte {
 	data := make([][]byte, total)
-	for i := 0; i < total; i++ {
+	for i := uint32(0); i < total; i++ {
 		nid := make([]byte, nidSize)
 		rand.Read(nid)
 		data[i] = nid
 	}
 	sortByteArrays(data)
-	for i := 0; i < total; i++ {
+	for i := uint32(0); i < total; i++ {
 		d := make([]byte, leafSize)
 		rand.Read(d)
 		data[i] = append(data[i], d...)
diff --git a/types/tx.go b/types/tx.go
index 5abae88832..9eacd55b52 100644
--- a/types/tx.go
+++ b/types/tx.go
@@ -80,15 +80,16 @@ func (txs Txs) Proof(i int) TxProof {
 	}
 }
 
-func (txs Txs) splitIntoShares() NamespacedShares {
-	shares := make([]NamespacedShare, 0)
-	for _, tx := range txs {
+func (txs Txs) SplitIntoShares() NamespacedShares {
+	rawDatas := make([][]byte, len(txs))
+	for i, tx := range txs {
 		rawData, err := tx.MarshalDelimited()
 		if err != nil {
 			panic(fmt.Sprintf("included Tx in mem-pool that can not be encoded %v", tx))
 		}
-		shares = appendToShares(shares, consts.TxNamespaceID, rawData)
+		rawDatas[i] = rawData
 	}
+	shares := splitContiguous(consts.TxNamespaceID, rawDatas)
+	return shares
 }
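
For orientation, the round trip that TestDataFromSquare exercises can be summarized in a short sketch. This is illustrative only and not part of the patch: it assumes it sits in the types package next to the tests above (reusing that file's imports, including rsmt2d), it only uses calls that appear verbatim in the test, and the helper name dataRoundTrip is hypothetical.

// dataRoundTrip is a minimal sketch (hypothetical helper, same package as the
// tests above) of the split -> extend -> recover pipeline from TestDataFromSquare.
func dataRoundTrip(data Data) (Data, error) {
	// split the block data into namespaced shares and take the raw [][]byte
	// leaves used for erasure coding
	shares, _ := data.ComputeShares()
	rawShares := shares.RawShares()

	// extend the original square with the same codec and tree the test uses
	eds, err := rsmt2d.ComputeExtendedDataSquare(rawShares, rsmt2d.NewRSGF8Codec(), rsmt2d.NewDefaultTree)
	if err != nil {
		return Data{}, err
	}

	// parse the txs, intermediate state roots, evidence, and messages back
	// out of the extended data square
	return DataFromSquare(eds)
}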