From 0648f4157d3bce9c5a9c3e0793c32f12a4f4223f Mon Sep 17 00:00:00 2001 From: Ismail Khoffi Date: Wed, 4 Nov 2020 18:13:20 +0100 Subject: [PATCH 1/7] Basic DA functionality (#83) * move Messages field to the end of Block.Data * Add some constants for share computation and the NMT: - also a bunch of todos regarding shares computation * First (compiling) stab on creating shares * Test with Evidence and fix bug discovered by test * remove resolved todos * introduce split method * Introduce LenDelimitedMarshaler interface and some reformatting * Introduce TxLenDelimitedMarshaler * add some test cases * fix some comments * fix some comments & linter * Add reserved namespaces to params * Move ll-specific consts into a separate file (consts.go) * Add MarshalDelimited to HexBytes * Add tail-padding shares * Add ComputeShares method on Data to compute all shares * Fix compute the next square num and not the next power of two * lints * Unexport MakeShares function: - it's likely to change and it doesn't have to be part of the public API * lints 2 * First stab on computing row/column roots * fix rebase glitches: - move DA related constants out of params.go * refactor MakeBlock to take in interm. state roots and messages * refactor state.MakeBlock too * Add todos LenDelimitedMarshaler and extract appendShares logic * Simplify shares computation: remove LenDelimitedMarshaler abstraction * actually use DA header to compute the DataRoot everywhere (will lead to failing tests for sure) * WIP: Update block related core data structures in protobuf too * WIP: fix zero shares edge-case and get rid of Block.Data.hash (use dataAvailabilityHeader.Hash() instead) * Fixed tests, only 3 failing tests to go: TestReapMaxBytesMaxGas, TestTxFilter, TestMempoolFilters * Fix TestTxFilter: - the size of the wrapping Data{} proto message increased a few bytes * Fix Message proto and `DataFromProto` * Fix last 2 remaining tests related to the increased block/block.Data size * Use infectious lib instead of leopard * proto-lint: snake_case * some lints and minor changes * linter * panic if pushing to tree fails, extend Data.ToProto() * revert renaming in comment * add todo about refactoring as soon as the rsmt2d allows the user to choose the merkle tree --- internal/consensus/replay_test.go | 54 +++++++++++++++++++++++++++++++ state/helpers_test.go | 4 +++ state/tx_filter_test.go | 2 +- types/block_test.go | 12 +++---- 4 files changed, 65 insertions(+), 7 deletions(-) diff --git a/internal/consensus/replay_test.go b/internal/consensus/replay_test.go index 4d1c9c6b26..b8b1f74522 100644 --- a/internal/consensus/replay_test.go +++ b/internal/consensus/replay_test.go @@ -1001,6 +1001,60 @@ func TestHandshakePanicsIfAppReturnsWrongAppHash(t *testing.T) { } } +func makeBlocks(n int, state *sm.State, privVal types.PrivValidator) []*types.Block { + blocks := make([]*types.Block, 0) + + var ( + prevBlock *types.Block + prevBlockMeta *types.BlockMeta + ) + + appHeight := byte(0x01) + for i := 0; i < n; i++ { + height := int64(i + 1) + + block, parts := makeBlock(*state, prevBlock, prevBlockMeta, privVal, height) + blocks = append(blocks, block) + + prevBlock = block + prevBlockMeta = types.NewBlockMeta(block, parts) + + // update state + state.AppHash = []byte{appHeight} + appHeight++ + state.LastBlockHeight = height + } + + return blocks +} + +func makeBlock(state sm.State, lastBlock *types.Block, lastBlockMeta *types.BlockMeta, + privVal types.PrivValidator, height int64) (*types.Block, *types.PartSet) { + + lastCommit := 
types.NewCommit(height-1, 0, types.BlockID{}, nil) + if height > 1 { + vote, _ := types.MakeVote( + lastBlock.Header.Height, + lastBlockMeta.BlockID, + state.Validators, + privVal, + lastBlock.Header.ChainID, + time.Now()) + lastCommit = types.NewCommit(vote.Height, vote.Round, + lastBlockMeta.BlockID, []types.CommitSig{vote.CommitSig()}) + } + + return state.MakeBlock( + height, + []types.Tx{}, + nil, + nil, + nil, + lastCommit, + state.Validators.GetProposer().Address, + ) +} + type badApp struct { abci.BaseApplication numBlocks byte diff --git a/state/helpers_test.go b/state/helpers_test.go index d28982b2c0..92b19379ba 100644 --- a/state/helpers_test.go +++ b/state/helpers_test.go @@ -22,6 +22,10 @@ import ( "github.com/tendermint/tendermint/types" ) +const ( + nTxsPerBlock = 10 +) + type paramsChangeTestCase struct { height int64 params types.ConsensusParams diff --git a/state/tx_filter_test.go b/state/tx_filter_test.go index c79ee1241e..4092fcbc2c 100644 --- a/state/tx_filter_test.go +++ b/state/tx_filter_test.go @@ -13,7 +13,7 @@ import ( func TestTxFilter(t *testing.T) { genDoc := randomGenesisDoc() - genDoc.ConsensusParams.Block.MaxBytes = 3000 + genDoc.ConsensusParams.Block.MaxBytes = 3040 genDoc.ConsensusParams.Evidence.MaxBytes = 1500 // Max size of Txs is much smaller than size of block, diff --git a/types/block_test.go b/types/block_test.go index d02f374d71..371730b5d0 100644 --- a/types/block_test.go +++ b/types/block_test.go @@ -460,10 +460,10 @@ func TestBlockMaxDataBytes(t *testing.T) { 0: {-10, 1, 0, true, 0}, 1: {10, 1, 0, true, 0}, 2: {841, 1, 0, true, 0}, - 3: {842, 1, 0, false, 0}, - 4: {843, 1, 0, false, 1}, - 5: {954, 2, 0, false, 1}, - 6: {1053, 2, 100, false, 0}, + 3: {845, 1, 0, false, 0}, + 4: {846, 1, 0, false, 1}, + 5: {956, 2, 0, false, 1}, + 6: {1056, 2, 100, false, 0}, } for i, tc := range testCases { @@ -491,8 +491,8 @@ func TestBlockMaxDataBytesNoEvidence(t *testing.T) { 0: {-10, 1, true, 0}, 1: {10, 1, true, 0}, 2: {841, 1, true, 0}, - 3: {842, 1, false, 0}, - 4: {843, 1, false, 1}, + 3: {845, 1, false, 0}, + 4: {846, 1, false, 1}, } for i, tc := range testCases { From c093b422f5561e9dc78fe4a1de4d0829e1f65435 Mon Sep 17 00:00:00 2001 From: evan-forbes Date: Mon, 20 Sep 2021 18:26:35 -0500 Subject: [PATCH 2/7] clean up some unused test helper functions --- internal/consensus/replay_test.go | 54 ------------------------------- state/tx_filter_test.go | 2 +- 2 files changed, 1 insertion(+), 55 deletions(-) diff --git a/internal/consensus/replay_test.go b/internal/consensus/replay_test.go index b8b1f74522..4d1c9c6b26 100644 --- a/internal/consensus/replay_test.go +++ b/internal/consensus/replay_test.go @@ -1001,60 +1001,6 @@ func TestHandshakePanicsIfAppReturnsWrongAppHash(t *testing.T) { } } -func makeBlocks(n int, state *sm.State, privVal types.PrivValidator) []*types.Block { - blocks := make([]*types.Block, 0) - - var ( - prevBlock *types.Block - prevBlockMeta *types.BlockMeta - ) - - appHeight := byte(0x01) - for i := 0; i < n; i++ { - height := int64(i + 1) - - block, parts := makeBlock(*state, prevBlock, prevBlockMeta, privVal, height) - blocks = append(blocks, block) - - prevBlock = block - prevBlockMeta = types.NewBlockMeta(block, parts) - - // update state - state.AppHash = []byte{appHeight} - appHeight++ - state.LastBlockHeight = height - } - - return blocks -} - -func makeBlock(state sm.State, lastBlock *types.Block, lastBlockMeta *types.BlockMeta, - privVal types.PrivValidator, height int64) (*types.Block, *types.PartSet) { - - lastCommit := 
types.NewCommit(height-1, 0, types.BlockID{}, nil) - if height > 1 { - vote, _ := types.MakeVote( - lastBlock.Header.Height, - lastBlockMeta.BlockID, - state.Validators, - privVal, - lastBlock.Header.ChainID, - time.Now()) - lastCommit = types.NewCommit(vote.Height, vote.Round, - lastBlockMeta.BlockID, []types.CommitSig{vote.CommitSig()}) - } - - return state.MakeBlock( - height, - []types.Tx{}, - nil, - nil, - nil, - lastCommit, - state.Validators.GetProposer().Address, - ) -} - type badApp struct { abci.BaseApplication numBlocks byte diff --git a/state/tx_filter_test.go b/state/tx_filter_test.go index 4092fcbc2c..c79ee1241e 100644 --- a/state/tx_filter_test.go +++ b/state/tx_filter_test.go @@ -13,7 +13,7 @@ import ( func TestTxFilter(t *testing.T) { genDoc := randomGenesisDoc() - genDoc.ConsensusParams.Block.MaxBytes = 3040 + genDoc.ConsensusParams.Block.MaxBytes = 3000 genDoc.ConsensusParams.Evidence.MaxBytes = 1500 // Max size of Txs is much smaller than size of block, From a1e69d1e7aafa1c7223fd57de1f8c561738c2c71 Mon Sep 17 00:00:00 2001 From: evan-forbes Date: Mon, 20 Sep 2021 18:30:39 -0500 Subject: [PATCH 3/7] linter --- state/helpers_test.go | 4 ---- 1 file changed, 4 deletions(-) diff --git a/state/helpers_test.go b/state/helpers_test.go index 92b19379ba..d28982b2c0 100644 --- a/state/helpers_test.go +++ b/state/helpers_test.go @@ -22,10 +22,6 @@ import ( "github.com/tendermint/tendermint/types" ) -const ( - nTxsPerBlock = 10 -) - type paramsChangeTestCase struct { height int64 params types.ConsensusParams From cb58051a6bdb4f6866f8379ccf9c4cab7e78296f Mon Sep 17 00:00:00 2001 From: evan-forbes Date: Mon, 20 Sep 2021 18:40:57 -0500 Subject: [PATCH 4/7] still debugging the exact right number of bytes for max data... --- types/block_test.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/types/block_test.go b/types/block_test.go index 371730b5d0..d02f374d71 100644 --- a/types/block_test.go +++ b/types/block_test.go @@ -460,10 +460,10 @@ func TestBlockMaxDataBytes(t *testing.T) { 0: {-10, 1, 0, true, 0}, 1: {10, 1, 0, true, 0}, 2: {841, 1, 0, true, 0}, - 3: {845, 1, 0, false, 0}, - 4: {846, 1, 0, false, 1}, - 5: {956, 2, 0, false, 1}, - 6: {1056, 2, 100, false, 0}, + 3: {842, 1, 0, false, 0}, + 4: {843, 1, 0, false, 1}, + 5: {954, 2, 0, false, 1}, + 6: {1053, 2, 100, false, 0}, } for i, tc := range testCases { @@ -491,8 +491,8 @@ func TestBlockMaxDataBytesNoEvidence(t *testing.T) { 0: {-10, 1, true, 0}, 1: {10, 1, true, 0}, 2: {841, 1, true, 0}, - 3: {845, 1, false, 0}, - 4: {846, 1, false, 1}, + 3: {842, 1, false, 0}, + 4: {843, 1, false, 1}, } for i, tc := range testCases { From 801cfc8ec4bdcbf48759296da270838b01d25bd5 Mon Sep 17 00:00:00 2001 From: John Adler Date: Thu, 25 Mar 2021 11:49:22 -0400 Subject: [PATCH 5/7] Implement spec-compliant share splitting (#246) * Export block data compute shares. * Refactor to use ShareSize constant directly. * Change message splitting to prefix namespace ID. * Implement chunking for contiguous. * Add termination condition. * Rename append contiguous to split contiguous. * Update test for small tx. * Add test for two contiguous. * Make tx and msg adjusted share sizes exported constants. * Panic on hopefully-unreachable condition instead of silently skipping. * Update hardcoded response for block format. 
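* Resulting share layout: message shares are the namespace ID followed by the message data; contiguous shares (txs, intermediate state roots, evidence) are the namespace ID, a reserved start byte, then the data; all shares are zero-padded to ShareSize.
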
Co-authored-by: Ismail Khoffi --- types/block.go | 12 +++--- types/shares.go | 80 +++++++++++++++++++++++++++++++++------ types/shares_test.go | 90 ++++++++++++++++++++++++++++++++------------ types/tx.go | 4 +- 4 files changed, 142 insertions(+), 44 deletions(-) diff --git a/types/block.go b/types/block.go index 3736c3e1e6..f961fa34e2 100644 --- a/types/block.go +++ b/types/block.go @@ -1120,26 +1120,26 @@ type IntermediateStateRoots struct { RawRootsList []tmbytes.HexBytes `json:"intermediate_roots"` } -func (roots IntermediateStateRoots) splitIntoShares(shareSize int) NamespacedShares { +func (roots IntermediateStateRoots) splitIntoShares() NamespacedShares { shares := make([]NamespacedShare, 0) for _, root := range roots.RawRootsList { rawData, err := root.MarshalDelimited() if err != nil { panic(fmt.Sprintf("app returned intermediate state root that can not be encoded %#v", root)) } - shares = appendToShares(shares, consts.IntermediateStateRootsNamespaceID, rawData, shareSize) + shares = appendToShares(shares, consts.IntermediateStateRootsNamespaceID, rawData) } return shares } -func (msgs Messages) splitIntoShares(shareSize int) NamespacedShares { +func (msgs Messages) splitIntoShares() NamespacedShares { shares := make([]NamespacedShare, 0) for _, m := range msgs.MessagesList { rawData, err := m.MarshalDelimited() if err != nil { panic(fmt.Sprintf("app accepted a Message that can not be encoded %#v", m)) } - shares = appendToShares(shares, m.NamespaceID, rawData, shareSize) + shares = appendToShares(shares, m.NamespaceID, rawData) } return shares } @@ -1346,7 +1346,7 @@ func (data *EvidenceData) FromProto(eviData *tmproto.EvidenceList) error { return nil } -func (data *EvidenceData) splitIntoShares(shareSize int) NamespacedShares { +func (data *EvidenceData) splitIntoShares() NamespacedShares { shares := make([]NamespacedShare, 0) for _, ev := range data.Evidence { var rawData []byte @@ -1367,7 +1367,7 @@ func (data *EvidenceData) splitIntoShares(shareSize int) NamespacedShares { if err != nil { panic(fmt.Sprintf("evidence included in evidence pool that can not be encoded %#v, err: %v", ev, err)) } - shares = appendToShares(shares, consts.EvidenceNamespaceID, rawData, shareSize) + shares = appendToShares(shares, consts.EvidenceNamespaceID, rawData) } return shares } diff --git a/types/shares.go b/types/shares.go index a88677334a..55238a1db0 100644 --- a/types/shares.go +++ b/types/shares.go @@ -59,28 +59,51 @@ func (m Message) MarshalDelimited() ([]byte, error) { return append(lenBuf[:n], m.Data...), nil } -func appendToShares(shares []NamespacedShare, nid namespace.ID, rawData []byte, shareSize int) []NamespacedShare { - if len(rawData) < shareSize { - rawShare := rawData - paddedShare := zeroPadIfNecessary(rawShare, shareSize) +// appendToShares appends raw data as shares. +// Used for messages. +func appendToShares(shares []NamespacedShare, nid namespace.ID, rawData []byte) []NamespacedShare { + if len(rawData) < consts.MsgShareSize { + rawShare := []byte(append(nid, rawData...)) + paddedShare := zeroPadIfNecessary(rawShare, consts.ShareSize) + share := NamespacedShare{paddedShare, nid} + shares = append(shares, share) + } else { // len(rawData) >= MsgShareSize + shares = append(shares, split(rawData, nid)...) + } + return shares +} + +// splitContiguous splits multiple raw data contiguously as shares. +// Used for transactions, intermediate state roots, and evidence. 
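+// Each resulting share is laid out as the namespace ID, a single reserved
+// byte recording the offset (within the full share) at which the second data
+// segment begins, or zero if the share holds only one segment, followed by
+// the chunk data; the whole share is zero-padded to consts.ShareSize.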
+func splitContiguous(nid namespace.ID, rawDatas [][]byte) []NamespacedShare { + shares := make([]NamespacedShare, 0) + // Index into the outer slice of rawDatas + outerIndex := 0 + // Index into the inner slice of rawDatas + innerIndex := 0 + for outerIndex < len(rawDatas) { + var rawData []byte + startIndex := 0 + rawData, outerIndex, innerIndex, startIndex = getNextChunk(rawDatas, outerIndex, innerIndex, consts.TxShareSize) + rawShare := []byte(append(append(nid, byte(startIndex)), rawData...)) + paddedShare := zeroPadIfNecessary(rawShare, consts.ShareSize) share := NamespacedShare{paddedShare, nid} shares = append(shares, share) - } else { // len(rawData) >= shareSize - shares = append(shares, split(rawData, shareSize, nid)...) } return shares } // TODO(ismail): implement corresponding merge method for clients requesting // shares for a particular namespace -func split(rawData []byte, shareSize int, nid namespace.ID) []NamespacedShare { +func split(rawData []byte, nid namespace.ID) []NamespacedShare { shares := make([]NamespacedShare, 0) - firstRawShare := rawData[:shareSize] + firstRawShare := []byte(append(nid, rawData[:consts.MsgShareSize]...)) shares = append(shares, NamespacedShare{firstRawShare, nid}) - rawData = rawData[shareSize:] + rawData = rawData[consts.MsgShareSize:] for len(rawData) > 0 { - shareSizeOrLen := min(shareSize, len(rawData)) - paddedShare := zeroPadIfNecessary(rawData[:shareSizeOrLen], shareSize) + shareSizeOrLen := min(consts.MsgShareSize, len(rawData)) + rawShare := []byte(append(nid, rawData[:shareSizeOrLen]...)) + paddedShare := zeroPadIfNecessary(rawShare, consts.ShareSize) share := NamespacedShare{paddedShare, nid} shares = append(shares, share) rawData = rawData[shareSizeOrLen:] @@ -88,6 +111,41 @@ func split(rawData []byte, shareSize int, nid namespace.ID) []NamespacedShare { return shares } +// getNextChunk gets the next chunk for contiguous shares +// Precondition: none of the slices in rawDatas is zero-length +// This precondition should always hold at this point since zero-length txs are simply invalid. +func getNextChunk(rawDatas [][]byte, outerIndex int, innerIndex int, width int) ([]byte, int, int, int) { + rawData := make([]byte, 0, width) + startIndex := 0 + firstBytesToFetch := 0 + + curIndex := 0 + for curIndex < width && outerIndex < len(rawDatas) { + bytesToFetch := min(len(rawDatas[outerIndex])-innerIndex, width-curIndex) + if bytesToFetch == 0 { + panic("zero-length contiguous share data is invalid") + } + if curIndex == 0 { + firstBytesToFetch = bytesToFetch + } + // If we've already placed some data in this chunk, that means + // a new data segment begins + if curIndex != 0 { + // Offset by the fixed reserved bytes at the beginning of the share + startIndex = firstBytesToFetch + consts.NamespaceSize + consts.ShareReservedBytes + } + rawData = append(rawData, rawDatas[outerIndex][innerIndex:innerIndex+bytesToFetch]...) 
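+		// advance the inner cursor; once the current element of rawDatas is
+		// exhausted, reset the cursor and move on to the next element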
+ innerIndex += bytesToFetch + if innerIndex >= len(rawDatas[outerIndex]) { + innerIndex = 0 + outerIndex++ + } + curIndex += bytesToFetch + } + + return rawData, outerIndex, innerIndex, startIndex +} + func GenerateTailPaddingShares(n int, shareWidth int) NamespacedShares { shares := make([]NamespacedShare, n) for i := 0; i < n; i++ { diff --git a/types/shares_test.go b/types/shares_test.go index a180401071..ee18db6863 100644 --- a/types/shares_test.go +++ b/types/shares_test.go @@ -11,13 +11,12 @@ import ( ) type splitter interface { - splitIntoShares(shareSize int) NamespacedShares + splitIntoShares() NamespacedShares } func TestMakeShares(t *testing.T) { reservedTxNamespaceID := append(bytes.Repeat([]byte{0}, 7), 1) reservedEvidenceNamespaceID := append(bytes.Repeat([]byte{0}, 7), 3) - // resveredIntermediateStateRootsNamespaceID := append(bytes.Repeat([]byte{0}, 7), 2) val := NewMockPV() blockID := makeBlockID([]byte("blockhash"), 1000, []byte("partshash")) blockID2 := makeBlockID([]byte("blockhash2"), 1000, []byte("partshash")) @@ -38,12 +37,11 @@ func TestMakeShares(t *testing.T) { } msg1Marshaled, _ := msg1.MarshalDelimited() if err != nil { - t.Fatalf("Could not encode evidence: %v, error: %v", testEvidence, err) + t.Fatalf("Could not encode evidence: %v, error: %v\n", testEvidence, err) } type args struct { - data splitter - shareSize int + data splitter } tests := []struct { name string @@ -55,50 +53,91 @@ func TestMakeShares(t *testing.T) { data: &EvidenceData{ Evidence: []Evidence{testEvidence}, }, - shareSize: consts.ShareSize, }, NamespacedShares{NamespacedShare{ - Share: testEvidenceBytes[:consts.ShareSize], - ID: reservedEvidenceNamespaceID, + Share: append( + append(reservedEvidenceNamespaceID, byte(0)), + testEvidenceBytes[:consts.TxShareSize]..., + ), + ID: reservedEvidenceNamespaceID, }, NamespacedShare{ - Share: zeroPadIfNecessary(testEvidenceBytes[consts.ShareSize:], consts.ShareSize), - ID: reservedEvidenceNamespaceID, + Share: append( + append(reservedEvidenceNamespaceID, byte(0)), + zeroPadIfNecessary(testEvidenceBytes[consts.TxShareSize:], consts.TxShareSize)..., + ), + ID: reservedEvidenceNamespaceID, }}, }, {"small LL Tx", args{ - data: Txs{smolTx}, - shareSize: consts.ShareSize, + data: Txs{smolTx}, }, NamespacedShares{ NamespacedShare{ - Share: zeroPadIfNecessary(smolTxLenDelimited, consts.ShareSize), - ID: reservedTxNamespaceID, + Share: append( + append(reservedTxNamespaceID, byte(0)), + zeroPadIfNecessary(smolTxLenDelimited, consts.TxShareSize)..., + ), + ID: reservedTxNamespaceID, }, }, }, {"one large LL Tx", args{ - data: Txs{largeTx}, - shareSize: consts.ShareSize, + data: Txs{largeTx}, }, NamespacedShares{ NamespacedShare{ - Share: Share(largeTxLenDelimited[:consts.ShareSize]), - ID: reservedTxNamespaceID, + Share: append( + append(reservedTxNamespaceID, byte(0)), + largeTxLenDelimited[:consts.TxShareSize]..., + ), + ID: reservedTxNamespaceID, }, NamespacedShare{ - Share: zeroPadIfNecessary(largeTxLenDelimited[consts.ShareSize:], consts.ShareSize), - ID: reservedTxNamespaceID, + Share: append( + append(reservedTxNamespaceID, byte(0)), + zeroPadIfNecessary(largeTxLenDelimited[consts.TxShareSize:], consts.TxShareSize)..., + ), + ID: reservedTxNamespaceID, + }, + }, + }, + {"large then small LL Tx", + args{ + data: Txs{largeTx, smolTx}, + }, + NamespacedShares{ + NamespacedShare{ + Share: append( + append(reservedTxNamespaceID, byte(0)), + largeTxLenDelimited[:consts.TxShareSize]..., + ), + ID: reservedTxNamespaceID, + }, + NamespacedShare{ + Share: 
append( + append(reservedTxNamespaceID, byte(len(largeTxLenDelimited)-consts.TxShareSize+consts.NamespaceSize+consts.ShareReservedBytes)), + zeroPadIfNecessary( + append(largeTxLenDelimited[consts.TxShareSize:], smolTxLenDelimited...), + consts.TxShareSize, + )..., + ), + ID: reservedTxNamespaceID, }, }, }, {"ll-app message", args{ - data: Messages{[]Message{msg1}}, - shareSize: consts.ShareSize, + data: Messages{[]Message{msg1}}, }, NamespacedShares{ - NamespacedShare{zeroPadIfNecessary(msg1Marshaled, consts.ShareSize), msg1.NamespaceID}, + NamespacedShare{ + Share: append( + []byte(msg1.NamespaceID), + zeroPadIfNecessary(msg1Marshaled, consts.MsgShareSize)..., + ), + ID: msg1.NamespaceID, + }, }, }, } @@ -106,8 +145,9 @@ func TestMakeShares(t *testing.T) { tt := tt // stupid scopelint :-/ i := i t.Run(tt.name, func(t *testing.T) { - if got := tt.args.data.splitIntoShares(tt.args.shareSize); !reflect.DeepEqual(got, tt.want) { - t.Errorf("%v: makeShares() = \n%v\nwant\n%v", i, got, tt.want) + got := tt.args.data.splitIntoShares() + if !reflect.DeepEqual(got, tt.want) { + t.Errorf("%v: makeShares() = \n%+v\nwant\n%+v\n", i, got, tt.want) } }) } diff --git a/types/tx.go b/types/tx.go index 1cfdcf5880..5abae88832 100644 --- a/types/tx.go +++ b/types/tx.go @@ -80,14 +80,14 @@ func (txs Txs) Proof(i int) TxProof { } } -func (txs Txs) splitIntoShares(shareSize int) NamespacedShares { +func (txs Txs) splitIntoShares() NamespacedShares { shares := make([]NamespacedShare, 0) for _, tx := range txs { rawData, err := tx.MarshalDelimited() if err != nil { panic(fmt.Sprintf("included Tx in mem-pool that can not be encoded %v", tx)) } - shares = appendToShares(shares, consts.TxNamespaceID, rawData, shareSize) + shares = appendToShares(shares, consts.TxNamespaceID, rawData) } return shares } From eeba8088e8f5a8354b9edaec6b1f8c7141f07897 Mon Sep 17 00:00:00 2001 From: Evan Forbes <42654277+evan-forbes@users.noreply.github.com> Date: Fri, 26 Mar 2021 12:48:48 -0500 Subject: [PATCH 6/7] fix overwrite bug (#251) * fix overwrite bug and stop splitting shares of size MsgShareSize * remove ineffectual code * review feedback: better docs Co-authored-by: Ismail Khoffi * remove uneeded copy and only fix the source of the bug Co-authored-by: Ismail Khoffi * fix overwrite bug while also being consistent with using NamespacedShares * update to the latest rsmt2d for the nmt wrapper Co-authored-by: Ismail Khoffi --- types/shares.go | 8 ++++--- types/shares_test.go | 53 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+), 3 deletions(-) diff --git a/types/shares.go b/types/shares.go index 55238a1db0..a96ff36969 100644 --- a/types/shares.go +++ b/types/shares.go @@ -62,12 +62,12 @@ func (m Message) MarshalDelimited() ([]byte, error) { // appendToShares appends raw data as shares. // Used for messages. func appendToShares(shares []NamespacedShare, nid namespace.ID, rawData []byte) []NamespacedShare { - if len(rawData) < consts.MsgShareSize { + if len(rawData) <= consts.MsgShareSize { rawShare := []byte(append(nid, rawData...)) paddedShare := zeroPadIfNecessary(rawShare, consts.ShareSize) share := NamespacedShare{paddedShare, nid} shares = append(shares, share) - } else { // len(rawData) >= MsgShareSize + } else { // len(rawData) > MsgShareSize shares = append(shares, split(rawData, nid)...) 
} return shares @@ -102,7 +102,9 @@ func split(rawData []byte, nid namespace.ID) []NamespacedShare { rawData = rawData[consts.MsgShareSize:] for len(rawData) > 0 { shareSizeOrLen := min(consts.MsgShareSize, len(rawData)) - rawShare := []byte(append(nid, rawData[:shareSizeOrLen]...)) + rawShare := make([]byte, consts.NamespaceSize) + copy(rawShare, nid) + rawShare = append(rawShare, rawData[:shareSizeOrLen]...) paddedShare := zeroPadIfNecessary(rawShare, consts.ShareSize) share := NamespacedShare{paddedShare, nid} shares = append(shares, share) diff --git a/types/shares_test.go b/types/shares_test.go index ee18db6863..5fed4814ee 100644 --- a/types/shares_test.go +++ b/types/shares_test.go @@ -2,10 +2,13 @@ package types import ( "bytes" + "crypto/rand" "reflect" + "sort" "testing" "github.com/celestiaorg/nmt/namespace" + "github.com/stretchr/testify/assert" "github.com/tendermint/tendermint/internal/libs/protoio" "github.com/tendermint/tendermint/pkg/consts" ) @@ -176,3 +179,53 @@ func Test_zeroPadIfNecessary(t *testing.T) { }) } } + +func Test_appendToSharesOverwrite(t *testing.T) { + var shares NamespacedShares + + // generate some arbitrary namespaced shares first share that must be split + newShare := generateRandomNamespacedShares(1, consts.MsgShareSize+1)[0] + + // make a copy of the portion of the share to check if it's overwritten later + extraCopy := make([]byte, consts.MsgShareSize) + copy(extraCopy, newShare.Share[:consts.MsgShareSize]) + + // use appendToShares to add our new share + appendToShares(shares, newShare.ID, newShare.Share) + + // check if the original share data has been overwritten. + assert.Equal(t, extraCopy, []byte(newShare.Share[:consts.MsgShareSize])) +} + +func generateRandomNamespacedShares(count, leafSize int) []NamespacedShare { + shares := generateRandNamespacedRawData(count, consts.NamespaceSize, leafSize) + nsShares := make(NamespacedShares, count) + for i, s := range shares { + nsShares[i] = NamespacedShare{ + Share: s[consts.NamespaceSize:], + ID: s[:consts.NamespaceSize], + } + } + return nsShares +} + +func generateRandNamespacedRawData(total, nidSize, leafSize int) [][]byte { + data := make([][]byte, total) + for i := 0; i < total; i++ { + nid := make([]byte, nidSize) + rand.Read(nid) + data[i] = nid + } + sortByteArrays(data) + for i := 0; i < total; i++ { + d := make([]byte, leafSize) + rand.Read(d) + data[i] = append(data[i], d...) 
+ } + + return data +} + +func sortByteArrays(src [][]byte) { + sort.Slice(src, func(i, j int) bool { return bytes.Compare(src[i], src[j]) < 0 }) +} From 66f720dffe0adc9b40a6fa52f4649bb23303a9ff Mon Sep 17 00:00:00 2001 From: Evan Forbes <42654277+evan-forbes@users.noreply.github.com> Date: Mon, 5 Apr 2021 22:51:00 -0500 Subject: [PATCH 7/7] Spec compliant merge shares (#261) * start spec compliant share merging * refactor and finish unit testing * whoops * linter gods * fix initial changes and use constants * use constant * more polish * docs fix* review feedback: docs and out of range panic protection * review feedback: add panic protection from empty input * use constant instead of recalculating `ShareSize`* don't redeclare existing var* be more explicit with returned nil* use constant instead of recalculating `ShareSize`* review feedback: use consistent capitalization * stop accepting reserved namespaces as normal messages * use a descriptive var name for message length * linter and comparison fix * reorg tests, add test for parse delimiter, DataFromBlock and fix evidence marshal bug * catch error for linter * update test MakeShares to include length delimiters for the SHARE_RESERVED_BYTE * minor iteration change * refactor share splitting to fix bug * fix all bugs with third and final refactor * fix conflict * revert unnecessary changes * review feedback: better docs* reivew feedback: add comment for safeLen * review feedback: remove unnecessay comments * review feedback: split up share merging and splitting into their own files * review feedback: more descriptive var names * fix accidental change * add some constant docs * spelling error Co-authored-by: Hlib Kanunnikov Co-authored-by: John Adler Co-authored-by: Ismail Khoffi --- pkg/consts/consts.go | 3 + types/block.go | 100 +++++++--- types/share_merging.go | 333 ++++++++++++++++++++++++++++++++ types/share_splitting.go | 148 +++++++++++++++ types/shares.go | 122 ------------ types/shares_test.go | 397 +++++++++++++++++++++++++++++++++++---- types/tx.go | 9 +- 7 files changed, 932 insertions(+), 180 deletions(-) create mode 100644 types/share_merging.go create mode 100644 types/share_splitting.go diff --git a/pkg/consts/consts.go b/pkg/consts/consts.go index 703fa0a8bf..c7d9025fb2 100644 --- a/pkg/consts/consts.go +++ b/pkg/consts/consts.go @@ -4,6 +4,7 @@ import ( "crypto/sha256" "github.com/celestiaorg/nmt/namespace" + "github.com/celestiaorg/rsmt2d" ) // This contains all constants of: @@ -61,4 +62,6 @@ var ( // NewBaseHashFunc change accordingly if another hash.Hash should be used as a base hasher in the NMT: NewBaseHashFunc = sha256.New + + DefaultCodec = rsmt2d.NewRSGF8Codec ) diff --git a/types/block.go b/types/block.go index f961fa34e2..7bfa26ee1a 100644 --- a/types/block.go +++ b/types/block.go @@ -4,6 +4,7 @@ import ( "bytes" "errors" "fmt" + "math" "strings" "time" @@ -1112,6 +1113,69 @@ func (data *Data) Hash() tmbytes.HexBytes { return data.hash } +// ComputeShares splits block data into shares of an original data square and +// returns them along with an amount of non-redundant shares. 
The shares +// returned are padded to complete a square size that is a power of two +func (data *Data) ComputeShares() (NamespacedShares, int) { + // TODO(ismail): splitting into shares should depend on the block size and layout + // see: https://github.com/celestiaorg/celestia-specs/blob/master/specs/block_proposer.md#laying-out-transactions-and-messages + + // reserved shares: + txShares := data.Txs.SplitIntoShares() + intermRootsShares := data.IntermediateStateRoots.SplitIntoShares() + evidenceShares := data.Evidence.SplitIntoShares() + + // application data shares from messages: + msgShares := data.Messages.SplitIntoShares() + curLen := len(txShares) + len(intermRootsShares) + len(evidenceShares) + len(msgShares) + + // find the number of shares needed to create a square that has a power of + // two width + wantLen := paddedLen(curLen) + + // ensure that the min square size is used + if wantLen < consts.MinSharecount { + wantLen = consts.MinSharecount + } + + tailShares := TailPaddingShares(wantLen - curLen) + + return append(append(append(append( + txShares, + intermRootsShares...), + evidenceShares...), + msgShares...), + tailShares...), curLen +} + +// paddedLen calculates the number of shares needed to make a power of 2 square +// given the current number of shares +func paddedLen(length int) int { + width := uint32(math.Ceil(math.Sqrt(float64(length)))) + width = nextHighestPowerOf2(width) + return int(width * width) +} + +// nextPowerOf2 returns the next highest power of 2 unless the input is a power +// of two, in which case it returns the input +func nextHighestPowerOf2(v uint32) uint32 { + if v == 0 { + return 0 + } + + // find the next highest power using bit mashing + v-- + v |= v >> 1 + v |= v >> 2 + v |= v >> 4 + v |= v >> 8 + v |= v >> 16 + v++ + + // return the next highest power + return v +} + type Messages struct { MessagesList []Message `json:"msgs"` } @@ -1120,19 +1184,20 @@ type IntermediateStateRoots struct { RawRootsList []tmbytes.HexBytes `json:"intermediate_roots"` } -func (roots IntermediateStateRoots) splitIntoShares() NamespacedShares { - shares := make([]NamespacedShare, 0) +func (roots IntermediateStateRoots) SplitIntoShares() NamespacedShares { + rawDatas := make([][]byte, 0, len(roots.RawRootsList)) for _, root := range roots.RawRootsList { rawData, err := root.MarshalDelimited() if err != nil { panic(fmt.Sprintf("app returned intermediate state root that can not be encoded %#v", root)) } - shares = appendToShares(shares, consts.IntermediateStateRootsNamespaceID, rawData) + rawDatas = append(rawDatas, rawData) } + shares := splitContiguous(consts.IntermediateStateRootsNamespaceID, rawDatas) return shares } -func (msgs Messages) splitIntoShares() NamespacedShares { +func (msgs Messages) SplitIntoShares() NamespacedShares { shares := make([]NamespacedShare, 0) for _, m := range msgs.MessagesList { rawData, err := m.MarshalDelimited() @@ -1346,29 +1411,20 @@ func (data *EvidenceData) FromProto(eviData *tmproto.EvidenceList) error { return nil } -func (data *EvidenceData) splitIntoShares() NamespacedShares { - shares := make([]NamespacedShare, 0) +func (data *EvidenceData) SplitIntoShares() NamespacedShares { + rawDatas := make([][]byte, 0, len(data.Evidence)) for _, ev := range data.Evidence { - var rawData []byte - var err error - switch cev := ev.(type) { - case *DuplicateVoteEvidence: - rawData, err = protoio.MarshalDelimited(cev.ToProto()) - case *LightClientAttackEvidence: - pcev, iErr := cev.ToProto() - if iErr != nil { - err = iErr - break - } - 
rawData, err = protoio.MarshalDelimited(pcev) - default: - panic(fmt.Sprintf("unknown evidence included in evidence pool (don't know how to encode this) %#v", ev)) + pev, err := EvidenceToProto(ev) + if err != nil { + panic("failure to convert evidence to equivalent proto type") } + rawData, err := protoio.MarshalDelimited(pev) if err != nil { - panic(fmt.Sprintf("evidence included in evidence pool that can not be encoded %#v, err: %v", ev, err)) + panic(err) } - shares = appendToShares(shares, consts.EvidenceNamespaceID, rawData) + rawDatas = append(rawDatas, rawData) } + shares := splitContiguous(consts.EvidenceNamespaceID, rawDatas) return shares } diff --git a/types/share_merging.go b/types/share_merging.go new file mode 100644 index 0000000000..f54bbd32a9 --- /dev/null +++ b/types/share_merging.go @@ -0,0 +1,333 @@ +package types + +import ( + "bytes" + "encoding/binary" + "errors" + + "github.com/celestiaorg/rsmt2d" + "github.com/gogo/protobuf/proto" + tmbytes "github.com/tendermint/tendermint/libs/bytes" + "github.com/tendermint/tendermint/pkg/consts" + tmproto "github.com/tendermint/tendermint/proto/tendermint/types" +) + +// DataFromSquare extracts block data from an extended data square. +func DataFromSquare(eds *rsmt2d.ExtendedDataSquare) (Data, error) { + originalWidth := eds.Width() / 2 + + // sort block data shares by namespace + var ( + sortedTxShares [][]byte + sortedISRShares [][]byte + sortedEvdShares [][]byte + sortedMsgShares [][]byte + ) + + // iterate over each row index + for x := uint(0); x < originalWidth; x++ { + // iterate over each share in the original data square + row := eds.Row(x) + + for _, share := range row[:originalWidth] { + // sort the data of that share types via namespace + nid := share[:consts.NamespaceSize] + switch { + case bytes.Equal(consts.TxNamespaceID, nid): + sortedTxShares = append(sortedTxShares, share) + + case bytes.Equal(consts.IntermediateStateRootsNamespaceID, nid): + sortedISRShares = append(sortedISRShares, share) + + case bytes.Equal(consts.EvidenceNamespaceID, nid): + sortedEvdShares = append(sortedEvdShares, share) + + case bytes.Equal(consts.TailPaddingNamespaceID, nid): + continue + + // ignore unused but reserved namespaces + case bytes.Compare(nid, consts.MaxReservedNamespace) < 1: + continue + + // every other namespaceID should be a message + default: + sortedMsgShares = append(sortedMsgShares, share) + } + } + } + + // pass the raw share data to their respective parsers + txs, err := parseTxs(sortedTxShares) + if err != nil { + return Data{}, err + } + + isrs, err := parseISRs(sortedISRShares) + if err != nil { + return Data{}, err + } + + evd, err := parseEvd(sortedEvdShares) + if err != nil { + return Data{}, err + } + + msgs, err := parseMsgs(sortedMsgShares) + if err != nil { + return Data{}, err + } + + return Data{ + Txs: txs, + IntermediateStateRoots: isrs, + Evidence: evd, + Messages: msgs, + }, nil +} + +// parseTxs collects all of the transactions from the shares provided +func parseTxs(shares [][]byte) (Txs, error) { + // parse the sharse + rawTxs, err := processContiguousShares(shares) + if err != nil { + return nil, err + } + + // convert to the Tx type + txs := make(Txs, len(rawTxs)) + for i := 0; i < len(txs); i++ { + txs[i] = Tx(rawTxs[i]) + } + + return txs, nil +} + +// parseISRs collects all the intermediate state roots from the shares provided +func parseISRs(shares [][]byte) (IntermediateStateRoots, error) { + rawISRs, err := processContiguousShares(shares) + if err != nil { + return 
IntermediateStateRoots{}, err + } + + ISRs := make([]tmbytes.HexBytes, len(rawISRs)) + for i := 0; i < len(ISRs); i++ { + ISRs[i] = rawISRs[i] + } + + return IntermediateStateRoots{RawRootsList: ISRs}, nil +} + +// parseEvd collects all evidence from the shares provided. +func parseEvd(shares [][]byte) (EvidenceData, error) { + // the raw data returned does not have length delimiters or namespaces and + // is ready to be unmarshaled + rawEvd, err := processContiguousShares(shares) + if err != nil { + return EvidenceData{}, err + } + + evdList := make(EvidenceList, len(rawEvd)) + + // parse into protobuf bytes + for i := 0; i < len(rawEvd); i++ { + // unmarshal the evidence + var protoEvd tmproto.Evidence + err := proto.Unmarshal(rawEvd[i], &protoEvd) + if err != nil { + return EvidenceData{}, err + } + evd, err := EvidenceFromProto(&protoEvd) + if err != nil { + return EvidenceData{}, err + } + + evdList[i] = evd + } + + return EvidenceData{Evidence: evdList}, nil +} + +// parseMsgs collects all messages from the shares provided +func parseMsgs(shares [][]byte) (Messages, error) { + msgList, err := parseMsgShares(shares) + if err != nil { + return Messages{}, err + } + + return Messages{ + MessagesList: msgList, + }, nil +} + +// processContiguousShares takes raw shares and extracts out transactions, +// intermediate state roots, or evidence. The returned [][]byte do have +// namespaces or length delimiters and are ready to be unmarshalled +func processContiguousShares(shares [][]byte) (txs [][]byte, err error) { + if len(shares) == 0 { + return nil, nil + } + + ss := newShareStack(shares) + return ss.resolve() +} + +// shareStack hold variables for peel +type shareStack struct { + shares [][]byte + txLen uint64 + txs [][]byte + cursor int +} + +func newShareStack(shares [][]byte) *shareStack { + return &shareStack{shares: shares} +} + +func (ss *shareStack) resolve() ([][]byte, error) { + if len(ss.shares) == 0 { + return nil, nil + } + err := ss.peel(ss.shares[0][consts.NamespaceSize+consts.ShareReservedBytes:], true) + return ss.txs, err +} + +// peel recursively parses each chunk of data (either a transaction, +// intermediate state root, or evidence) and adds it to the underlying slice of data. +func (ss *shareStack) peel(share []byte, delimited bool) (err error) { + if delimited { + var txLen uint64 + share, txLen, err = parseDelimiter(share) + if err != nil { + return err + } + if txLen == 0 { + return nil + } + ss.txLen = txLen + } + // safeLen describes the point in the share where it can be safely split. If + // split beyond this point, it is possible to break apart a length + // delimiter, which will result in incorrect share merging + safeLen := len(share) - binary.MaxVarintLen64 + if safeLen < 0 { + safeLen = 0 + } + if ss.txLen <= uint64(safeLen) { + ss.txs = append(ss.txs, share[:ss.txLen]) + share = share[ss.txLen:] + return ss.peel(share, true) + } + // add the next share to the current share to continue merging if possible + if len(ss.shares) > ss.cursor+1 { + ss.cursor++ + share := append(share, ss.shares[ss.cursor][consts.NamespaceSize+consts.ShareReservedBytes:]...) + return ss.peel(share, false) + } + // collect any remaining data + if ss.txLen <= uint64(len(share)) { + ss.txs = append(ss.txs, share[:ss.txLen]) + share = share[ss.txLen:] + return ss.peel(share, true) + } + return errors.New("failure to parse block data: transaction length exceeded data length") +} + +// parseMsgShares iterates through raw shares and separates the contiguous chunks +// of data. 
It is only used for Messages, i.e. shares with a non-reserved namespace. +func parseMsgShares(shares [][]byte) ([]Message, error) { + if len(shares) == 0 { + return nil, nil + } + + // set the first nid and current share + nid := shares[0][:consts.NamespaceSize] + currentShare := shares[0][consts.NamespaceSize:] + // find and remove the msg len delimiter + currentShare, msgLen, err := parseDelimiter(currentShare) + if err != nil { + return nil, err + } + + var msgs []Message + for cursor := uint64(0); cursor < uint64(len(shares)); { + var msg Message + currentShare, nid, cursor, msgLen, msg, err = nextMsg( + shares, + currentShare, + nid, + cursor, + msgLen, + ) + if err != nil { + return nil, err + } + if msg.Data != nil { + msgs = append(msgs, msg) + } + } + + return msgs, nil +} + +func nextMsg( + shares [][]byte, + current, + nid []byte, + cursor, + msgLen uint64, +) ([]byte, []byte, uint64, uint64, Message, error) { + switch { + // the message uses all of the current share data and at least some of the + // next share + case msgLen > uint64(len(current)): + // add the next share to the current one and try again + cursor++ + current = append(current, shares[cursor][consts.NamespaceSize:]...) + return nextMsg(shares, current, nid, cursor, msgLen) + + // the msg we're looking for is contained in the current share + case msgLen <= uint64(len(current)): + msg := Message{nid, current[:msgLen]} + cursor++ + + // call it a day if the work is done + if cursor >= uint64(len(shares)) { + return nil, nil, cursor, 0, msg, nil + } + + nextNid := shares[cursor][:consts.NamespaceSize] + next, msgLen, err := parseDelimiter(shares[cursor][consts.NamespaceSize:]) + return next, nextNid, cursor, msgLen, msg, err + } + // this code is unreachable but the compiler doesn't know that + return nil, nil, 0, 0, Message{}, nil +} + +// parseDelimiter finds and returns the length delimiter of the message provided +// while also removing the delimiter bytes from the input +func parseDelimiter(input []byte) ([]byte, uint64, error) { + if len(input) == 0 { + return input, 0, nil + } + + l := binary.MaxVarintLen64 + if len(input) < binary.MaxVarintLen64 { + l = len(input) + } + + delimiter := zeroPadIfNecessary(input[:l], binary.MaxVarintLen64) + + // read the length of the message + r := bytes.NewBuffer(delimiter) + msgLen, err := binary.ReadUvarint(r) + if err != nil { + return nil, 0, err + } + + // calculate the number of bytes used by the delimiter + lenBuf := make([]byte, binary.MaxVarintLen64) + n := binary.PutUvarint(lenBuf, msgLen) + + // return the input without the length delimiter + return input[n:], msgLen, nil +} diff --git a/types/share_splitting.go b/types/share_splitting.go new file mode 100644 index 0000000000..08c4aba511 --- /dev/null +++ b/types/share_splitting.go @@ -0,0 +1,148 @@ +package types + +import ( + "bytes" + + "github.com/celestiaorg/nmt/namespace" + "github.com/tendermint/tendermint/pkg/consts" +) + +// appendToShares appends raw data as shares. +// Used for messages. +func appendToShares(shares []NamespacedShare, nid namespace.ID, rawData []byte) []NamespacedShare { + if len(rawData) <= consts.MsgShareSize { + rawShare := append(append( + make([]byte, 0, len(nid)+len(rawData)), + nid...), + rawData..., + ) + paddedShare := zeroPadIfNecessary(rawShare, consts.ShareSize) + share := NamespacedShare{paddedShare, nid} + shares = append(shares, share) + } else { // len(rawData) > MsgShareSize + shares = append(shares, splitMessage(rawData, nid)...) 
+ } + return shares +} + +// splitMessage breaks the data in a message into the minimum number of +// namespaced shares +func splitMessage(rawData []byte, nid namespace.ID) []NamespacedShare { + shares := make([]NamespacedShare, 0) + firstRawShare := append(append( + make([]byte, 0, consts.ShareSize), + nid...), + rawData[:consts.MsgShareSize]..., + ) + shares = append(shares, NamespacedShare{firstRawShare, nid}) + rawData = rawData[consts.MsgShareSize:] + for len(rawData) > 0 { + shareSizeOrLen := min(consts.MsgShareSize, len(rawData)) + rawShare := append(append( + make([]byte, 0, consts.ShareSize), + nid...), + rawData[:shareSizeOrLen]..., + ) + paddedShare := zeroPadIfNecessary(rawShare, consts.ShareSize) + share := NamespacedShare{paddedShare, nid} + shares = append(shares, share) + rawData = rawData[shareSizeOrLen:] + } + return shares +} + +// splitContiguous splits multiple raw data contiguously as shares. +// Used for transactions, intermediate state roots, and evidence. +func splitContiguous(nid namespace.ID, rawDatas [][]byte) []NamespacedShare { + shares := make([]NamespacedShare, 0) + // Index into the outer slice of rawDatas + outerIndex := 0 + // Index into the inner slice of rawDatas + innerIndex := 0 + for outerIndex < len(rawDatas) { + var rawData []byte + startIndex := 0 + rawData, outerIndex, innerIndex, startIndex = getNextChunk(rawDatas, outerIndex, innerIndex, consts.TxShareSize) + rawShare := append(append(append( + make([]byte, 0, len(nid)+1+len(rawData)), + nid...), + byte(startIndex)), + rawData...) + paddedShare := zeroPadIfNecessary(rawShare, consts.ShareSize) + share := NamespacedShare{paddedShare, nid} + shares = append(shares, share) + } + return shares +} + +// getNextChunk gets the next chunk for contiguous shares +// Precondition: none of the slices in rawDatas is zero-length +// This precondition should always hold at this point since zero-length txs are simply invalid. +func getNextChunk(rawDatas [][]byte, outerIndex int, innerIndex int, width int) ([]byte, int, int, int) { + rawData := make([]byte, 0, width) + startIndex := 0 + firstBytesToFetch := 0 + + curIndex := 0 + for curIndex < width && outerIndex < len(rawDatas) { + bytesToFetch := min(len(rawDatas[outerIndex])-innerIndex, width-curIndex) + if bytesToFetch == 0 { + panic("zero-length contiguous share data is invalid") + } + if curIndex == 0 { + firstBytesToFetch = bytesToFetch + } + // If we've already placed some data in this chunk, that means + // a new data segment begins + if curIndex != 0 { + // Offset by the fixed reserved bytes at the beginning of the share + startIndex = firstBytesToFetch + consts.NamespaceSize + consts.ShareReservedBytes + } + rawData = append(rawData, rawDatas[outerIndex][innerIndex:innerIndex+bytesToFetch]...) 
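+		// move the cursor forward; when the current rawDatas element is used
+		// up, reset the inner cursor and advance to the next element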
+ innerIndex += bytesToFetch + if innerIndex >= len(rawDatas[outerIndex]) { + innerIndex = 0 + outerIndex++ + } + curIndex += bytesToFetch + } + + return rawData, outerIndex, innerIndex, startIndex +} + +// tail is filler for all tail padded shares +// it is allocated once and used everywhere +var tailPaddingShare = append( + append(make([]byte, 0, consts.ShareSize), consts.TailPaddingNamespaceID...), + bytes.Repeat([]byte{0}, consts.ShareSize-consts.NamespaceSize)..., +) + +func TailPaddingShares(n int) NamespacedShares { + shares := make([]NamespacedShare, n) + for i := 0; i < n; i++ { + shares[i] = NamespacedShare{ + Share: tailPaddingShare, + ID: consts.TailPaddingNamespaceID, + } + } + return shares +} + +func min(a, b int) int { + if a <= b { + return a + } + return b +} + +func zeroPadIfNecessary(share []byte, width int) []byte { + oldLen := len(share) + if oldLen < width { + missingBytes := width - oldLen + padByte := []byte{0} + padding := bytes.Repeat(padByte, missingBytes) + share = append(share, padding...) + return share + } + return share +} diff --git a/types/shares.go b/types/shares.go index a96ff36969..30a191183d 100644 --- a/types/shares.go +++ b/types/shares.go @@ -1,19 +1,15 @@ package types import ( - "bytes" "encoding/binary" "github.com/celestiaorg/nmt/namespace" - "github.com/tendermint/tendermint/pkg/consts" ) // Share contains the raw share data without the corresponding namespace. type Share []byte // NamespacedShare extends a Share with the corresponding namespace. -// It implements the namespace.Data interface and hence can be used -// for pushing the shares to the namespaced Merkle tree. type NamespacedShare struct { Share ID namespace.ID @@ -45,7 +41,6 @@ func (tx Tx) MarshalDelimited() ([]byte, error) { lenBuf := make([]byte, binary.MaxVarintLen64) length := uint64(len(tx)) n := binary.PutUvarint(lenBuf, length) - return append(lenBuf[:n], tx...), nil } @@ -55,122 +50,5 @@ func (m Message) MarshalDelimited() ([]byte, error) { lenBuf := make([]byte, binary.MaxVarintLen64) length := uint64(len(m.Data)) n := binary.PutUvarint(lenBuf, length) - return append(lenBuf[:n], m.Data...), nil } - -// appendToShares appends raw data as shares. -// Used for messages. -func appendToShares(shares []NamespacedShare, nid namespace.ID, rawData []byte) []NamespacedShare { - if len(rawData) <= consts.MsgShareSize { - rawShare := []byte(append(nid, rawData...)) - paddedShare := zeroPadIfNecessary(rawShare, consts.ShareSize) - share := NamespacedShare{paddedShare, nid} - shares = append(shares, share) - } else { // len(rawData) > MsgShareSize - shares = append(shares, split(rawData, nid)...) - } - return shares -} - -// splitContiguous splits multiple raw data contiguously as shares. -// Used for transactions, intermediate state roots, and evidence. 
-func splitContiguous(nid namespace.ID, rawDatas [][]byte) []NamespacedShare { - shares := make([]NamespacedShare, 0) - // Index into the outer slice of rawDatas - outerIndex := 0 - // Index into the inner slice of rawDatas - innerIndex := 0 - for outerIndex < len(rawDatas) { - var rawData []byte - startIndex := 0 - rawData, outerIndex, innerIndex, startIndex = getNextChunk(rawDatas, outerIndex, innerIndex, consts.TxShareSize) - rawShare := []byte(append(append(nid, byte(startIndex)), rawData...)) - paddedShare := zeroPadIfNecessary(rawShare, consts.ShareSize) - share := NamespacedShare{paddedShare, nid} - shares = append(shares, share) - } - return shares -} - -// TODO(ismail): implement corresponding merge method for clients requesting -// shares for a particular namespace -func split(rawData []byte, nid namespace.ID) []NamespacedShare { - shares := make([]NamespacedShare, 0) - firstRawShare := []byte(append(nid, rawData[:consts.MsgShareSize]...)) - shares = append(shares, NamespacedShare{firstRawShare, nid}) - rawData = rawData[consts.MsgShareSize:] - for len(rawData) > 0 { - shareSizeOrLen := min(consts.MsgShareSize, len(rawData)) - rawShare := make([]byte, consts.NamespaceSize) - copy(rawShare, nid) - rawShare = append(rawShare, rawData[:shareSizeOrLen]...) - paddedShare := zeroPadIfNecessary(rawShare, consts.ShareSize) - share := NamespacedShare{paddedShare, nid} - shares = append(shares, share) - rawData = rawData[shareSizeOrLen:] - } - return shares -} - -// getNextChunk gets the next chunk for contiguous shares -// Precondition: none of the slices in rawDatas is zero-length -// This precondition should always hold at this point since zero-length txs are simply invalid. -func getNextChunk(rawDatas [][]byte, outerIndex int, innerIndex int, width int) ([]byte, int, int, int) { - rawData := make([]byte, 0, width) - startIndex := 0 - firstBytesToFetch := 0 - - curIndex := 0 - for curIndex < width && outerIndex < len(rawDatas) { - bytesToFetch := min(len(rawDatas[outerIndex])-innerIndex, width-curIndex) - if bytesToFetch == 0 { - panic("zero-length contiguous share data is invalid") - } - if curIndex == 0 { - firstBytesToFetch = bytesToFetch - } - // If we've already placed some data in this chunk, that means - // a new data segment begins - if curIndex != 0 { - // Offset by the fixed reserved bytes at the beginning of the share - startIndex = firstBytesToFetch + consts.NamespaceSize + consts.ShareReservedBytes - } - rawData = append(rawData, rawDatas[outerIndex][innerIndex:innerIndex+bytesToFetch]...) - innerIndex += bytesToFetch - if innerIndex >= len(rawDatas[outerIndex]) { - innerIndex = 0 - outerIndex++ - } - curIndex += bytesToFetch - } - - return rawData, outerIndex, innerIndex, startIndex -} - -func GenerateTailPaddingShares(n int, shareWidth int) NamespacedShares { - shares := make([]NamespacedShare, n) - for i := 0; i < n; i++ { - shares[i] = NamespacedShare{bytes.Repeat([]byte{0}, shareWidth), consts.TailPaddingNamespaceID} - } - return shares -} - -func min(a, b int) int { - if a <= b { - return a - } - return b -} - -func zeroPadIfNecessary(share []byte, width int) []byte { - oldLen := len(share) - if oldLen < width { - missingBytes := width - oldLen - padByte := []byte{0} - padding := bytes.Repeat(padByte, missingBytes) - share = append(share, padding...) 
- return share - } - return share -} diff --git a/types/shares_test.go b/types/shares_test.go index 5fed4814ee..ddf7c29b07 100644 --- a/types/shares_test.go +++ b/types/shares_test.go @@ -2,19 +2,25 @@ package types import ( "bytes" - "crypto/rand" + "context" + "fmt" + "math" + "math/rand" "reflect" "sort" "testing" + "time" "github.com/celestiaorg/nmt/namespace" + "github.com/celestiaorg/rsmt2d" "github.com/stretchr/testify/assert" "github.com/tendermint/tendermint/internal/libs/protoio" + tmbytes "github.com/tendermint/tendermint/libs/bytes" "github.com/tendermint/tendermint/pkg/consts" ) -type splitter interface { - splitIntoShares() NamespacedShares +type Splitter interface { + SplitIntoShares() NamespacedShares } func TestMakeShares(t *testing.T) { @@ -29,7 +35,11 @@ func TestMakeShares(t *testing.T) { VoteA: vote1, VoteB: vote2, } - testEvidenceBytes, err := protoio.MarshalDelimited(testEvidence.ToProto()) + protoTestEvidence, err := EvidenceToProto(testEvidence) + if err != nil { + t.Error(err) + } + testEvidenceBytes, err := protoio.MarshalDelimited(protoTestEvidence) largeTx := Tx(bytes.Repeat([]byte("large Tx"), 50)) largeTxLenDelimited, _ := largeTx.MarshalDelimited() smolTx := Tx("small Tx") @@ -44,31 +54,36 @@ func TestMakeShares(t *testing.T) { } type args struct { - data splitter + data Splitter } tests := []struct { name string args args want NamespacedShares }{ - {"evidence", - args{ + { + name: "evidence", + args: args{ data: &EvidenceData{ Evidence: []Evidence{testEvidence}, }, - }, NamespacedShares{NamespacedShare{ - Share: append( - append(reservedEvidenceNamespaceID, byte(0)), - testEvidenceBytes[:consts.TxShareSize]..., - ), - ID: reservedEvidenceNamespaceID, - }, NamespacedShare{ - Share: append( - append(reservedEvidenceNamespaceID, byte(0)), - zeroPadIfNecessary(testEvidenceBytes[consts.TxShareSize:], consts.TxShareSize)..., - ), - ID: reservedEvidenceNamespaceID, - }}, + }, + want: NamespacedShares{ + NamespacedShare{ + Share: append( + append(reservedEvidenceNamespaceID, byte(0)), + testEvidenceBytes[:consts.TxShareSize]..., + ), + ID: reservedEvidenceNamespaceID, + }, + NamespacedShare{ + Share: append( + append(reservedEvidenceNamespaceID, byte(0)), + zeroPadIfNecessary(testEvidenceBytes[consts.TxShareSize:], consts.TxShareSize)..., + ), + ID: reservedEvidenceNamespaceID, + }, + }, }, {"small LL Tx", args{ @@ -119,7 +134,10 @@ func TestMakeShares(t *testing.T) { }, NamespacedShare{ Share: append( - append(reservedTxNamespaceID, byte(len(largeTxLenDelimited)-consts.TxShareSize+consts.NamespaceSize+consts.ShareReservedBytes)), + append( + reservedTxNamespaceID, + byte(len(largeTxLenDelimited)-consts.TxShareSize+consts.NamespaceSize+consts.ShareReservedBytes), + ), zeroPadIfNecessary( append(largeTxLenDelimited[consts.TxShareSize:], smolTxLenDelimited...), consts.TxShareSize, @@ -148,7 +166,7 @@ func TestMakeShares(t *testing.T) { tt := tt // stupid scopelint :-/ i := i t.Run(tt.name, func(t *testing.T) { - got := tt.args.data.splitIntoShares() + got := tt.args.data.SplitIntoShares() if !reflect.DeepEqual(got, tt.want) { t.Errorf("%v: makeShares() = \n%+v\nwant\n%+v\n", i, got, tt.want) } @@ -197,27 +215,342 @@ func Test_appendToSharesOverwrite(t *testing.T) { assert.Equal(t, extraCopy, []byte(newShare.Share[:consts.MsgShareSize])) } -func generateRandomNamespacedShares(count, leafSize int) []NamespacedShare { - shares := generateRandNamespacedRawData(count, consts.NamespaceSize, leafSize) - nsShares := make(NamespacedShares, count) +func TestDataFromSquare(t 
*testing.T) { + type test struct { + name string + txCount int + isrCount int + evdCount int + msgCount int + maxSize int // max size of each tx or msg + } + + tests := []test{ + {"one of each random small size", 1, 1, 1, 1, 40}, + {"one of each random large size", 1, 1, 1, 1, 400}, + {"many of each random large size", 10, 10, 10, 10, 40}, + {"many of each random large size", 10, 10, 10, 10, 400}, + {"only transactions", 10, 0, 0, 0, 400}, + {"only intermediate state roots", 0, 10, 0, 0, 400}, + {"only evidence", 0, 0, 10, 0, 400}, + {"only messages", 0, 0, 0, 10, 400}, + } + + for _, tc := range tests { + tc := tc + + t.Run(tc.name, func(t *testing.T) { + // generate random data + data := generateRandomBlockData( + tc.txCount, + tc.isrCount, + tc.evdCount, + tc.msgCount, + tc.maxSize, + ) + + shares, _ := data.ComputeShares() + rawShares := shares.RawShares() + + eds, err := rsmt2d.ComputeExtendedDataSquare(rawShares, rsmt2d.NewRSGF8Codec(), rsmt2d.NewDefaultTree) + if err != nil { + t.Error(err) + } + + res, err := DataFromSquare(eds) + if err != nil { + t.Fatal(err) + } + + // we have to compare the evidence by string because the the + // timestamps differ not by actual time represented, but by + // internals see https://github.com/stretchr/testify/issues/666 + for i := 0; i < len(data.Evidence.Evidence); i++ { + inputEvidence := data.Evidence.Evidence[i].(*DuplicateVoteEvidence) + resultEvidence := res.Evidence.Evidence[i].(*DuplicateVoteEvidence) + assert.Equal(t, inputEvidence.String(), resultEvidence.String()) + } + + // compare the original to the result w/o the evidence + data.Evidence = EvidenceData{} + res.Evidence = EvidenceData{} + + assert.Equal(t, data, res) + }) + } +} + +func TestFuzz_DataFromSquare(t *testing.T) { + t.Skip() + // run random shares through processContiguousShares for a minute + ctx, cancel := context.WithTimeout(context.Background(), time.Minute) + defer cancel() + for { + select { + case <-ctx.Done(): + return + default: + TestDataFromSquare(t) + } + } +} + +func Test_processContiguousShares(t *testing.T) { + // exactTxShareSize is the length of tx that will fit exactly into a single + // share, accounting for namespace id and the length delimiter prepended to + // each tx + const exactTxShareSize = consts.TxShareSize - 1 + + type test struct { + name string + txSize int + txCount int + } + + // each test is ran twice, once using txSize as an exact size, and again + // using it as a cap for randomly sized txs + tests := []test{ + {"single small tx", 10, 1}, + {"many small txs", 10, 10}, + {"single big tx", 1000, 1}, + {"many big txs", 1000, 10}, + {"single exact size tx", exactTxShareSize, 1}, + {"many exact size txs", exactTxShareSize, 10}, + } + + for _, tc := range tests { + tc := tc + + // run the tests with identically sized txs + t.Run(fmt.Sprintf("%s idendically sized ", tc.name), func(t *testing.T) { + txs := generateRandomContiguousShares(tc.txCount, tc.txSize) + + shares := txs.SplitIntoShares() + + parsedTxs, err := processContiguousShares(shares.RawShares()) + if err != nil { + t.Error(err) + } + + // check that the data parsed is identical + for i := 0; i < len(txs); i++ { + assert.Equal(t, []byte(txs[i]), parsedTxs[i]) + } + }) + + // run the same tests using randomly sized txs with caps of tc.txSize + t.Run(fmt.Sprintf("%s randomly sized", tc.name), func(t *testing.T) { + txs := generateRandomlySizedContiguousShares(tc.txCount, tc.txSize) + + shares := txs.SplitIntoShares() + + parsedTxs, err := processContiguousShares(shares.RawShares()) + 
+			if err != nil {
+				t.Error(err)
+			}
+
+			// check that the data parsed is identical to the original
+			for i := 0; i < len(txs); i++ {
+				assert.Equal(t, []byte(txs[i]), parsedTxs[i])
+			}
+		})
+	}
+}
+
+func TestFuzz_processContiguousShares(t *testing.T) {
+	t.Skip()
+	// run random shares through processContiguousShares for a minute
+	ctx, cancel := context.WithTimeout(context.Background(), time.Minute)
+	defer cancel()
+	for {
+		select {
+		case <-ctx.Done():
+			return
+		default:
+			Test_processContiguousShares(t)
+		}
+	}
+}
+
+func Test_parseMsgShares(t *testing.T) {
+	// exactMsgShareSize is the length of a message that will fit exactly into
+	// a single share, accounting for namespace id and the length delimiter
+	// prepended to each message
+	const exactMsgShareSize = consts.MsgShareSize - 2
+
+	type test struct {
+		name     string
+		msgSize  int
+		msgCount int
+	}
+
+	// each test is run twice, once using msgSize as an exact size, and again
+	// using it as a cap for randomly sized leaves
+	tests := []test{
+		{"single small msg", 1, 1},
+		{"many small msgs", 4, 10},
+		{"single big msg", 1000, 1},
+		{"many big msgs", 1000, 10},
+		{"single exact size msg", exactMsgShareSize, 1},
+		{"many exact size msgs", exactMsgShareSize, 10},
+	}
+
+	for _, tc := range tests {
+		tc := tc
+
+		// run the tests with identically sized messages
+		t.Run(fmt.Sprintf("%s identically sized", tc.name), func(t *testing.T) {
+			rawmsgs := make([]Message, tc.msgCount)
+			for i := 0; i < tc.msgCount; i++ {
+				rawmsgs[i] = generateRandomMessage(tc.msgSize)
+			}
+			msgs := Messages{MessagesList: rawmsgs}
+
+			shares := msgs.SplitIntoShares()
+
+			parsedMsgs, err := parseMsgShares(shares.RawShares())
+			if err != nil {
+				t.Error(err)
+			}
+
+			// check that the namespaces and data are the same
+			for i := 0; i < len(msgs.MessagesList); i++ {
+				assert.Equal(t, msgs.MessagesList[i].NamespaceID, parsedMsgs[i].NamespaceID)
+				assert.Equal(t, msgs.MessagesList[i].Data, parsedMsgs[i].Data)
+			}
+		})
+
+		// run the same tests using randomly sized messages with caps of tc.msgSize
+		t.Run(fmt.Sprintf("%s randomly sized", tc.name), func(t *testing.T) {
+			msgs := generateRandomlySizedMessages(tc.msgCount, tc.msgSize)
+			shares := msgs.SplitIntoShares()
+
+			parsedMsgs, err := parseMsgShares(shares.RawShares())
+			if err != nil {
+				t.Error(err)
+			}
+
+			// check that the namespaces and data are the same
+			for i := 0; i < len(msgs.MessagesList); i++ {
+				assert.Equal(t, msgs.MessagesList[i].NamespaceID, parsedMsgs[i].NamespaceID)
+				assert.Equal(t, msgs.MessagesList[i].Data, parsedMsgs[i].Data)
+			}
+		})
+	}
+}
+
+func Test_parseDelimiter(t *testing.T) {
+	for i := uint64(0); i < 100; i++ {
+		tx := generateRandomContiguousShares(1, int(i))[0]
+		input, err := tx.MarshalDelimited()
+		if err != nil {
+			panic(err)
+		}
+		res, txLen, err := parseDelimiter(input)
+		if err != nil {
+			panic(err)
+		}
+		assert.Equal(t, i, txLen)
+		assert.Equal(t, []byte(tx), res)
+	}
+}
+
+// generateRandomBlockData returns randomly generated block data for testing purposes
+func generateRandomBlockData(txCount, isrCount, evdCount, msgCount, maxSize int) Data {
+	var out Data
+	out.Txs = generateRandomlySizedContiguousShares(txCount, maxSize)
+	out.IntermediateStateRoots = generateRandomISR(isrCount)
+	out.Evidence = generateIdenticalEvidence(evdCount)
+	out.Messages = generateRandomlySizedMessages(msgCount, maxSize)
+	return out
+}
+
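+// generateRandomlySizedContiguousShares returns count random txs, each with a
+// random length of at least 1 and at most max-1 bytes.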
+func generateRandomlySizedContiguousShares(count, max int) Txs {
+	txs := make(Txs, count)
+	for i := 0; i < count; i++ {
+		size := rand.Intn(max)
+		if size == 0 {
+			size = 1
+		}
+		txs[i] = generateRandomContiguousShares(1, size)[0]
+	}
+	return txs
+}
+
+func generateRandomContiguousShares(count, size int) Txs {
+	txs := make(Txs, count)
+	for i := 0; i < count; i++ {
+		tx := make([]byte, size)
+		_, err := rand.Read(tx)
+		if err != nil {
+			panic(err)
+		}
+		txs[i] = tx
+	}
+	return txs
+}
+
+func generateRandomISR(count int) IntermediateStateRoots {
+	roots := make([]tmbytes.HexBytes, count)
+	for i := 0; i < count; i++ {
+		roots[i] = tmbytes.HexBytes(generateRandomContiguousShares(1, 32)[0])
+	}
+	return IntermediateStateRoots{RawRootsList: roots}
+}
+
+func generateIdenticalEvidence(count int) EvidenceData {
+	evidence := make([]Evidence, count)
+	for i := 0; i < count; i++ {
+		ev := NewMockDuplicateVoteEvidence(math.MaxInt64, time.Now(), "chainID")
+		evidence[i] = ev
+	}
+	return EvidenceData{Evidence: evidence}
+}
+
+func generateRandomlySizedMessages(count, maxMsgSize int) Messages {
+	msgs := make([]Message, count)
+	for i := 0; i < count; i++ {
+		msgs[i] = generateRandomMessage(rand.Intn(maxMsgSize))
+	}
+
+	// this is just to let us use assert.Equal
+	if count == 0 {
+		msgs = nil
+	}
+
+	return Messages{MessagesList: msgs}
+}
+
+func generateRandomMessage(size int) Message {
+	share := generateRandomNamespacedShares(1, size)[0]
+	msg := Message{
+		NamespaceID: share.NamespaceID(),
+		Data:        share.Data(),
+	}
+	return msg
+}
+
+func generateRandomNamespacedShares(count, msgSize int) NamespacedShares {
+	shares := generateRandNamespacedRawData(uint32(count), consts.NamespaceSize, uint32(msgSize))
+	msgs := make([]Message, count)
 	for i, s := range shares {
-		nsShares[i] = NamespacedShare{
-			Share: s[consts.NamespaceSize:],
-			ID:    s[:consts.NamespaceSize],
+		msgs[i] = Message{
+			Data:        s[consts.NamespaceSize:],
+			NamespaceID: s[:consts.NamespaceSize],
 		}
 	}
-	return nsShares
+	return Messages{MessagesList: msgs}.SplitIntoShares()
 }
 
-func generateRandNamespacedRawData(total, nidSize, leafSize int) [][]byte {
+func generateRandNamespacedRawData(total, nidSize, leafSize uint32) [][]byte {
 	data := make([][]byte, total)
-	for i := 0; i < total; i++ {
+	for i := uint32(0); i < total; i++ {
 		nid := make([]byte, nidSize)
 		rand.Read(nid)
 		data[i] = nid
 	}
 	sortByteArrays(data)
-	for i := 0; i < total; i++ {
+	for i := uint32(0); i < total; i++ {
 		d := make([]byte, leafSize)
 		rand.Read(d)
 		data[i] = append(data[i], d...)
diff --git a/types/tx.go b/types/tx.go
index 5abae88832..9eacd55b52 100644
--- a/types/tx.go
+++ b/types/tx.go
@@ -80,15 +80,16 @@ func (txs Txs) Proof(i int) TxProof {
 	}
 }
 
-func (txs Txs) splitIntoShares() NamespacedShares {
-	shares := make([]NamespacedShare, 0)
-	for _, tx := range txs {
+func (txs Txs) SplitIntoShares() NamespacedShares {
+	rawDatas := make([][]byte, len(txs))
+	for i, tx := range txs {
 		rawData, err := tx.MarshalDelimited()
 		if err != nil {
 			panic(fmt.Sprintf("included Tx in mem-pool that can not be encoded %v", tx))
 		}
-		shares = appendToShares(shares, consts.TxNamespaceID, rawData)
+		rawDatas[i] = rawData
 	}
+	shares := splitContiguous(consts.TxNamespaceID, rawDatas)
+	return shares
 }
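
For orientation, the round trip that TestDataFromSquare exercises can be summarized in a short sketch. This is illustrative only and not part of the patch: it assumes it sits in the types package next to the tests above (reusing that file's imports, including rsmt2d), it only uses calls that appear verbatim in the test, and the helper name dataRoundTrip is hypothetical.

// dataRoundTrip is a minimal sketch (hypothetical helper, same package as the
// tests above) of the split -> extend -> recover pipeline from TestDataFromSquare.
func dataRoundTrip(data Data) (Data, error) {
	// split the block data into namespaced shares and take the raw [][]byte
	// leaves used for erasure coding
	shares, _ := data.ComputeShares()
	rawShares := shares.RawShares()

	// extend the original square with the same codec and tree the test uses
	eds, err := rsmt2d.ComputeExtendedDataSquare(rawShares, rsmt2d.NewRSGF8Codec(), rsmt2d.NewDefaultTree)
	if err != nil {
		return Data{}, err
	}

	// parse the txs, intermediate state roots, evidence, and messages back
	// out of the extended data square
	return DataFromSquare(eds)
}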