From 4beddda8cdd85b7437fa15e767992e352efdf9bf Mon Sep 17 00:00:00 2001 From: Evan Forbes <42654277+evan-forbes@users.noreply.github.com> Date: Mon, 5 Apr 2021 22:51:00 -0500 Subject: [PATCH] Spec compliant merge shares (#261) * start spec compliant share merging * refactor and finish unit testing * whoops * linter gods * fix initial changes and use constants * use constant * more polish * docs fix* review feedback: docs and out of range panic protection * review feedback: add panic protection from empty input * use constant instead of recalculating `ShareSize`* don't redeclare existing var* be more explicit with returned nil* use constant instead of recalculating `ShareSize`* review feedback: use consistent capitalization * stop accepting reserved namespaces as normal messages * use a descriptive var name for message length * linter and comparison fix * reorg tests, add test for parse delimiter, DataFromBlock and fix evidence marshal bug * catch error for linter * update test MakeShares to include length delimiters for the SHARE_RESERVED_BYTE * minor iteration change * refactor share splitting to fix bug * fix all bugs with third and final refactor * fix conflict * revert unnecessary changes * review feedback: better docs* reivew feedback: add comment for safeLen * review feedback: remove unnecessay comments * review feedback: split up share merging and splitting into their own files * review feedback: more descriptive var names * fix accidental change * add some constant docs * spelling error Co-authored-by: Hlib Kanunnikov Co-authored-by: John Adler Co-authored-by: Ismail Khoffi --- pkg/consts/consts.go | 3 + types/block.go | 100 +++++++--- types/share_merging.go | 333 ++++++++++++++++++++++++++++++++ types/share_splitting.go | 148 +++++++++++++++ types/shares.go | 122 ------------ types/shares_test.go | 397 +++++++++++++++++++++++++++++++++++---- types/tx.go | 9 +- 7 files changed, 932 insertions(+), 180 deletions(-) create mode 100644 types/share_merging.go create mode 100644 types/share_splitting.go diff --git a/pkg/consts/consts.go b/pkg/consts/consts.go index 703fa0a8bf..c7d9025fb2 100644 --- a/pkg/consts/consts.go +++ b/pkg/consts/consts.go @@ -4,6 +4,7 @@ import ( "crypto/sha256" "github.com/celestiaorg/nmt/namespace" + "github.com/celestiaorg/rsmt2d" ) // This contains all constants of: @@ -61,4 +62,6 @@ var ( // NewBaseHashFunc change accordingly if another hash.Hash should be used as a base hasher in the NMT: NewBaseHashFunc = sha256.New + + DefaultCodec = rsmt2d.NewRSGF8Codec ) diff --git a/types/block.go b/types/block.go index f961fa34e2..7bfa26ee1a 100644 --- a/types/block.go +++ b/types/block.go @@ -4,6 +4,7 @@ import ( "bytes" "errors" "fmt" + "math" "strings" "time" @@ -1112,6 +1113,69 @@ func (data *Data) Hash() tmbytes.HexBytes { return data.hash } +// ComputeShares splits block data into shares of an original data square and +// returns them along with an amount of non-redundant shares. 
The shares +// returned are padded to complete a square size that is a power of two +func (data *Data) ComputeShares() (NamespacedShares, int) { + // TODO(ismail): splitting into shares should depend on the block size and layout + // see: https://github.com/celestiaorg/celestia-specs/blob/master/specs/block_proposer.md#laying-out-transactions-and-messages + + // reserved shares: + txShares := data.Txs.SplitIntoShares() + intermRootsShares := data.IntermediateStateRoots.SplitIntoShares() + evidenceShares := data.Evidence.SplitIntoShares() + + // application data shares from messages: + msgShares := data.Messages.SplitIntoShares() + curLen := len(txShares) + len(intermRootsShares) + len(evidenceShares) + len(msgShares) + + // find the number of shares needed to create a square that has a power of + // two width + wantLen := paddedLen(curLen) + + // ensure that the min square size is used + if wantLen < consts.MinSharecount { + wantLen = consts.MinSharecount + } + + tailShares := TailPaddingShares(wantLen - curLen) + + return append(append(append(append( + txShares, + intermRootsShares...), + evidenceShares...), + msgShares...), + tailShares...), curLen +} + +// paddedLen calculates the number of shares needed to make a power of 2 square +// given the current number of shares +func paddedLen(length int) int { + width := uint32(math.Ceil(math.Sqrt(float64(length)))) + width = nextHighestPowerOf2(width) + return int(width * width) +} + +// nextPowerOf2 returns the next highest power of 2 unless the input is a power +// of two, in which case it returns the input +func nextHighestPowerOf2(v uint32) uint32 { + if v == 0 { + return 0 + } + + // find the next highest power using bit mashing + v-- + v |= v >> 1 + v |= v >> 2 + v |= v >> 4 + v |= v >> 8 + v |= v >> 16 + v++ + + // return the next highest power + return v +} + type Messages struct { MessagesList []Message `json:"msgs"` } @@ -1120,19 +1184,20 @@ type IntermediateStateRoots struct { RawRootsList []tmbytes.HexBytes `json:"intermediate_roots"` } -func (roots IntermediateStateRoots) splitIntoShares() NamespacedShares { - shares := make([]NamespacedShare, 0) +func (roots IntermediateStateRoots) SplitIntoShares() NamespacedShares { + rawDatas := make([][]byte, 0, len(roots.RawRootsList)) for _, root := range roots.RawRootsList { rawData, err := root.MarshalDelimited() if err != nil { panic(fmt.Sprintf("app returned intermediate state root that can not be encoded %#v", root)) } - shares = appendToShares(shares, consts.IntermediateStateRootsNamespaceID, rawData) + rawDatas = append(rawDatas, rawData) } + shares := splitContiguous(consts.IntermediateStateRootsNamespaceID, rawDatas) return shares } -func (msgs Messages) splitIntoShares() NamespacedShares { +func (msgs Messages) SplitIntoShares() NamespacedShares { shares := make([]NamespacedShare, 0) for _, m := range msgs.MessagesList { rawData, err := m.MarshalDelimited() @@ -1346,29 +1411,20 @@ func (data *EvidenceData) FromProto(eviData *tmproto.EvidenceList) error { return nil } -func (data *EvidenceData) splitIntoShares() NamespacedShares { - shares := make([]NamespacedShare, 0) +func (data *EvidenceData) SplitIntoShares() NamespacedShares { + rawDatas := make([][]byte, 0, len(data.Evidence)) for _, ev := range data.Evidence { - var rawData []byte - var err error - switch cev := ev.(type) { - case *DuplicateVoteEvidence: - rawData, err = protoio.MarshalDelimited(cev.ToProto()) - case *LightClientAttackEvidence: - pcev, iErr := cev.ToProto() - if iErr != nil { - err = iErr - break - } - 
rawData, err = protoio.MarshalDelimited(pcev) - default: - panic(fmt.Sprintf("unknown evidence included in evidence pool (don't know how to encode this) %#v", ev)) + pev, err := EvidenceToProto(ev) + if err != nil { + panic("failure to convert evidence to equivalent proto type") } + rawData, err := protoio.MarshalDelimited(pev) if err != nil { - panic(fmt.Sprintf("evidence included in evidence pool that can not be encoded %#v, err: %v", ev, err)) + panic(err) } - shares = appendToShares(shares, consts.EvidenceNamespaceID, rawData) + rawDatas = append(rawDatas, rawData) } + shares := splitContiguous(consts.EvidenceNamespaceID, rawDatas) return shares } diff --git a/types/share_merging.go b/types/share_merging.go new file mode 100644 index 0000000000..f54bbd32a9 --- /dev/null +++ b/types/share_merging.go @@ -0,0 +1,333 @@ +package types + +import ( + "bytes" + "encoding/binary" + "errors" + + "github.com/celestiaorg/rsmt2d" + "github.com/gogo/protobuf/proto" + tmbytes "github.com/tendermint/tendermint/libs/bytes" + "github.com/tendermint/tendermint/pkg/consts" + tmproto "github.com/tendermint/tendermint/proto/tendermint/types" +) + +// DataFromSquare extracts block data from an extended data square. +func DataFromSquare(eds *rsmt2d.ExtendedDataSquare) (Data, error) { + originalWidth := eds.Width() / 2 + + // sort block data shares by namespace + var ( + sortedTxShares [][]byte + sortedISRShares [][]byte + sortedEvdShares [][]byte + sortedMsgShares [][]byte + ) + + // iterate over each row index + for x := uint(0); x < originalWidth; x++ { + // iterate over each share in the original data square + row := eds.Row(x) + + for _, share := range row[:originalWidth] { + // sort the data of that share types via namespace + nid := share[:consts.NamespaceSize] + switch { + case bytes.Equal(consts.TxNamespaceID, nid): + sortedTxShares = append(sortedTxShares, share) + + case bytes.Equal(consts.IntermediateStateRootsNamespaceID, nid): + sortedISRShares = append(sortedISRShares, share) + + case bytes.Equal(consts.EvidenceNamespaceID, nid): + sortedEvdShares = append(sortedEvdShares, share) + + case bytes.Equal(consts.TailPaddingNamespaceID, nid): + continue + + // ignore unused but reserved namespaces + case bytes.Compare(nid, consts.MaxReservedNamespace) < 1: + continue + + // every other namespaceID should be a message + default: + sortedMsgShares = append(sortedMsgShares, share) + } + } + } + + // pass the raw share data to their respective parsers + txs, err := parseTxs(sortedTxShares) + if err != nil { + return Data{}, err + } + + isrs, err := parseISRs(sortedISRShares) + if err != nil { + return Data{}, err + } + + evd, err := parseEvd(sortedEvdShares) + if err != nil { + return Data{}, err + } + + msgs, err := parseMsgs(sortedMsgShares) + if err != nil { + return Data{}, err + } + + return Data{ + Txs: txs, + IntermediateStateRoots: isrs, + Evidence: evd, + Messages: msgs, + }, nil +} + +// parseTxs collects all of the transactions from the shares provided +func parseTxs(shares [][]byte) (Txs, error) { + // parse the sharse + rawTxs, err := processContiguousShares(shares) + if err != nil { + return nil, err + } + + // convert to the Tx type + txs := make(Txs, len(rawTxs)) + for i := 0; i < len(txs); i++ { + txs[i] = Tx(rawTxs[i]) + } + + return txs, nil +} + +// parseISRs collects all the intermediate state roots from the shares provided +func parseISRs(shares [][]byte) (IntermediateStateRoots, error) { + rawISRs, err := processContiguousShares(shares) + if err != nil { + return 
IntermediateStateRoots{}, err + } + + ISRs := make([]tmbytes.HexBytes, len(rawISRs)) + for i := 0; i < len(ISRs); i++ { + ISRs[i] = rawISRs[i] + } + + return IntermediateStateRoots{RawRootsList: ISRs}, nil +} + +// parseEvd collects all evidence from the shares provided. +func parseEvd(shares [][]byte) (EvidenceData, error) { + // the raw data returned does not have length delimiters or namespaces and + // is ready to be unmarshaled + rawEvd, err := processContiguousShares(shares) + if err != nil { + return EvidenceData{}, err + } + + evdList := make(EvidenceList, len(rawEvd)) + + // parse into protobuf bytes + for i := 0; i < len(rawEvd); i++ { + // unmarshal the evidence + var protoEvd tmproto.Evidence + err := proto.Unmarshal(rawEvd[i], &protoEvd) + if err != nil { + return EvidenceData{}, err + } + evd, err := EvidenceFromProto(&protoEvd) + if err != nil { + return EvidenceData{}, err + } + + evdList[i] = evd + } + + return EvidenceData{Evidence: evdList}, nil +} + +// parseMsgs collects all messages from the shares provided +func parseMsgs(shares [][]byte) (Messages, error) { + msgList, err := parseMsgShares(shares) + if err != nil { + return Messages{}, err + } + + return Messages{ + MessagesList: msgList, + }, nil +} + +// processContiguousShares takes raw shares and extracts out transactions, +// intermediate state roots, or evidence. The returned [][]byte do have +// namespaces or length delimiters and are ready to be unmarshalled +func processContiguousShares(shares [][]byte) (txs [][]byte, err error) { + if len(shares) == 0 { + return nil, nil + } + + ss := newShareStack(shares) + return ss.resolve() +} + +// shareStack hold variables for peel +type shareStack struct { + shares [][]byte + txLen uint64 + txs [][]byte + cursor int +} + +func newShareStack(shares [][]byte) *shareStack { + return &shareStack{shares: shares} +} + +func (ss *shareStack) resolve() ([][]byte, error) { + if len(ss.shares) == 0 { + return nil, nil + } + err := ss.peel(ss.shares[0][consts.NamespaceSize+consts.ShareReservedBytes:], true) + return ss.txs, err +} + +// peel recursively parses each chunk of data (either a transaction, +// intermediate state root, or evidence) and adds it to the underlying slice of data. +func (ss *shareStack) peel(share []byte, delimited bool) (err error) { + if delimited { + var txLen uint64 + share, txLen, err = parseDelimiter(share) + if err != nil { + return err + } + if txLen == 0 { + return nil + } + ss.txLen = txLen + } + // safeLen describes the point in the share where it can be safely split. If + // split beyond this point, it is possible to break apart a length + // delimiter, which will result in incorrect share merging + safeLen := len(share) - binary.MaxVarintLen64 + if safeLen < 0 { + safeLen = 0 + } + if ss.txLen <= uint64(safeLen) { + ss.txs = append(ss.txs, share[:ss.txLen]) + share = share[ss.txLen:] + return ss.peel(share, true) + } + // add the next share to the current share to continue merging if possible + if len(ss.shares) > ss.cursor+1 { + ss.cursor++ + share := append(share, ss.shares[ss.cursor][consts.NamespaceSize+consts.ShareReservedBytes:]...) + return ss.peel(share, false) + } + // collect any remaining data + if ss.txLen <= uint64(len(share)) { + ss.txs = append(ss.txs, share[:ss.txLen]) + share = share[ss.txLen:] + return ss.peel(share, true) + } + return errors.New("failure to parse block data: transaction length exceeded data length") +} + +// parseMsgShares iterates through raw shares and separates the contiguous chunks +// of data. 
It is only used for Messages, i.e. shares with a non-reserved namespace. +func parseMsgShares(shares [][]byte) ([]Message, error) { + if len(shares) == 0 { + return nil, nil + } + + // set the first nid and current share + nid := shares[0][:consts.NamespaceSize] + currentShare := shares[0][consts.NamespaceSize:] + // find and remove the msg len delimiter + currentShare, msgLen, err := parseDelimiter(currentShare) + if err != nil { + return nil, err + } + + var msgs []Message + for cursor := uint64(0); cursor < uint64(len(shares)); { + var msg Message + currentShare, nid, cursor, msgLen, msg, err = nextMsg( + shares, + currentShare, + nid, + cursor, + msgLen, + ) + if err != nil { + return nil, err + } + if msg.Data != nil { + msgs = append(msgs, msg) + } + } + + return msgs, nil +} + +func nextMsg( + shares [][]byte, + current, + nid []byte, + cursor, + msgLen uint64, +) ([]byte, []byte, uint64, uint64, Message, error) { + switch { + // the message uses all of the current share data and at least some of the + // next share + case msgLen > uint64(len(current)): + // add the next share to the current one and try again + cursor++ + current = append(current, shares[cursor][consts.NamespaceSize:]...) + return nextMsg(shares, current, nid, cursor, msgLen) + + // the msg we're looking for is contained in the current share + case msgLen <= uint64(len(current)): + msg := Message{nid, current[:msgLen]} + cursor++ + + // call it a day if the work is done + if cursor >= uint64(len(shares)) { + return nil, nil, cursor, 0, msg, nil + } + + nextNid := shares[cursor][:consts.NamespaceSize] + next, msgLen, err := parseDelimiter(shares[cursor][consts.NamespaceSize:]) + return next, nextNid, cursor, msgLen, msg, err + } + // this code is unreachable but the compiler doesn't know that + return nil, nil, 0, 0, Message{}, nil +} + +// parseDelimiter finds and returns the length delimiter of the message provided +// while also removing the delimiter bytes from the input +func parseDelimiter(input []byte) ([]byte, uint64, error) { + if len(input) == 0 { + return input, 0, nil + } + + l := binary.MaxVarintLen64 + if len(input) < binary.MaxVarintLen64 { + l = len(input) + } + + delimiter := zeroPadIfNecessary(input[:l], binary.MaxVarintLen64) + + // read the length of the message + r := bytes.NewBuffer(delimiter) + msgLen, err := binary.ReadUvarint(r) + if err != nil { + return nil, 0, err + } + + // calculate the number of bytes used by the delimiter + lenBuf := make([]byte, binary.MaxVarintLen64) + n := binary.PutUvarint(lenBuf, msgLen) + + // return the input without the length delimiter + return input[n:], msgLen, nil +} diff --git a/types/share_splitting.go b/types/share_splitting.go new file mode 100644 index 0000000000..08c4aba511 --- /dev/null +++ b/types/share_splitting.go @@ -0,0 +1,148 @@ +package types + +import ( + "bytes" + + "github.com/celestiaorg/nmt/namespace" + "github.com/tendermint/tendermint/pkg/consts" +) + +// appendToShares appends raw data as shares. +// Used for messages. +func appendToShares(shares []NamespacedShare, nid namespace.ID, rawData []byte) []NamespacedShare { + if len(rawData) <= consts.MsgShareSize { + rawShare := append(append( + make([]byte, 0, len(nid)+len(rawData)), + nid...), + rawData..., + ) + paddedShare := zeroPadIfNecessary(rawShare, consts.ShareSize) + share := NamespacedShare{paddedShare, nid} + shares = append(shares, share) + } else { // len(rawData) > MsgShareSize + shares = append(shares, splitMessage(rawData, nid)...) 
+ } + return shares +} + +// splitMessage breaks the data in a message into the minimum number of +// namespaced shares +func splitMessage(rawData []byte, nid namespace.ID) []NamespacedShare { + shares := make([]NamespacedShare, 0) + firstRawShare := append(append( + make([]byte, 0, consts.ShareSize), + nid...), + rawData[:consts.MsgShareSize]..., + ) + shares = append(shares, NamespacedShare{firstRawShare, nid}) + rawData = rawData[consts.MsgShareSize:] + for len(rawData) > 0 { + shareSizeOrLen := min(consts.MsgShareSize, len(rawData)) + rawShare := append(append( + make([]byte, 0, consts.ShareSize), + nid...), + rawData[:shareSizeOrLen]..., + ) + paddedShare := zeroPadIfNecessary(rawShare, consts.ShareSize) + share := NamespacedShare{paddedShare, nid} + shares = append(shares, share) + rawData = rawData[shareSizeOrLen:] + } + return shares +} + +// splitContiguous splits multiple raw data contiguously as shares. +// Used for transactions, intermediate state roots, and evidence. +func splitContiguous(nid namespace.ID, rawDatas [][]byte) []NamespacedShare { + shares := make([]NamespacedShare, 0) + // Index into the outer slice of rawDatas + outerIndex := 0 + // Index into the inner slice of rawDatas + innerIndex := 0 + for outerIndex < len(rawDatas) { + var rawData []byte + startIndex := 0 + rawData, outerIndex, innerIndex, startIndex = getNextChunk(rawDatas, outerIndex, innerIndex, consts.TxShareSize) + rawShare := append(append(append( + make([]byte, 0, len(nid)+1+len(rawData)), + nid...), + byte(startIndex)), + rawData...) + paddedShare := zeroPadIfNecessary(rawShare, consts.ShareSize) + share := NamespacedShare{paddedShare, nid} + shares = append(shares, share) + } + return shares +} + +// getNextChunk gets the next chunk for contiguous shares +// Precondition: none of the slices in rawDatas is zero-length +// This precondition should always hold at this point since zero-length txs are simply invalid. +func getNextChunk(rawDatas [][]byte, outerIndex int, innerIndex int, width int) ([]byte, int, int, int) { + rawData := make([]byte, 0, width) + startIndex := 0 + firstBytesToFetch := 0 + + curIndex := 0 + for curIndex < width && outerIndex < len(rawDatas) { + bytesToFetch := min(len(rawDatas[outerIndex])-innerIndex, width-curIndex) + if bytesToFetch == 0 { + panic("zero-length contiguous share data is invalid") + } + if curIndex == 0 { + firstBytesToFetch = bytesToFetch + } + // If we've already placed some data in this chunk, that means + // a new data segment begins + if curIndex != 0 { + // Offset by the fixed reserved bytes at the beginning of the share + startIndex = firstBytesToFetch + consts.NamespaceSize + consts.ShareReservedBytes + } + rawData = append(rawData, rawDatas[outerIndex][innerIndex:innerIndex+bytesToFetch]...) 
+ innerIndex += bytesToFetch + if innerIndex >= len(rawDatas[outerIndex]) { + innerIndex = 0 + outerIndex++ + } + curIndex += bytesToFetch + } + + return rawData, outerIndex, innerIndex, startIndex +} + +// tail is filler for all tail padded shares +// it is allocated once and used everywhere +var tailPaddingShare = append( + append(make([]byte, 0, consts.ShareSize), consts.TailPaddingNamespaceID...), + bytes.Repeat([]byte{0}, consts.ShareSize-consts.NamespaceSize)..., +) + +func TailPaddingShares(n int) NamespacedShares { + shares := make([]NamespacedShare, n) + for i := 0; i < n; i++ { + shares[i] = NamespacedShare{ + Share: tailPaddingShare, + ID: consts.TailPaddingNamespaceID, + } + } + return shares +} + +func min(a, b int) int { + if a <= b { + return a + } + return b +} + +func zeroPadIfNecessary(share []byte, width int) []byte { + oldLen := len(share) + if oldLen < width { + missingBytes := width - oldLen + padByte := []byte{0} + padding := bytes.Repeat(padByte, missingBytes) + share = append(share, padding...) + return share + } + return share +} diff --git a/types/shares.go b/types/shares.go index a96ff36969..30a191183d 100644 --- a/types/shares.go +++ b/types/shares.go @@ -1,19 +1,15 @@ package types import ( - "bytes" "encoding/binary" "github.com/celestiaorg/nmt/namespace" - "github.com/tendermint/tendermint/pkg/consts" ) // Share contains the raw share data without the corresponding namespace. type Share []byte // NamespacedShare extends a Share with the corresponding namespace. -// It implements the namespace.Data interface and hence can be used -// for pushing the shares to the namespaced Merkle tree. type NamespacedShare struct { Share ID namespace.ID @@ -45,7 +41,6 @@ func (tx Tx) MarshalDelimited() ([]byte, error) { lenBuf := make([]byte, binary.MaxVarintLen64) length := uint64(len(tx)) n := binary.PutUvarint(lenBuf, length) - return append(lenBuf[:n], tx...), nil } @@ -55,122 +50,5 @@ func (m Message) MarshalDelimited() ([]byte, error) { lenBuf := make([]byte, binary.MaxVarintLen64) length := uint64(len(m.Data)) n := binary.PutUvarint(lenBuf, length) - return append(lenBuf[:n], m.Data...), nil } - -// appendToShares appends raw data as shares. -// Used for messages. -func appendToShares(shares []NamespacedShare, nid namespace.ID, rawData []byte) []NamespacedShare { - if len(rawData) <= consts.MsgShareSize { - rawShare := []byte(append(nid, rawData...)) - paddedShare := zeroPadIfNecessary(rawShare, consts.ShareSize) - share := NamespacedShare{paddedShare, nid} - shares = append(shares, share) - } else { // len(rawData) > MsgShareSize - shares = append(shares, split(rawData, nid)...) - } - return shares -} - -// splitContiguous splits multiple raw data contiguously as shares. -// Used for transactions, intermediate state roots, and evidence. 
-func splitContiguous(nid namespace.ID, rawDatas [][]byte) []NamespacedShare { - shares := make([]NamespacedShare, 0) - // Index into the outer slice of rawDatas - outerIndex := 0 - // Index into the inner slice of rawDatas - innerIndex := 0 - for outerIndex < len(rawDatas) { - var rawData []byte - startIndex := 0 - rawData, outerIndex, innerIndex, startIndex = getNextChunk(rawDatas, outerIndex, innerIndex, consts.TxShareSize) - rawShare := []byte(append(append(nid, byte(startIndex)), rawData...)) - paddedShare := zeroPadIfNecessary(rawShare, consts.ShareSize) - share := NamespacedShare{paddedShare, nid} - shares = append(shares, share) - } - return shares -} - -// TODO(ismail): implement corresponding merge method for clients requesting -// shares for a particular namespace -func split(rawData []byte, nid namespace.ID) []NamespacedShare { - shares := make([]NamespacedShare, 0) - firstRawShare := []byte(append(nid, rawData[:consts.MsgShareSize]...)) - shares = append(shares, NamespacedShare{firstRawShare, nid}) - rawData = rawData[consts.MsgShareSize:] - for len(rawData) > 0 { - shareSizeOrLen := min(consts.MsgShareSize, len(rawData)) - rawShare := make([]byte, consts.NamespaceSize) - copy(rawShare, nid) - rawShare = append(rawShare, rawData[:shareSizeOrLen]...) - paddedShare := zeroPadIfNecessary(rawShare, consts.ShareSize) - share := NamespacedShare{paddedShare, nid} - shares = append(shares, share) - rawData = rawData[shareSizeOrLen:] - } - return shares -} - -// getNextChunk gets the next chunk for contiguous shares -// Precondition: none of the slices in rawDatas is zero-length -// This precondition should always hold at this point since zero-length txs are simply invalid. -func getNextChunk(rawDatas [][]byte, outerIndex int, innerIndex int, width int) ([]byte, int, int, int) { - rawData := make([]byte, 0, width) - startIndex := 0 - firstBytesToFetch := 0 - - curIndex := 0 - for curIndex < width && outerIndex < len(rawDatas) { - bytesToFetch := min(len(rawDatas[outerIndex])-innerIndex, width-curIndex) - if bytesToFetch == 0 { - panic("zero-length contiguous share data is invalid") - } - if curIndex == 0 { - firstBytesToFetch = bytesToFetch - } - // If we've already placed some data in this chunk, that means - // a new data segment begins - if curIndex != 0 { - // Offset by the fixed reserved bytes at the beginning of the share - startIndex = firstBytesToFetch + consts.NamespaceSize + consts.ShareReservedBytes - } - rawData = append(rawData, rawDatas[outerIndex][innerIndex:innerIndex+bytesToFetch]...) - innerIndex += bytesToFetch - if innerIndex >= len(rawDatas[outerIndex]) { - innerIndex = 0 - outerIndex++ - } - curIndex += bytesToFetch - } - - return rawData, outerIndex, innerIndex, startIndex -} - -func GenerateTailPaddingShares(n int, shareWidth int) NamespacedShares { - shares := make([]NamespacedShare, n) - for i := 0; i < n; i++ { - shares[i] = NamespacedShare{bytes.Repeat([]byte{0}, shareWidth), consts.TailPaddingNamespaceID} - } - return shares -} - -func min(a, b int) int { - if a <= b { - return a - } - return b -} - -func zeroPadIfNecessary(share []byte, width int) []byte { - oldLen := len(share) - if oldLen < width { - missingBytes := width - oldLen - padByte := []byte{0} - padding := bytes.Repeat(padByte, missingBytes) - share = append(share, padding...) 
- return share - } - return share -} diff --git a/types/shares_test.go b/types/shares_test.go index 5fed4814ee..ddf7c29b07 100644 --- a/types/shares_test.go +++ b/types/shares_test.go @@ -2,19 +2,25 @@ package types import ( "bytes" - "crypto/rand" + "context" + "fmt" + "math" + "math/rand" "reflect" "sort" "testing" + "time" "github.com/celestiaorg/nmt/namespace" + "github.com/celestiaorg/rsmt2d" "github.com/stretchr/testify/assert" "github.com/tendermint/tendermint/internal/libs/protoio" + tmbytes "github.com/tendermint/tendermint/libs/bytes" "github.com/tendermint/tendermint/pkg/consts" ) -type splitter interface { - splitIntoShares() NamespacedShares +type Splitter interface { + SplitIntoShares() NamespacedShares } func TestMakeShares(t *testing.T) { @@ -29,7 +35,11 @@ func TestMakeShares(t *testing.T) { VoteA: vote1, VoteB: vote2, } - testEvidenceBytes, err := protoio.MarshalDelimited(testEvidence.ToProto()) + protoTestEvidence, err := EvidenceToProto(testEvidence) + if err != nil { + t.Error(err) + } + testEvidenceBytes, err := protoio.MarshalDelimited(protoTestEvidence) largeTx := Tx(bytes.Repeat([]byte("large Tx"), 50)) largeTxLenDelimited, _ := largeTx.MarshalDelimited() smolTx := Tx("small Tx") @@ -44,31 +54,36 @@ func TestMakeShares(t *testing.T) { } type args struct { - data splitter + data Splitter } tests := []struct { name string args args want NamespacedShares }{ - {"evidence", - args{ + { + name: "evidence", + args: args{ data: &EvidenceData{ Evidence: []Evidence{testEvidence}, }, - }, NamespacedShares{NamespacedShare{ - Share: append( - append(reservedEvidenceNamespaceID, byte(0)), - testEvidenceBytes[:consts.TxShareSize]..., - ), - ID: reservedEvidenceNamespaceID, - }, NamespacedShare{ - Share: append( - append(reservedEvidenceNamespaceID, byte(0)), - zeroPadIfNecessary(testEvidenceBytes[consts.TxShareSize:], consts.TxShareSize)..., - ), - ID: reservedEvidenceNamespaceID, - }}, + }, + want: NamespacedShares{ + NamespacedShare{ + Share: append( + append(reservedEvidenceNamespaceID, byte(0)), + testEvidenceBytes[:consts.TxShareSize]..., + ), + ID: reservedEvidenceNamespaceID, + }, + NamespacedShare{ + Share: append( + append(reservedEvidenceNamespaceID, byte(0)), + zeroPadIfNecessary(testEvidenceBytes[consts.TxShareSize:], consts.TxShareSize)..., + ), + ID: reservedEvidenceNamespaceID, + }, + }, }, {"small LL Tx", args{ @@ -119,7 +134,10 @@ func TestMakeShares(t *testing.T) { }, NamespacedShare{ Share: append( - append(reservedTxNamespaceID, byte(len(largeTxLenDelimited)-consts.TxShareSize+consts.NamespaceSize+consts.ShareReservedBytes)), + append( + reservedTxNamespaceID, + byte(len(largeTxLenDelimited)-consts.TxShareSize+consts.NamespaceSize+consts.ShareReservedBytes), + ), zeroPadIfNecessary( append(largeTxLenDelimited[consts.TxShareSize:], smolTxLenDelimited...), consts.TxShareSize, @@ -148,7 +166,7 @@ func TestMakeShares(t *testing.T) { tt := tt // stupid scopelint :-/ i := i t.Run(tt.name, func(t *testing.T) { - got := tt.args.data.splitIntoShares() + got := tt.args.data.SplitIntoShares() if !reflect.DeepEqual(got, tt.want) { t.Errorf("%v: makeShares() = \n%+v\nwant\n%+v\n", i, got, tt.want) } @@ -197,27 +215,342 @@ func Test_appendToSharesOverwrite(t *testing.T) { assert.Equal(t, extraCopy, []byte(newShare.Share[:consts.MsgShareSize])) } -func generateRandomNamespacedShares(count, leafSize int) []NamespacedShare { - shares := generateRandNamespacedRawData(count, consts.NamespaceSize, leafSize) - nsShares := make(NamespacedShares, count) +func TestDataFromSquare(t 
*testing.T) { + type test struct { + name string + txCount int + isrCount int + evdCount int + msgCount int + maxSize int // max size of each tx or msg + } + + tests := []test{ + {"one of each random small size", 1, 1, 1, 1, 40}, + {"one of each random large size", 1, 1, 1, 1, 400}, + {"many of each random large size", 10, 10, 10, 10, 40}, + {"many of each random large size", 10, 10, 10, 10, 400}, + {"only transactions", 10, 0, 0, 0, 400}, + {"only intermediate state roots", 0, 10, 0, 0, 400}, + {"only evidence", 0, 0, 10, 0, 400}, + {"only messages", 0, 0, 0, 10, 400}, + } + + for _, tc := range tests { + tc := tc + + t.Run(tc.name, func(t *testing.T) { + // generate random data + data := generateRandomBlockData( + tc.txCount, + tc.isrCount, + tc.evdCount, + tc.msgCount, + tc.maxSize, + ) + + shares, _ := data.ComputeShares() + rawShares := shares.RawShares() + + eds, err := rsmt2d.ComputeExtendedDataSquare(rawShares, rsmt2d.NewRSGF8Codec(), rsmt2d.NewDefaultTree) + if err != nil { + t.Error(err) + } + + res, err := DataFromSquare(eds) + if err != nil { + t.Fatal(err) + } + + // we have to compare the evidence by string because the the + // timestamps differ not by actual time represented, but by + // internals see https://github.com/stretchr/testify/issues/666 + for i := 0; i < len(data.Evidence.Evidence); i++ { + inputEvidence := data.Evidence.Evidence[i].(*DuplicateVoteEvidence) + resultEvidence := res.Evidence.Evidence[i].(*DuplicateVoteEvidence) + assert.Equal(t, inputEvidence.String(), resultEvidence.String()) + } + + // compare the original to the result w/o the evidence + data.Evidence = EvidenceData{} + res.Evidence = EvidenceData{} + + assert.Equal(t, data, res) + }) + } +} + +func TestFuzz_DataFromSquare(t *testing.T) { + t.Skip() + // run random shares through processContiguousShares for a minute + ctx, cancel := context.WithTimeout(context.Background(), time.Minute) + defer cancel() + for { + select { + case <-ctx.Done(): + return + default: + TestDataFromSquare(t) + } + } +} + +func Test_processContiguousShares(t *testing.T) { + // exactTxShareSize is the length of tx that will fit exactly into a single + // share, accounting for namespace id and the length delimiter prepended to + // each tx + const exactTxShareSize = consts.TxShareSize - 1 + + type test struct { + name string + txSize int + txCount int + } + + // each test is ran twice, once using txSize as an exact size, and again + // using it as a cap for randomly sized txs + tests := []test{ + {"single small tx", 10, 1}, + {"many small txs", 10, 10}, + {"single big tx", 1000, 1}, + {"many big txs", 1000, 10}, + {"single exact size tx", exactTxShareSize, 1}, + {"many exact size txs", exactTxShareSize, 10}, + } + + for _, tc := range tests { + tc := tc + + // run the tests with identically sized txs + t.Run(fmt.Sprintf("%s idendically sized ", tc.name), func(t *testing.T) { + txs := generateRandomContiguousShares(tc.txCount, tc.txSize) + + shares := txs.SplitIntoShares() + + parsedTxs, err := processContiguousShares(shares.RawShares()) + if err != nil { + t.Error(err) + } + + // check that the data parsed is identical + for i := 0; i < len(txs); i++ { + assert.Equal(t, []byte(txs[i]), parsedTxs[i]) + } + }) + + // run the same tests using randomly sized txs with caps of tc.txSize + t.Run(fmt.Sprintf("%s randomly sized", tc.name), func(t *testing.T) { + txs := generateRandomlySizedContiguousShares(tc.txCount, tc.txSize) + + shares := txs.SplitIntoShares() + + parsedTxs, err := processContiguousShares(shares.RawShares()) + 
if err != nil { + t.Error(err) + } + + // check that the data parsed is identical to the original + for i := 0; i < len(txs); i++ { + assert.Equal(t, []byte(txs[i]), parsedTxs[i]) + } + }) + } +} + +func TestFuzz_processContiguousShares(t *testing.T) { + t.Skip() + // run random shares through processContiguousShares for a minute + ctx, cancel := context.WithTimeout(context.Background(), time.Minute) + defer cancel() + for { + select { + case <-ctx.Done(): + return + default: + Test_processContiguousShares(t) + } + } +} + +func Test_parseMsgShares(t *testing.T) { + // exactMsgShareSize is the length of message that will fit exactly into a single + // share, accounting for namespace id and the length delimiter prepended to + // each message + const exactMsgShareSize = consts.MsgShareSize - 2 + + type test struct { + name string + msgSize int + msgCount int + } + + // each test is ran twice, once using msgSize as an exact size, and again + // using it as a cap for randomly sized leaves + tests := []test{ + {"single small msg", 1, 1}, + {"many small msgs", 4, 10}, + {"single big msg", 1000, 1}, + {"many big msgs", 1000, 10}, + {"single exact size msg", exactMsgShareSize, 1}, + {"many exact size msgs", exactMsgShareSize, 10}, + } + + for _, tc := range tests { + tc := tc + + // run the tests with identically sized messagses + t.Run(fmt.Sprintf("%s idendically sized ", tc.name), func(t *testing.T) { + rawmsgs := make([]Message, tc.msgCount) + for i := 0; i < tc.msgCount; i++ { + rawmsgs[i] = generateRandomMessage(tc.msgSize) + } + msgs := Messages{MessagesList: rawmsgs} + + shares := msgs.SplitIntoShares() + + parsedMsgs, err := parseMsgShares(shares.RawShares()) + if err != nil { + t.Error(err) + } + + // check that the namesapces and data are the same + for i := 0; i < len(msgs.MessagesList); i++ { + assert.Equal(t, msgs.MessagesList[i].NamespaceID, parsedMsgs[i].NamespaceID) + assert.Equal(t, msgs.MessagesList[i].Data, parsedMsgs[i].Data) + } + }) + + // run the same tests using randomly sized messages with caps of tc.msgSize + t.Run(fmt.Sprintf("%s randomly sized", tc.name), func(t *testing.T) { + msgs := generateRandomlySizedMessages(tc.msgCount, tc.msgSize) + shares := msgs.SplitIntoShares() + + parsedMsgs, err := parseMsgShares(shares.RawShares()) + if err != nil { + t.Error(err) + } + + // check that the namesapces and data are the same + for i := 0; i < len(msgs.MessagesList); i++ { + assert.Equal(t, msgs.MessagesList[i].NamespaceID, parsedMsgs[i].NamespaceID) + assert.Equal(t, msgs.MessagesList[i].Data, parsedMsgs[i].Data) + } + }) + } +} + +func Test_parseDelimiter(t *testing.T) { + for i := uint64(0); i < 100; i++ { + tx := generateRandomContiguousShares(1, int(i))[0] + input, err := tx.MarshalDelimited() + if err != nil { + panic(err) + } + res, txLen, err := parseDelimiter(input) + if err != nil { + panic(err) + } + assert.Equal(t, i, txLen) + assert.Equal(t, []byte(tx), res) + } +} + +// generateRandomBlockData returns randomly generated block data for testing purposes +func generateRandomBlockData(txCount, isrCount, evdCount, msgCount, maxSize int) Data { + var out Data + out.Txs = generateRandomlySizedContiguousShares(txCount, maxSize) + out.IntermediateStateRoots = generateRandomISR(isrCount) + out.Evidence = generateIdenticalEvidence(evdCount) + out.Messages = generateRandomlySizedMessages(msgCount, maxSize) + return out +} + +func generateRandomlySizedContiguousShares(count, max int) Txs { + txs := make(Txs, count) + for i := 0; i < count; i++ { + size := rand.Intn(max) + if 
size == 0 { + size = 1 + } + txs[i] = generateRandomContiguousShares(1, size)[0] + } + return txs +} + +func generateRandomContiguousShares(count, size int) Txs { + txs := make(Txs, count) + for i := 0; i < count; i++ { + tx := make([]byte, size) + _, err := rand.Read(tx) + if err != nil { + panic(err) + } + txs[i] = tx + } + return txs +} + +func generateRandomISR(count int) IntermediateStateRoots { + roots := make([]tmbytes.HexBytes, count) + for i := 0; i < count; i++ { + roots[i] = tmbytes.HexBytes(generateRandomContiguousShares(1, 32)[0]) + } + return IntermediateStateRoots{RawRootsList: roots} +} + +func generateIdenticalEvidence(count int) EvidenceData { + evidence := make([]Evidence, count) + for i := 0; i < count; i++ { + ev := NewMockDuplicateVoteEvidence(math.MaxInt64, time.Now(), "chainID") + evidence[i] = ev + } + return EvidenceData{Evidence: evidence} +} + +func generateRandomlySizedMessages(count, maxMsgSize int) Messages { + msgs := make([]Message, count) + for i := 0; i < count; i++ { + msgs[i] = generateRandomMessage(rand.Intn(maxMsgSize)) + } + + // this is just to let us use assert.Equal + if count == 0 { + msgs = nil + } + + return Messages{MessagesList: msgs} +} + +func generateRandomMessage(size int) Message { + share := generateRandomNamespacedShares(1, size)[0] + msg := Message{ + NamespaceID: share.NamespaceID(), + Data: share.Data(), + } + return msg +} + +func generateRandomNamespacedShares(count, msgSize int) NamespacedShares { + shares := generateRandNamespacedRawData(uint32(count), consts.NamespaceSize, uint32(msgSize)) + msgs := make([]Message, count) for i, s := range shares { - nsShares[i] = NamespacedShare{ - Share: s[consts.NamespaceSize:], - ID: s[:consts.NamespaceSize], + msgs[i] = Message{ + Data: s[consts.NamespaceSize:], + NamespaceID: s[:consts.NamespaceSize], } } - return nsShares + return Messages{MessagesList: msgs}.SplitIntoShares() } -func generateRandNamespacedRawData(total, nidSize, leafSize int) [][]byte { +func generateRandNamespacedRawData(total, nidSize, leafSize uint32) [][]byte { data := make([][]byte, total) - for i := 0; i < total; i++ { + for i := uint32(0); i < total; i++ { nid := make([]byte, nidSize) rand.Read(nid) data[i] = nid } sortByteArrays(data) - for i := 0; i < total; i++ { + for i := uint32(0); i < total; i++ { d := make([]byte, leafSize) rand.Read(d) data[i] = append(data[i], d...) diff --git a/types/tx.go b/types/tx.go index 5abae88832..9eacd55b52 100644 --- a/types/tx.go +++ b/types/tx.go @@ -80,15 +80,16 @@ func (txs Txs) Proof(i int) TxProof { } } -func (txs Txs) splitIntoShares() NamespacedShares { - shares := make([]NamespacedShare, 0) - for _, tx := range txs { +func (txs Txs) SplitIntoShares() NamespacedShares { + rawDatas := make([][]byte, len(txs)) + for i, tx := range txs { rawData, err := tx.MarshalDelimited() if err != nil { panic(fmt.Sprintf("included Tx in mem-pool that can not be encoded %v", tx)) } - shares = appendToShares(shares, consts.TxNamespaceID, rawData) + rawDatas[i] = rawData } + shares := splitContiguous(consts.TxNamespaceID, rawDatas) return shares }
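
The padding logic added to ComputeShares rounds the share count up to the smallest power-of-two square so the result can be fed directly into rsmt2d. The standalone sketch below mirrors the paddedLen and nextHighestPowerOf2 helpers from types/block.go; it leaves out the consts.MinSharecount clamp and is only meant to show the arithmetic.

package main

import (
	"fmt"
	"math"
)

// nextHighestPowerOf2 returns the next power of two greater than or equal
// to v (0 stays 0), using the same bit tricks as the patch.
func nextHighestPowerOf2(v uint32) uint32 {
	if v == 0 {
		return 0
	}
	v--
	v |= v >> 1
	v |= v >> 2
	v |= v >> 4
	v |= v >> 8
	v |= v >> 16
	return v + 1
}

// paddedLen returns the total number of shares in the smallest
// power-of-two square that fits `length` shares.
func paddedLen(length int) int {
	width := nextHighestPowerOf2(uint32(math.Ceil(math.Sqrt(float64(length)))))
	return int(width * width)
}

func main() {
	// 5 shares need a 4x4 square (16 shares), 17 shares need 8x8 (64).
	for _, n := range []int{1, 2, 5, 16, 17} {
		fmt.Printf("%d shares -> %d-share square\n", n, paddedLen(n))
	}
}

ComputeShares then appends TailPaddingShares(wantLen - curLen), so the gap is filled with shares under the tail padding namespace, which DataFromSquare skips when reassembling the block data.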
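
Every transaction, intermediate state root, evidence item, and message is prefixed with its length as a uvarint before it is placed into shares (Tx.MarshalDelimited / Message.MarshalDelimited), and parseDelimiter strips that prefix again on the merge side. A minimal round trip of that framing, assuming a plain byte slice rather than the patch's Tx and Message types:

package main

import (
	"bytes"
	"encoding/binary"
	"fmt"
)

// marshalDelimited prefixes data with its length encoded as a uvarint.
func marshalDelimited(data []byte) []byte {
	lenBuf := make([]byte, binary.MaxVarintLen64)
	n := binary.PutUvarint(lenBuf, uint64(len(data)))
	return append(lenBuf[:n], data...)
}

// parseDelimiter reads the uvarint length prefix and returns the remaining
// bytes along with the decoded length.
func parseDelimiter(input []byte) ([]byte, uint64, error) {
	r := bytes.NewReader(input)
	length, err := binary.ReadUvarint(r)
	if err != nil {
		return nil, 0, err
	}
	// recompute how many bytes the delimiter occupied
	n := binary.PutUvarint(make([]byte, binary.MaxVarintLen64), length)
	return input[n:], length, nil
}

func main() {
	framed := marshalDelimited([]byte("small Tx"))
	rest, length, err := parseDelimiter(framed)
	if err != nil {
		panic(err)
	}
	fmt.Printf("len=%d data=%q\n", length, rest) // len=8 data="small Tx"
}

The version in share_merging.go additionally zero-pads short inputs before decoding, since a delimiter can sit at the very end of a share, and shareStack.peel only splits off a parsed item when at least binary.MaxVarintLen64 bytes would remain afterwards (the safeLen check), so a delimiter is never torn apart during merging.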
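
Reserved-namespace data (transactions, intermediate state roots, evidence) is laid out contiguously: the delimited items are packed back to back across shares, and processContiguousShares later stitches them together again by following the length prefixes across share boundaries. The round trip below is a deliberately simplified model of that layout, assuming small fixed-size chunks and omitting the namespace ID and the reserved start-index byte that the real shares carry.

package main

import (
	"bytes"
	"encoding/binary"
	"fmt"
	"io"
	"reflect"
)

const chunkSize = 16 // stand-in for the data portion of a share

// splitContiguousSimple packs length-delimited items back to back into
// fixed-size chunks and zero-pads the final chunk.
func splitContiguousSimple(items [][]byte) [][]byte {
	var buf []byte
	for _, item := range items {
		lenBuf := make([]byte, binary.MaxVarintLen64)
		n := binary.PutUvarint(lenBuf, uint64(len(item)))
		buf = append(buf, lenBuf[:n]...)
		buf = append(buf, item...)
	}
	var chunks [][]byte
	for len(buf) > 0 {
		end := chunkSize
		if len(buf) < end {
			end = len(buf)
		}
		chunk := make([]byte, chunkSize) // zero padding beyond `end`
		copy(chunk, buf[:end])
		chunks = append(chunks, chunk)
		buf = buf[end:]
	}
	return chunks
}

// mergeContiguousSimple concatenates the chunks and reads length-delimited
// items until it reaches the zero padding (or the end of the data).
func mergeContiguousSimple(chunks [][]byte) ([][]byte, error) {
	r := bytes.NewReader(bytes.Join(chunks, nil))
	var items [][]byte
	for {
		length, err := binary.ReadUvarint(r)
		if err == io.EOF || length == 0 {
			return items, nil
		}
		if err != nil {
			return nil, err
		}
		item := make([]byte, length)
		if _, err := io.ReadFull(r, item); err != nil {
			return nil, err
		}
		items = append(items, item)
	}
}

func main() {
	in := [][]byte{[]byte("first tx"), []byte("a somewhat longer second tx"), []byte("third")}
	out, err := mergeContiguousSimple(splitContiguousSimple(in))
	if err != nil {
		panic(err)
	}
	fmt.Println(reflect.DeepEqual(in, out)) // true
}

The real splitContiguous additionally records, in the reserved byte, where the first new item starts within each share, and messages take a separate path (splitMessage / parseMsgShares) because each message keeps its own namespace and always starts at the beginning of a share.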