diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000000..7092c7ab8f --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,11 @@ +version: 2 +updates: + - package-ecosystem: gomod + directory: "/" + schedule: + interval: daily + open-pull-requests-limit: 10 + labels: + - T:dependencies + allow: + - dependency-name: "*/celestiaorg/*" diff --git a/README.md b/README.md index ac7241c3b9..c5403d4acc 100644 --- a/README.md +++ b/README.md @@ -155,7 +155,4 @@ Additional tooling can be found in [/docs/tools](/docs/tools). ### Research -- [The latest gossip on BFT consensus](https://arxiv.org/abs/1807.04938) -- [Master's Thesis on Tendermint](https://atrium.lib.uoguelph.ca/xmlui/handle/10214/9769) -- [Original Whitepaper: "Tendermint: Consensus Without Mining"](https://tendermint.com/static/docs/tendermint.pdf) -- [Blog](https://blog.cosmos.network/tendermint/home) +We are hiring Go engineers! Join us in building the future of blockchain scaling and interoperability. [Apply here](https://jobs.lever.co/celestia). 
diff --git a/blockchain/msgs_test.go b/blockchain/msgs_test.go index 89310bcc27..540b59d976 100644 --- a/blockchain/msgs_test.go +++ b/blockchain/msgs_test.go @@ -10,6 +10,7 @@ import ( "github.com/stretchr/testify/require" bcproto "github.com/tendermint/tendermint/proto/tendermint/blockchain" + "github.com/tendermint/tendermint/state/test/factory" "github.com/tendermint/tendermint/types" ) @@ -80,7 +81,7 @@ func TestBcStatusResponseMessageValidateBasic(t *testing.T) { // nolint:lll // ignore line length in tests func TestBlockchainMessageVectors(t *testing.T) { - block := types.MakeBlock(int64(3), []types.Tx{types.Tx("Hello World")}, nil, nil, nil) + block := types.MakeBlock(int64(3), factory.MakeData([]types.Tx{types.Tx("Hello World")}, nil, nil), nil) block.Version.Block = 11 // overwrite updated protocol version bpb, err := block.ToProto() diff --git a/blockchain/v0/reactor_test.go b/blockchain/v0/reactor_test.go index 8f08948ae9..427d81045a 100644 --- a/blockchain/v0/reactor_test.go +++ b/blockchain/v0/reactor_test.go @@ -20,6 +20,7 @@ import ( "github.com/tendermint/tendermint/proxy" sm "github.com/tendermint/tendermint/state" "github.com/tendermint/tendermint/store" + "github.com/tendermint/tendermint/test/factory" "github.com/tendermint/tendermint/types" tmtime "github.com/tendermint/tendermint/types/time" ) @@ -289,7 +290,12 @@ func makeTxs(height int64) (txs []types.Tx) { } func makeBlock(height int64, state sm.State, lastCommit *types.Commit) *types.Block { - block, _ := state.MakeBlock(height, makeTxs(height), nil, nil, lastCommit, state.Validators.GetProposer().Address) + block, _ := state.MakeBlock( + height, + factory.MakeData(makeTxs(height), nil, nil), + lastCommit, + state.Validators.GetProposer().Address, + ) return block } diff --git a/blockchain/v1/peer_test.go b/blockchain/v1/peer_test.go index 167fc2a56c..86e683714d 100644 --- a/blockchain/v1/peer_test.go +++ b/blockchain/v1/peer_test.go @@ -11,6 +11,7 @@ import ( 
"github.com/tendermint/tendermint/libs/log" tmrand "github.com/tendermint/tendermint/libs/rand" "github.com/tendermint/tendermint/p2p" + "github.com/tendermint/tendermint/state/test/factory" "github.com/tendermint/tendermint/types" ) @@ -276,5 +277,5 @@ func checkByStoppingPeerTimer(t *testing.T, peer *BpPeer, running bool) { } func makeSmallBlock(height int) *types.Block { - return types.MakeBlock(int64(height), []types.Tx{types.Tx("foo")}, nil, nil, nil) + return types.MakeBlock(int64(height), factory.MakeDataFromTxs([]types.Tx{types.Tx("foo")}), nil) } diff --git a/blockchain/v1/pool_test.go b/blockchain/v1/pool_test.go index d628ae3760..83339cfe79 100644 --- a/blockchain/v1/pool_test.go +++ b/blockchain/v1/pool_test.go @@ -8,6 +8,7 @@ import ( "github.com/tendermint/tendermint/libs/log" "github.com/tendermint/tendermint/p2p" + "github.com/tendermint/tendermint/state/test/factory" "github.com/tendermint/tendermint/types" ) @@ -81,7 +82,7 @@ func makeBlockPool(bcr *testBcR, height int64, peers []BpPeer, blocks map[int64] bPool.peers[p.id].RequestSent(h) if p.create { // simulate that a block at height h has been received - _ = bPool.peers[p.id].AddBlock(types.MakeBlock(h, txs, nil, nil, nil), 100) + _ = bPool.peers[p.id].AddBlock(types.MakeBlock(h, factory.MakeDataFromTxs(txs), nil), 100) } } return bPool @@ -392,7 +393,7 @@ func TestBlockPoolAddBlock(t *testing.T) { pool: makeBlockPool(testBcR, 10, []BpPeer{{ID: "P1", Height: 100}}, map[int64]tPBlocks{}), args: args{ peerID: "P2", - block: types.MakeBlock(int64(10), txs, nil, nil, nil), + block: types.MakeBlock(int64(10), factory.MakeDataFromTxs(txs), nil), blockSize: 100, }, poolWanted: makeBlockPool(testBcR, 10, []BpPeer{{ID: "P1", Height: 100}}, map[int64]tPBlocks{}), @@ -404,7 +405,7 @@ func TestBlockPoolAddBlock(t *testing.T) { map[int64]tPBlocks{10: {"P1", false}}), args: args{ peerID: "P1", - block: types.MakeBlock(int64(11), txs, nil, nil, nil), + block: types.MakeBlock(int64(11), 
factory.MakeDataFromTxs(txs), nil), blockSize: 100, }, poolWanted: makeBlockPool(testBcR, 10, @@ -418,7 +419,7 @@ func TestBlockPoolAddBlock(t *testing.T) { map[int64]tPBlocks{10: {"P1", true}, 11: {"P1", false}}), args: args{ peerID: "P1", - block: types.MakeBlock(int64(10), txs, nil, nil, nil), + block: types.MakeBlock(int64(10), factory.MakeDataFromTxs(txs), nil), blockSize: 100, }, poolWanted: makeBlockPool(testBcR, 10, @@ -432,7 +433,7 @@ func TestBlockPoolAddBlock(t *testing.T) { map[int64]tPBlocks{10: {"P1", false}}), args: args{ peerID: "P2", - block: types.MakeBlock(int64(10), txs, nil, nil, nil), + block: types.MakeBlock(int64(10), factory.MakeDataFromTxs(txs), nil), blockSize: 100, }, poolWanted: makeBlockPool(testBcR, 10, @@ -446,7 +447,7 @@ func TestBlockPoolAddBlock(t *testing.T) { map[int64]tPBlocks{10: {"P1", false}}), args: args{ peerID: "P1", - block: types.MakeBlock(int64(10), txs, nil, nil, nil), + block: types.MakeBlock(int64(10), factory.MakeDataFromTxs(txs), nil), blockSize: 100, }, poolWanted: makeBlockPool(testBcR, 10, diff --git a/blockchain/v1/reactor_fsm_test.go b/blockchain/v1/reactor_fsm_test.go index 24290a4f6e..4813bb06b6 100644 --- a/blockchain/v1/reactor_fsm_test.go +++ b/blockchain/v1/reactor_fsm_test.go @@ -11,6 +11,7 @@ import ( tmmath "github.com/tendermint/tendermint/libs/math" tmrand "github.com/tendermint/tendermint/libs/rand" "github.com/tendermint/tendermint/p2p" + "github.com/tendermint/tendermint/state/test/factory" "github.com/tendermint/tendermint/types" ) @@ -142,7 +143,7 @@ func sBlockRespEv(current, expected string, peerID p2p.ID, height int64, prevBlo data: bReactorEventData{ peerID: peerID, height: height, - block: types.MakeBlock(height, txs, nil, nil, nil), + block: types.MakeBlock(height, factory.MakeDataFromTxs(txs), nil), length: 100}, wantState: expected, wantNewBlocks: append(prevBlocks, height), @@ -159,7 +160,7 @@ func sBlockRespEvErrored(current, expected string, data: bReactorEventData{ peerID: peerID, 
height: height, - block: types.MakeBlock(height, txs, nil, nil, nil), + block: types.MakeBlock(height, factory.MakeDataFromTxs(txs), nil), length: 100}, wantState: expected, wantErr: wantErr, diff --git a/blockchain/v1/reactor_test.go b/blockchain/v1/reactor_test.go index 999c273180..a3ca61c43a 100644 --- a/blockchain/v1/reactor_test.go +++ b/blockchain/v1/reactor_test.go @@ -21,6 +21,7 @@ import ( tmproto "github.com/tendermint/tendermint/proto/tendermint/types" "github.com/tendermint/tendermint/proxy" sm "github.com/tendermint/tendermint/state" + "github.com/tendermint/tendermint/state/test/factory" "github.com/tendermint/tendermint/store" "github.com/tendermint/tendermint/types" tmtime "github.com/tendermint/tendermint/types/time" @@ -356,7 +357,12 @@ func makeTxs(height int64) (txs []types.Tx) { } func makeBlock(height int64, state sm.State, lastCommit *types.Commit) *types.Block { - block, _ := state.MakeBlock(height, makeTxs(height), nil, nil, lastCommit, state.Validators.GetProposer().Address) + block, _ := state.MakeBlock( + height, + factory.MakeDataFromTxs(makeTxs(height)), + lastCommit, + state.Validators.GetProposer().Address, + ) return block } diff --git a/blockchain/v2/reactor_test.go b/blockchain/v2/reactor_test.go index fe877de6a8..20235e39e2 100644 --- a/blockchain/v2/reactor_test.go +++ b/blockchain/v2/reactor_test.go @@ -25,6 +25,7 @@ import ( bcproto "github.com/tendermint/tendermint/proto/tendermint/blockchain" "github.com/tendermint/tendermint/proxy" sm "github.com/tendermint/tendermint/state" + "github.com/tendermint/tendermint/state/test/factory" "github.com/tendermint/tendermint/store" "github.com/tendermint/tendermint/types" tmtime "github.com/tendermint/tendermint/types/time" @@ -456,7 +457,12 @@ func makeTxs(height int64) (txs []types.Tx) { } func makeBlock(height int64, state sm.State, lastCommit *types.Commit) *types.Block { - block, _ := state.MakeBlock(height, makeTxs(height), nil, nil, lastCommit, 
state.Validators.GetProposer().Address) + block, _ := state.MakeBlock( + height, + factory.MakeDataFromTxs(makeTxs(height)), + lastCommit, + state.Validators.GetProposer().Address, + ) return block } diff --git a/consensus/replay_test.go b/consensus/replay_test.go index 827bbdaf2c..920b78cb01 100644 --- a/consensus/replay_test.go +++ b/consensus/replay_test.go @@ -31,6 +31,7 @@ import ( tmproto "github.com/tendermint/tendermint/proto/tendermint/types" "github.com/tendermint/tendermint/proxy" sm "github.com/tendermint/tendermint/state" + "github.com/tendermint/tendermint/state/test/factory" "github.com/tendermint/tendermint/types" ) @@ -990,7 +991,12 @@ func makeBlock(state sm.State, lastBlock *types.Block, lastBlockMeta *types.Bloc lastBlockMeta.BlockID, []types.CommitSig{vote.CommitSig()}) } - return state.MakeBlock(height, []types.Tx{}, nil, nil, lastCommit, state.Validators.GetProposer().Address) + return state.MakeBlock( + height, + factory.MakeDataFromTxs([]types.Tx{}), + lastCommit, + state.Validators.GetProposer().Address, + ) } type badApp struct { diff --git a/docs/celestia-architecture/README.md b/docs/celestia-architecture/README.md new file mode 100644 index 0000000000..f48698d1fa --- /dev/null +++ b/docs/celestia-architecture/README.md @@ -0,0 +1,63 @@ +--- +order: 1 +parent: + order: false +--- + +# Tendermint and Celestia + +celestia-core is not meant to be used as a general purpose framework. +Instead, its main purpose is to provide certain components (mainly consensus but also a p2p layer for Tx gossiping) for the Celestia main chain. +Hence, we do not provide any extensive documentation here. 
+ +Instead of keeping a copy of the Tendermint documentation, we refer to the existing extensive and maintained documentation and specification: + +- <https://docs.tendermint.com/> +- <https://github.com/tendermint/tendermint/tree/master/docs/> +- <https://github.com/tendermint/spec> + +Reading these will give you a lot of background and context on Tendermint which will also help you understand how celestia-core and [celestia-app](https://github.com/celestiaorg/celestia-app) interact with each other. + +## Celestia + +As mentioned above, celestia-core aims to be more focused on the Celestia use-case than vanilla Tendermint. +Moving forward we might provide a clear overview of the changes we incorporated. +For now, we refer to the Celestia-specific ADRs in this repository as well as to the Celestia specification: + +- [celestia-specs](https://github.com/celestiaorg/celestia-specs) + +## Architecture Decision Records (ADR) + +This is a location to record all high-level architecture decisions in this repository. + +You can read more about the ADR concept in this [blog post](https://product.reverb.com/documenting-architecture-decisions-the-reverb-way-a3563bb24bd0#.78xhdix6t). + +An ADR should provide: + +- Context on the relevant goals and the current state +- Proposed changes to achieve the goals +- Summary of pros and cons +- References +- Changelog + +Note the distinction between an ADR and a spec. The ADR provides the context, intuition, reasoning, and +justification for a change in architecture, or for the architecture of something +new. The spec is a much more compressed and streamlined summary of everything as +it stands today. + +If recorded decisions turned out to be lacking, convene a discussion, record the new decisions here, and then modify the code to match. + +Note the context/background should be written in the present tense. 
+ +To start a new ADR, you can use this template: [adr-template.md](./adr-template.md) + +### Table of Contents + +- [ADR 001: Erasure Coding Block Propagation](./adr-001-block-propagation.md) +- [ADR 002: Sampling erasure coded Block chunks](./adr-002-ipld-da-sampling.md) +- [ADR 003: Retrieving Application messages](./adr-003-application-data-retrieval.md) +- [ADR 004: Data Availability Sampling Light Client](./adr-004-mvp-light-client.md) +- [ADR 005: Decouple BlockID and PartSetHeader](./adr-005-decouple-blockid-and-partsetheader.md) +- [ADR 006: Row Propagation](./adr-006-row-propagation.md) +- [ADR 007: Minimal Changes to Tendermint](./adr-007-minimal-changes-to-tendermint.md) +- [ADR 008: Updating to Tendermint v0.35.x](./adr-008-updating-to-tendermint-v0.35.x.md) diff --git a/docs/celestia-architecture/adr-001-block-propagation.md b/docs/celestia-architecture/adr-001-block-propagation.md new file mode 100644 index 0000000000..6f015be391 --- /dev/null +++ b/docs/celestia-architecture/adr-001-block-propagation.md @@ -0,0 +1,124 @@ +# ADR 001: Erasure Coding Block Propagation + +## Changelog + +- 16-2-2021: Created + +## Context + +Block propagation is currently done by splitting the block into arbitrary chunks and gossiping them to validators via a gossip routine. While this does not have downsides it does not meet the needs of the Celestia chain. The Celestia chain requires blocks to be encoded in a different way and for the proposer to not propagate the chunks to peers. + +Celestia wants validators to pull the block from an IPFS network. What does this mean? As I touched on earlier the proposer pushes the block to the network; this in turn means that each validator downloads and reconstructs the block each time to verify it. Instead Celestia will encode and split up the block via erasure codes, stored locally in the node's IPFS daemon. After the proposer has sent the block to IPFS and received the CIDs it will include them in the proposal. 
This proposal will be gossiped to other validators; once a validator receives the proposal it will begin requesting the CIDs included in the proposal. + +There are two forms of a validator, one that downloads the block and one that samples it. What does sampling mean? Sampling is the act of checking that a portion of the block, or the entire block, is available for download. + +## Detailed Design + +The proposed design is as follows. + +### Types + +The proposal and vote types have a BlockID; this will be replaced with a header hash. The proposal will contain additional fields. + +The current proposal will be updated to include required fields. The entirety of the message will be reworked at a later date. To see the extent of the needed changes you can visit the [spec repo](https://github.com/celestiaorg/celestia-specs/blob/master/specs/proto/consensus.proto#L19) + +```proto +message Proposal { + SignedMsgType type = 1; + int64 height = 2; + int32 round = 3; + int32 pol_round = 4; + + +++ + // 32-byte hash + bytes last_header_hash = 5; + // 32-byte hash + bytes last_commit_hash = 6; + // 32-byte hash + bytes consensus_root = 7; + FeeHeader fee_header = 8; + // 32-byte hash + bytes state_commitment = 9; + uint64 available_data_original_shares_used = 10; + AvailableDataHeader available_data_header = 11; + +++ + + google.protobuf.Timestamp timestamp = 12 + [(gogoproto.nullable) = false, (gogoproto.stdtime) = true]; + bytes signature = 13; +} +``` + +```proto +// Vote represents a prevote, precommit, or commit vote from validators for +// consensus. +message Vote { + SignedMsgType type = 1; + int64 height = 2; + int32 round = 3; + +++ + bytes header_hash = 4; + +++ + google.protobuf.Timestamp timestamp = 5 + [(gogoproto.nullable) = false, (gogoproto.stdtime) = true]; + bytes validator_address = 6; + int32 validator_index = 7; + bytes signature = 8; +} +``` + +See [specs](https://github.com/celestiaorg/celestia-specs/blob/master/specs/data_structures.md#vote) for more details on the vote. 
+ +### Disk Storage + +Currently celestia-core stores all blocks in its store. Going forward only the headers of the blocks within the unbonding period will be stored. This will drastically reduce the amount of storage required by a celestia-core node. After the unbonding period all headers will have the option of being pruned. + +Proposed amendment to `BlockStore` interface + +```go +type BlockStore interface { + Base() int64 + Height() int64 + Size() int64 + + LoadBlockMeta(height int64) *types.BlockMeta + LoadHeader(height int64) *types.Header + LoadDAHeader(height int64) *types.DataAvailabilityHeader + + SaveHeaders(header *types.Header, daHeader *types.DataAvailabilityHeader, seenCommit *types.Commit) + + PruneHeaders(height int64) (uint64, error) + + LoadBlockCommit(height int64) *types.Commit + LoadSeenCommit(height int64) *types.Commit +} +``` + +Alongside these changes the RPC layer will need to change. Instead of querying the celestia-core store, the node will redirect the query through IPFS. + +Example: + +When a user requests a block from the celestia-core node, the request will be sent to the IPLD plugin. If the IPLD does not have the requested block, it will make a request to the celestia IPFS network for the required CIDs. If the full node does not have the DA header they will not be able to request the block data. + +![user request flow](./assets/user-request.png) + +The goal is to not change the public interface for RPCs. It is yet to be seen if this is possible. This means that CIDs will need to be set and loaded from the store in order to get all the related block information a user requires. + +## Status + +Proposed + + +### Positive + +- Minimal breakage to public interface +- Only store the block in a single place (IPFS) +- Reduce the public interface of the storage within Celestia. 
+ +### Negative + +- User requests may take more time to process + +### Neutral + +## References diff --git a/docs/celestia-architecture/adr-002-ipld-da-sampling.md b/docs/celestia-architecture/adr-002-ipld-da-sampling.md new file mode 100644 index 0000000000..10a642f619 --- /dev/null +++ b/docs/celestia-architecture/adr-002-ipld-da-sampling.md @@ -0,0 +1,280 @@ +# ADR 002: Sampling erasure coded Block chunks + +## Changelog + +- 26-2-2021: Created + +## Context + +In Tendermint's block gossiping each peer gossips random parts of block data to peers. +For Celestia, we need nodes (from light-clients to validators) to be able to sample row-/column-chunks of the erasure coded +block (aka the extended data square) from the network. +This is necessary for Data Availability proofs. + +![extended_square.png](img/extended_square.png) + +A high-level, implementation-independent formalization of the above-mentioned sampling and Data Availability proofs can be found in: +[_Fraud and Data Availability Proofs: Detecting Invalid Blocks in Light Clients_](https://fc21.ifca.ai/papers/83.pdf). + +For the time being, besides the academic paper, no other formalization or specification of the protocol exists. +Currently, the Celestia specification itself only describes the [erasure coding](https://github.com/celestiaorg/celestia-specs/blob/master/specs/data_structures.md#erasure-coding) +and how to construct the extended data square from the block data. + +This ADR: +- describes the high-level requirements +- defines the API and how it can be used by different components of Celestia (block gossiping, block sync, DA proofs) +- documents the decision on how to implement this. + + +The core data structures and the erasure coding of the block are already implemented in celestia-core ([#17], [#19], [#83]). +While there are no ADRs for these changes, we can refer to the Celestia specification in this case. 
+For this aspect, the existing implementation and specification should already be on par for the most part. +The exact arrangement of the data as described in this [rationale document](https://github.com/celestiaorg/celestia-specs/blob/master/rationale/message_block_layout.md) +in the specification can happen at the app-side of the ABCI boundary. +The latter was implemented in [celestiaorg/celestia-app#21](https://github.com/celestiaorg/celestia-app/pull/21) +leveraging a new ABCI method, added in [#110](https://github.com/celestiaorg/celestia-core/pull/110). +This new method is a sub-set of the proposed ABCI changes aka [ABCI++](https://github.com/tendermint/spec/pull/254). + +Mustafa Al-Bassam (@musalbas) implemented a [prototype](https://github.com/celestiaorg/celestia-prototype) +whose main purpose is to realistically analyse the protocol. +Although the prototype does not make any network requests and only operates locally, it can partly serve as a reference implementation. +It uses the [rsmt2d] library. + +The implementation will essentially use IPFS' APIs. For reading (and writing) chunks it +will use the IPLD [`DagService`](https://github.com/ipfs/go-ipld-format/blob/d2e09424ddee0d7e696d01143318d32d0fb1ae63/merkledag.go#L54), +more precisely the [`NodeGetter`](https://github.com/ipfs/go-ipld-format/blob/d2e09424ddee0d7e696d01143318d32d0fb1ae63/merkledag.go#L18-L27) +and [`NodeAdder`](https://github.com/ipfs/go-ipld-format/blob/d2e09424ddee0d7e696d01143318d32d0fb1ae63/merkledag.go#L29-L39). +As an optimization, we can also use a [`Batch`](https://github.com/ipfs/go-ipld-format/blob/d2e09424ddee0d7e696d01143318d32d0fb1ae63/batch.go#L29) +to batch adding and removing nodes. +This will be achieved by passing around a [CoreAPI](https://github.com/ipfs/interface-go-ipfs-core/blob/b935dfe5375eac7ea3c65b14b3f9a0242861d0b3/coreapi.go#L15) +object, which derives from the IPFS node that is created along with a tendermint node (see [#152]). 
+This code snippet does exactly that (see the [go-ipfs documentation] for more examples): +```go +// This constructs an IPFS node instance +node, _ := core.NewNode(ctx, nodeOptions) +// This attaches the Core API to the constructed node +coreApi := coreapi.NewCoreAPI(node) +``` + +The above-mentioned IPLD methods operate on so-called [ipld.Nodes]. +When computing the data root, we can pass in a [`NodeVisitor`](https://github.com/celestiaorg/nmt/blob/b22170d6f23796a186c07e87e4ef9856282ffd1a/nmt.go#L22) +into the Namespaced Merkle Tree library to create these (each inner- and leaf-node in the tree becomes an ipld node). +As a peer that requests such an IPLD node, the Celestia IPLD plugin provides the [function](https://github.com/celestiaorg/celestia-core/blob/ceb881a177b6a4a7e456c7c4ab1dd0eb2b263066/p2p/ipld/plugin/nodes/nodes.go#L175) +`NmtNodeParser` to transform the retrieved raw data back into an `ipld.Node`. + +A more high-level description of the changes required to rip out the current block gossiping routine, +including changes to block storage-, RPC-layer, and potential changes to reactors is handled in [ADR 001](./adr-001-block-propagation.md), +and/or in a few smaller, separate followup ADRs. + +## Alternative Approaches + +Instead of creating a full IPFS node object and passing it around as explained above + - use API (http) + - use ipld-light + - use alternative client + +Also, for better performance + - use [graph-sync], [IPLD selectors], e.g. via [ipld-prime] + +Also, there is the idea that nodes receive only the [Header] with the data root +and, in an additional step/request, download the DA header using the library, too. +While this feature is not considered here, and we assume each node that uses this library has the DA header, this assumption +is likely to change when we flesh out other parts of the system in more detail. 
+Note that this also means that light clients would still need to validate that the data root and merkleizing the DA header yield the same result. + +## Decision + +> This section records the decision that was made. +> It is best to record as much info as possible from the discussion that happened. This aids in not having to go back to the Pull Request to get the needed information. + +> - TODO: briefly summarize github, discord, and slack discussions (?) +> - also mention Mustafa's prototype and compare both apis briefly (RequestSamples, RespondSamples, ProcessSamplesResponse) +> - mention [ipld experiments] + + + +## Detailed Design + +Add a package to the library that provides the following features: + 1. sample a given number of random row/col indices of the extended data square given a DA header and indicate if successful or timeout/other error occurred + 2. store the block in the network by adding it to the peer's local Merkle-DAG whose content is discoverable via a DHT + 3. store the sampled chunks in the network + 4. reconstruct the whole block from a given DA header + 5. get all messages of a particular namespace ID. + +We mention 5. here mostly for completeness. Its details will be described / implemented in a separate ADR / PR. + +Apart from the above-mentioned features, we informally collect additional requirements: +- where randomness is needed, the randomness source should be configurable +- all replies by the network should be verified if this is not sufficiently covered by the used libraries already (IPFS) +- where possible, the requests to the network should happen in parallel (without DoSing the proposer for instance). + +This library should be implemented as two new packages: + +First, a sub-package should be added to the celestia-core [p2p] package +which does not know anything about the core data structures (Block, DA header etc). 
+It handles the actual network requests to the IPFS network and operates on IPFS/IPLD objects +directly and hence should live under [p2p/ipld]. +To some extent this part of the stack already exists. + +Second, a high-level API that can "live" closer to the actual types, e.g., in a sub-package in [celestia-core/types] +or in a new sub-package `da`. + +We first describe the high-level library here and describe functions in +more detail inline with their godoc comments below. + +### API that operates on celestia-core types + +As mentioned above this part of the library has knowledge of the core types (and hence depends on them). +It does not deal with IPFS internals. + +```go +// ValidateAvailability implements the protocol described in https://fc21.ifca.ai/papers/83.pdf. +// Specifically all steps of the protocol described in section +// _5.2 Random Sampling and Network Block Recovery_ are carried out. +// +// In more detail it will first create numSamples random unique coordinates. +// Then, it will ask the network for the leaf data corresponding to these coordinates. +// Additionally to the number of requests, the caller can pass in a callback, +// which will be called for each retrieved leaf with a verified Merkle proof. +// +// Among other use-cases, the callback can be useful for monitoring (progress), or, +// to process the leaf data the moment it was validated. +// The context can be used to provide a timeout. +// TODO: Should there be a constant = lower bound for #samples +func ValidateAvailability( + ctx context.Context, + dah *DataAvailabilityHeader, + numSamples int, + onLeafValidity func(namespace.PrefixedData8), +) error { /* ... */} + +// RetrieveBlockData can be used to recover the block Data. +// It will carry out a similar protocol as described for ValidateAvailability. +// The key difference is that it will sample enough chunks until it can recover the +// full extended data square, including original data (e.g. 
by using rsmt2d.RepairExtendedDataSquare). +func RetrieveBlockData( + ctx context.Context, + dah *DataAvailabilityHeader, + api coreiface.CoreAPI, + codec rsmt2d.Codec, + ) (types.Data, error) {/* ... */} + +// PutBlock operates directly on the Block. +// It first computes the erasure coding, aka the extended data square. +// Row by row it calls a lower level library which handles adding +// the row to the Merkle Dag, in our case a Namespaced Merkle Tree. +// Note, that this method could also fill the DA header. +// The data will be pinned by default. +func (b *Block) PutBlock(ctx context.Context, nodeAdder ipld.NodeAdder) error +``` + +We now describe the lower-level library that will be used by above methods. +Again we provide more details inline in the godoc comments directly. + +`PutBlock` is a method on `Block` as the erasure coding can then be cached, e.g. in a private field +in the block. + +### Changes to the lower level API closer to IPFS (p2p/ipld) + +```go +// GetLeafData takes in a Namespaced Merkle tree root transformed into a Cid +// and the leaf index to retrieve. +// Callers also need to pass in the total number of leaves of that tree. +// Internally, this will be translated to an IPLD path and corresponds to +// an ipfs dag get request, e.g. namespacedCID/0/1/0/0/1. +// The retrieved data should be pinned by default. +func GetLeafData( + ctx context.Context, + rootCid cid.Cid, + leafIndex uint32, + totalLeafs uint32, // this corresponds to the extended square width + api coreiface.CoreAPI, +) ([]byte, error) +``` + +`GetLeafData` can be used by above `ValidateAvailability` and `RetrieveBlockData` and +`PutLeaves` by `PutBlock`. + +### A Note on IPFS/IPLD + +In IPFS all data is _content addressed_ which basically means the data is identified by its hash. +Particularly, in the Celestia case, the root CID identifies the Namespaced Merkle tree including all its contents (inner and leaf nodes). 
+This means that if a `GetLeafData` request succeeds, the retrieved leaf data is in fact the leaf data in the tree. +We do not need to additionally verify Merkle proofs per leaf as this will essentially be done via IPFS on each layer while +resolving and getting to the leaf data. + +> TODO: validate this assumption and link to code that shows how this is done internally + +### Implementation plan + +As fully integrating Data Availability proofs into tendermint is a rather large change, we break up the work into the +following packages (not mentioning the implementation work that was already done): + +1. Flesh out the changes in the consensus messages ([celestia-specs#126], [celestia-specs#127]) +2. Flesh out the changes that would be necessary to replace the current block gossiping ([ADR 001](./adr-001-block-propagation.md)) +3. Add the possibility of storing and retrieving block data (samples or whole block) to celestia-core (this ADR and related PRs). +4. Integrate the above API (3.) as an addition into celestia-core without directly replacing the tendermint counterparts (block gossip etc). +5. 
Rip out each component that will be redundant with the above integration in one or even several smaller PRs: + - block gossiping (see ADR 001) + - modify block store (see ADR 001) + - make downloading full Blocks optional (flag/config) + - route some RPC requests to IPFS (see ADR 001) + + +## Status + +Proposed + +## Consequences + +### Positive + +- simplicity & ease of implementation +- can re-use an existing networking and p2p stack (go-ipfs) +- potential support of a large, cool, and helpful community +- high-level API definitions independent of the used stack + +### Negative + +- latency +- being connected to the public IPFS network might be overkill if peers should in fact only care about a subset that participates in the Celestia protocol +- dependency on a large code-base with lots of features and options of which we only need a small subset + +### Neutral +- two different p2p layers exist in celestia-core + +## References + +- https://github.com/celestiaorg/celestia-core/issues/85 +- https://github.com/celestiaorg/celestia-core/issues/167 + +- https://docs.ipld.io/#nodes +- https://arxiv.org/abs/1809.09044 +- https://fc21.ifca.ai/papers/83.pdf +- https://github.com/tendermint/spec/pull/254 + + +[#17]: https://github.com/celestiaorg/celestia-core/pull/17 +[#19]: https://github.com/celestiaorg/celestia-core/pull/19 +[#83]: https://github.com/celestiaorg/celestia-core/pull/83 + +[#152]: https://github.com/celestiaorg/celestia-core/pull/152 + +[celestia-specs#126]: https://github.com/celestiaorg/celestia-specs/issues/126 +[celestia-specs#127]: https://github.com/celestiaorg/celestia-specs/pull/127 +[Header]: https://github.com/celestiaorg/celestia-specs/blob/master/specs/data_structures.md#header + +[go-ipfs documentation]: https://github.com/ipfs/go-ipfs/tree/master/docs/examples/go-ipfs-as-a-library#use-go-ipfs-as-a-library-to-spawn-a-node-and-add-a-file +[ipld experiments]: https://github.com/celestiaorg/ipld-plugin-experiments +[ipld.Nodes]: 
https://github.com/ipfs/go-ipld-format/blob/d2e09424ddee0d7e696d01143318d32d0fb1ae63/format.go#L22-L45 +[graph-sync]: https://github.com/ipld/specs/blob/master/block-layer/graphsync/graphsync.md +[IPLD selectors]: https://github.com/ipld/specs/blob/master/selectors/selectors.md +[ipld-prime]: https://github.com/ipld/go-ipld-prime + +[rsmt2d]: https://github.com/celestia/rsmt2d + + +[p2p]: https://github.com/celestiaorg/celestia-core/tree/0eccfb24e2aa1bb9c4428e20dd7828c93f300e60/p2p +[p2p/ipld]: https://github.com/celestiaorg/celestia-core/tree/0eccfb24e2aa1bb9c4428e20dd7828c93f300e60/p2p/ipld +[celestia-core/types]: https://github.com/celestiaorg/celestia-core/tree/0eccfb24e2aa1bb9c4428e20dd7828c93f300e60/types diff --git a/docs/celestia-architecture/adr-003-application-data-retrieval.md b/docs/celestia-architecture/adr-003-application-data-retrieval.md new file mode 100644 index 0000000000..fdefa51cb8 --- /dev/null +++ b/docs/celestia-architecture/adr-003-application-data-retrieval.md @@ -0,0 +1,141 @@ +# ADR 003: Retrieving Application messages + +## Changelog + +- 2021-04-25: initial draft + +## Context + +This ADR builds on top of [ADR 002](adr-002-ipld-da-sampling.md) and will use the implemented APIs described there. +The reader should familiarize themselves at least with the high-level concepts described there as well as in the [specs](https://github.com/celestiaorg/celestia-specs/blob/master/specs/data_structures.md#2d-reed-solomon-encoding-scheme). + +The academic [paper](https://arxiv.org/abs/1905.09274) describes the motivation and context for this API. +The main motivation can be quoted from section 3.3 of that paper: + +> (Property1) **Application message retrieval partitioning.** Client nodes must be able to download all of the messages relevant to the applications they use [...], without needing to downloading any messages for other applications. 
+ +> (Property2) **Application message retrieval completeness.** When client nodes download messages relevant to the applications they use [...], they must be able to verify that the messages they received are the complete set of messages relevant to their applications, for specific +blocks, and that there are no omitted messages. + + + +The main data structure that enables the above properties is called a Namespaced Merkle Tree (NMT), an ordered binary Merkle tree where: +1. each node in the tree includes the range of namespaces of the messages in all descendants of each node +2. leaves in the tree are ordered by the namespace identifiers of the leaf messages + +A more formal description can be found in the [specification](https://github.com/celestiaorg/celestia-specs/blob/de5f4f74f56922e9fa735ef79d9e6e6492a2bad1/specs/data_structures.md#namespace-merkle-tree). +An implementation can be found in [this repository](https://github.com/celestiaorg/nmt). + +This ADR basically describes a version of the [`GetWithProof`](https://github.com/celestiaorg/nmt/blob/ddcc72040149c115f83b2199eafabf3127ae12ac/nmt.go#L193-L196) method of the NMT that leverages the fact that IPFS uses content addressing and that we have implemented an [IPLD plugin](https://github.com/celestiaorg/celestia-core/tree/37502aac69d755c189df37642b87327772f4ac2a/p2p/ipld) for an NMT. + +**Note**: The APIs defined here will be particularly relevant for Optimistic Rollup (full) nodes that want to download their Rollup's data (see [celestiaorg/optimint#48](https://github.com/celestiaorg/optimint/issues/48)). +Another potential use-case of this API could be for so-called [light validator nodes](https://github.com/celestiaorg/celestia-specs/blob/master/specs/node_types.md#node-type-definitions) that want to download and replay the state-relevant portion of the block data, i.e. transactions with [reserved namespace IDs](https://github.com/celestiaorg/celestia-specs/blob/master/specs/consensus.md#reserved-namespace-ids). 
+ +## Alternative Approaches + +The approach described below will rely on IPFS' block exchange protocol (bitswap) and DHT; IPFS's implementation will be used as a black box to find peers that can serve the requested data. +This will likely be much slower than it potentially could be and for a first implementation we intentionally do not incorporate the optimizations that we could. + +We briefly mention potential optimizations for the future here: +- Use of [graphsync](https://github.com/ipld/specs/blob/5d3a3485c5fe2863d613cd9d6e18f96e5e568d16/block-layer/graphsync/graphsync.md) instead of [bitswap](https://docs.ipfs.io/concepts/bitswap/) and use of [IPLD selectors](https://github.com/ipld/specs/blob/5d3a3485c5fe2863d613cd9d6e18f96e5e568d16/design/history/exploration-reports/2018.10-selectors-design-goals.md) +- expose an API to be able to download application specific data by namespace (including proofs) with the minimal number of round-trips (e.g. finding nodes that expose an RPC endpoint like [`GetWithProof`](https://github.com/celestiaorg/nmt/blob/ddcc72040149c115f83b2199eafabf3127ae12ac/nmt.go#L193-L196)) + +## Decision + +Most discussions on this particular API happened either on calls or in other non-documented ways. +We only describe the decision in this section. + +We decide to implement the simplest approach first. +We first describe the protocol informally here and explain why this fulfils (Property1) and (Property2) in the [Context](#context) section above. + +In the case that leaves with the requested namespace exist, this basically boils down to the following: traverse the tree starting from the root until finding the first leaf (start) with the namespace in question, then directly request and download all leaves coming after the start until the namespace changes to one greater than the requested one. 
+In the case that no leaves with the requested namespace exist in the tree, we traverse the tree to find the leaf in the position in the tree where the namespace would have been and download the neighbouring leaves. + +This is pretty much what the [`ProveNamespace`](https://github.com/celestiaorg/nmt/blob/ddcc72040149c115f83b2199eafabf3127ae12ac/nmt.go#L132-L146) method does but using IPFS we can simply locate and then request the leaves, and the corresponding inner proof nodes will automatically be downloaded on the way, too. + +## Detailed Design + +We define one function that returns all shares of a block belonging to a requested namespace and block (via the block's data availability header). +See [`ComputeShares`](https://github.com/celestiaorg/celestia-core/blob/1a08b430a8885654b6e020ac588b1080e999170c/types/block.go#L1371) for reference how encode the block data into namespace shares. + +```go +// RetrieveShares returns all raw data (raw shares) of the passed-in +// namespace ID nID and included in the block with the DataAvailabilityHeader dah. +func RetrieveShares( + ctx context.Context, + nID namespace.ID, + dah *types.DataAvailabilityHeader, + api coreiface.CoreAPI, +) ([][]byte, error) { + // 1. Find the row root(s) that contains the namespace ID nID + // 2. Traverse the corresponding tree(s) according to the + // above informally described algorithm and get the corresponding + // leaves (if any) + // 3. Return all (raw) shares corresponding to the nID +} + +``` + +Additionally, we define two functions that use the first one above to: +1. return all the parsed (non-padding) data with [reserved namespace IDs](https://github.com/celestiaorg/celestia-specs/blob/de5f4f74f56922e9fa735ef79d9e6e6492a2bad1/specs/consensus.md#reserved-namespace-ids): transactions, intermediate state roots, evidence. +2. 
return all application specific blobs (shares) belonging to one namespace ID parsed as a slice of Messages ([specification](https://github.com/celestiaorg/celestia-specs/blob/de5f4f74f56922e9fa735ef79d9e6e6492a2bad1/specs/data_structures.md#message) and [code](https://github.com/celestiaorg/celestia-core/blob/1a08b430a8885654b6e020ac588b1080e999170c/types/block.go#L1336)). + +The latter two methods might require moving or exporting a few currently unexported functions that (currently) live in [share_merging.go](https://github.com/celestiaorg/celestia-core/blob/1a08b430a8885654b6e020ac588b1080e999170c/types/share_merging.go#L57-L76) and could be implemented in a separate pull request. + +```go +// RetrieveStateRelevantMessages returns all state-relevant transactions +// (transactions, intermediate state roots, and evidence) included in a block +// with the DataAvailabilityHeader dah. +func RetrieveStateRelevantMessages( + ctx context.Context, + nID namespace.ID, + dah *types.DataAvailabilityHeader, + api coreiface.CoreAPI, +) (Txs, IntermediateStateRoots, EvidenceData, error) { + // like RetrieveShares but for all reserved namespaces + // additionally the shares are parsed (merged) into the + // corresponding types in the return arguments +} +``` + +```go +// RetrieveMessages returns all Messages of the passed-in +// namespace ID and included in the block with the DataAvailabilityHeader dah. +func RetrieveMessages( + ctx context.Context, + nID namespace.ID, + dah *types.DataAvailabilityHeader, + api coreiface.CoreAPI, +) (Messages, error) { + // like RetrieveShares but this additionally parsed the shares + // into the Messages type +} +``` + +## Status + +Proposed + +## Consequences + +This API will most likely be used by Rollups too. +We should document it properly and move it together with relevant parts from ADR 002 into a separate go-package. 
+ +### Positive + +- easy to implement with the existing code (see [ADR 002](https://github.com/celestiaorg/celestia-core/blob/47d6c965704e102ae877b2f4e10aeab782d9c648/docs/adr/adr-002-ipld-da-sampling.md#detailed-design)) +- resilient data retrieval via a p2p network +- dependence on a mature and well-tested code-base with a large and welcoming community + +### Negative + +- with IPFS, we inherit the fact that potentially a lot of round-trips are done until the data is fully downloaded; in other words: this could end up way slower than potentially possible +- anyone interacting with that API needs to run an IPFS node + +### Neutral + +- optimizations can happen incrementally once we have an initial working version + +## References + +We've linked to all references throughout the ADR. diff --git a/docs/celestia-architecture/adr-004-mvp-light-client.md b/docs/celestia-architecture/adr-004-mvp-light-client.md new file mode 100644 index 0000000000..4dac26b890 --- /dev/null +++ b/docs/celestia-architecture/adr-004-mvp-light-client.md @@ -0,0 +1,292 @@ +# ADR 004: Data Availability Sampling Light Client + +## Changelog + +- 2021-05-03: Initial Draft + +## Context + +We decided to augment the existing [RPC-based Tendermint light client](https://github.com/tendermint/tendermint/blob/bc643b19c48495077e0394d3e21e1d2a52c99548/light/doc.go#L2-L126) by adding the possibility to additionally validate blocks by doing Data Availability Sampling (DAS). +In general, DAS gives light clients assurance that the data behind the block header they validated is actually available in the network and hence, that state fraud proofs could be generated. +See [ADR 002](adr-002-ipld-da-sampling.md) for more context on DAS. 
+ +A great introduction on the Tendermint light client (and light clients in general) can be found in this series of [blog posts](https://medium.com/tendermint/everything-you-need-to-know-about-the-tendermint-light-client-f80d03856f98) as well as this [paper](https://arxiv.org/abs/2010.07031). + +This ADR describes the changes necessary to augment the existing Tendermint light client implementation with DAS from a UX as well as from a protocol perspective. + +## Alternative Approaches + +Ideally, the light client should not just request [signed headers](https://github.com/tendermint/tendermint/blob/bc643b19c48495077e0394d3e21e1d2a52c99548/light/doc.go#L35-L52) from [a few pre-configured peers](https://github.com/tendermint/tendermint/blob/bc643b19c48495077e0394d3e21e1d2a52c99548/light/setup.go#L51-L52) but instead also discover peers from a p2p network. +We will eventually implement this. For more context, we refer to this [issue](https://github.com/celestiaorg/celestia-core/issues/86). +This would require that the (signed) headers are provided via other means than the RPC. +See this [abandoned pull request](https://github.com/tendermint/tendermint/pull/4508) and [issue](https://github.com/tendermint/tendermint/issues/4456) in the Tendermint repository and also this [suggestion](https://github.com/celestiaorg/celestia-core/issues/86#issuecomment-831182564) by [@Wondertan](https://github.com/Wondertan) in this repository. + +For some use-cases—like DAS light validator nodes, or the light clients of a Data Availability Layer that are run by full nodes of an Optimistic Rollup—it would even make sense that the light client (passively) participates in the consensus protocol to some extent; i.e. runs a subset of the consensus reactor so that Consensus messages ([Votes](https://github.com/tendermint/tendermint/blob/bc643b19c48495077e0394d3e21e1d2a52c99548/types/vote.go#L48-L59) etc.) come in as early as possible. 
+Then light clients would not need to wait for the canonical commit to be included in the next [block](https://github.com/tendermint/tendermint/blob/bc643b19c48495077e0394d3e21e1d2a52c99548/types/block.go#L48). + +For the RPC-based light client it could also make sense to add a new RPC endpoint to tendermint for clients to retrieve the [`DataAvailabilityHeader`](https://github.com/celestiaorg/celestia-core/blob/50f722a510dd2ba8e3d31931c9d83132d6318d4b/types/block.go#L52-L69) (DAHeader), or embed the DAHeader. +The [Commit](https://github.com/celestiaorg/celestia-core/blob/cbf1f1a4a0472373289a9834b0d33e0918237b7f/rpc/core/routes.go#L25) only contains the [SignedHeader](https://github.com/celestiaorg/celestia-core/blob/cbf1f1a4a0472373289a9834b0d33e0918237b7f/rpc/core/types/responses.go#L32-L36) (Header and Commit signatures). +Not all light clients will need the full DAHeader though (e.g. super-light-clients do not). + + +## Decision + +For our MVP, we [decide](https://github.com/celestiaorg/celestia-core/issues/307) to only modify the existing RPC-endpoint based light client. +This is mostly because we want to ship our MVP as quickly as possible but independently of this it makes sense to provide a familiar experience for engineers coming from the Cosmos ecosystem. + +We will later implement the above mentioned variants. +How exactly will be described in separate ADRs though. + +## Detailed Design + +From a user perspective very little changes: +the existing light client command gets an additional flag that indicates whether to run DAS or not. +Additionally, the light client operator can decide the number of successful samples to make to deem the block available (and hence valid). + +In case DAS is enabled, the light client will need to: +1. retrieve the DAHeader corresponding to the data root in the Header +2. request a parameterizable number of random samples. 
+ +If the all sampling requests succeed, the whole block is available ([with some high enough probability](https://arxiv.org/abs/1809.09044)). + +### UX + +The main change to the light client [command](https://github.com/celestiaorg/celestia-core/blob/master/cmd/tendermint/commands/light.go#L32-L104) is to add in a new flag to indicate if it should run DAS or not. +Additionally, the user can choose the number of succeeding samples required for a block to be considered available. + +```diff +=================================================================== +diff --git a/cmd/tendermint/commands/light.go b/cmd/tendermint/commands/light.go +--- a/cmd/tendermint/commands/light.go (revision 48b043014f0243edd1e8ebad8cd0564ab9100407) ++++ b/cmd/tendermint/commands/light.go (date 1620546761822) +@@ -64,6 +64,8 @@ + dir string + maxOpenConnections int + ++ daSampling bool ++ numSamples uint32 + sequential bool + trustingPeriod time.Duration + trustedHeight int64 +@@ -101,6 +103,10 @@ + LightCmd.Flags().BoolVar(&sequential, "sequential", false, + "sequential verification. Verify all headers sequentially as opposed to using skipping verification", + ) ++ LightCmd.Flags().BoolVar(&daSampling, "da-sampling", false, ++ "data availability sampling. Verify each header (sequential verification), additionally verify data availability via data availability sampling", ++ ) ++ LightCmd.Flags().Uint32Var(&numSamples, "num-samples", 15, "Number of data availability samples until block data deemed available.") + } +``` + +For the Data Availability sampling, the light client will have to run an IPFS node. +It makes sense to make this mostly opaque to the user as everything around IPFS can be [configured](https://github.com/ipfs/go-ipfs/blob/d6322f485af222e319c893eeac51c44a9859e901/docs/config.md) in the `$IPFS_PATH`. 
+This IPFS path should simply be a sub-directory inside the light client's [directory](https://github.com/celestiaorg/celestia-core/blob/cbf1f1a4a0472373289a9834b0d33e0918237b7f/cmd/tendermint/commands/light.go#L86-L87). +We can later add the ability to let users configure the IPFS setup more granularly. + +**Note:** DAS should only be compatible with sequential verification. +In case a light client is parametrized to run DAS and skipping verification, the CLI should return an easy-to-understand warning or even an error explaining why this does not make sense. + +### Light Client Protocol with DAS + +#### Light Store + +The light client stores data in its own [badgerdb instance](https://github.com/celestiaorg/celestia-core/blob/50f722a510dd2ba8e3d31931c9d83132d6318d4b/cmd/tendermint/commands/light.go#L125) in the given directory: + +```go +db, err := badgerdb.NewDB("light-client-db", dir) +``` + +While it is not critical for this feature, we should at least try to re-use that same DB instance for the local ipld store. +Otherwise, we introduce yet another DB instance; something we want to avoid, especially in the long run (see [#283](https://github.com/celestiaorg/celestia-core/issues/283)). +For the first implementation, it might still be simpler to create a separate DB instance and tackle cleaning this up in a separate pull request, e.g. together with other [instances](https://github.com/celestiaorg/celestia-core/issues/283). + +#### RPC + +No changes to the RPC endpoints are absolutely required. +Although, for convenience and ease of use, we should either add the `DAHeader` to the existing [Commit](https://github.com/celestiaorg/celestia-core/blob/cbf1f1a4a0472373289a9834b0d33e0918237b7f/rpc/core/routes.go#L25) endpoint, or, introduce a new endpoint to retrieve the `DAHeader` on demand and for a certain height or block hash. + +The first has the downside that not every light client needs the DAHeader. 
+The second explicitly reveals to full-nodes which clients are doing DAS and which not. + +**Implementation Note:** The additional (or modified) RPC endpoint could work as a simple first step until we implement downloading the DAHeader from a given data root in the header. +Also, the light client uses a so-called [`Provider`](https://github.com/tendermint/tendermint/blob/7f30bc96f014b27fbe74a546ea912740eabdda74/light/provider/provider.go#L9-L26) to retrieve [LightBlocks](https://github.com/tendermint/tendermint/blob/7f30bc96f014b27fbe74a546ea912740eabdda74/types/light.go#L11-L16), i.e. signed headers and validator sets. +Currently, only the [`http` provider](https://github.com/tendermint/tendermint/blob/7f30bc96f014b27fbe74a546ea912740eabdda74/light/provider/http/http.go#L1) is implemented. +Hence, as _a first implementation step_, we should augment the `Provider` and the `LightBlock` to optionally include the DAHeader (details below). +In parallel but in a separate pull request, we add a separate RPC endpoint to download the DAHeader for a certain height. + +#### Store DataAvailabilityHeader + +For full nodes to be able to serve the `DataAvailabilityHeader` without having to recompute it each time, it needs to be stored somewhere. +While this is independent of the concrete serving mechanism, it is more so relevant for the RPC endpoint. +There is ongoing work to make the Tendermint Store only store Headers and the DataAvailabilityHeader in [#218](https://github.com/celestiaorg/celestia-core/pull/218/) / [#182](https://github.com/celestiaorg/celestia-core/issues/182). + +At the time of writing this ADR, another pull request ([#312](https://github.com/celestiaorg/celestia-core/pull/312)) is in the works with a more isolated change that adds the `DataAvailabilityHeader` to the `BlockID`. 
+Hence, the DAHeader is [stored](https://github.com/celestiaorg/celestia-core/blob/50f722a510dd2ba8e3d31931c9d83132d6318d4b/store/store.go#L355-L367) along with the [`BlockMeta`](https://github.com/celestiaorg/celestia-core/blob/50f722a510dd2ba8e3d31931c9d83132d6318d4b/types/block_meta.go#L11-L17) there. + +For a first implementation, we could first build on top of #312 and adapt to the changed storage API where only headers and the DAHeader are stored inside tendermint's store (as drafted in #218). +A major downside of storing block data inside of tendermint's store as well as in the IPFS' block store is that it is not only redundantly stored data but also IO-intensive work that will slow down full nodes. + + +#### DAS + +The changes for DAS are very simple from a high-level perspective assuming that the light client has the ability to download the DAHeader along with the required data (signed header + validator set) of a given height: + +Every time the light client validates a retrieved light-block, it additionally starts DAS in the background (once). +For a DAS light client it is important to use [sequential](https://github.com/tendermint/tendermint/blob/f366ae3c875a4f4f61f37f4b39383558ac5a58cc/light/client.go#L46-L53) verification and not [skipping](https://github.com/tendermint/tendermint/blob/f366ae3c875a4f4f61f37f4b39383558ac5a58cc/light/client.go#L55-L69) verification. +Skipping verification only works under the assumption that 2/3+1 of voting power is honest. +The whole point of doing DAS (and state fraud proofs) is to remove that assumption. +See also this related issue in the LL specification: [#159](https://github.com/celestiaorg/celestia-specs/issues/159). + +Independent of the existing implementation, there are three ways this could be implemented: +1. 
the DAS light client only accepts a header as valid and trusts it after DAS succeeds (additionally to the tendermint verification), and it waits until DAS succeeds (or there was an error or timeout on the way) +2. (aka 1.5) the DAS light client stages headers where the tendermint verification passes as valid and spins up DAS sampling routines in the background; the staged headers are committed as valid iff all routines successfully return in time +3. the DAS light client optimistically accepts a header as valid and trusts it if the regular tendermint verification succeeds; the DAS is run in the background (with potentially much longer timeouts than in 1.) and after the background routine returns, if it erred or timed out, the already trusted headers are marked as unavailable; this might require rolling back the already trusted headers + +We note that from an implementation point of view 1. is not only the simplest approach, but it would also work best with the currently implemented light client design. +It is the approach that should be implemented first. + +The 2. approach can be seen as an optimization where the higher latency DAS can be conducted in parallel for various heights. +This could speed up catching-up (sequentially) if the light client went offline (shorter than the weak subjectivity time window). + +The 3. approach is the most general of all, but it moves the responsibility to wait or to rollback headers to the caller and hence is undesirable as it offers too much flexibility. + + +#### Data Structures + +##### LightBlock + +As mentioned above the LightBlock should optionally contain the DataAvailabilityHeader. 
+```diff +Index: types/light.go +=================================================================== +diff --git a/types/light.go b/types/light.go +--- a/types/light.go (revision 64044aa2f2f2266d1476013595aa33bb274ba161) ++++ b/types/light.go (date 1620481205049) +@@ -13,6 +13,9 @@ + type LightBlock struct { + *SignedHeader `json:"signed_header"` + ValidatorSet *ValidatorSet `json:"validator_set"` ++ ++ // DataAvailabilityHeader is only populated for DAS light clients for others it can be nil. ++ DataAvailabilityHeader *DataAvailabilityHeader `json:"data_availability_header"` + } +``` + +Alternatively, we could introduce a `DASLightBlock` that embeds a `LightBlock` and has the `DataAvailabilityHeader` as the only (non-optional) field. +This would be more explicit as it is a new type. +Instead, adding a field to the existing `LightBlock` is backwards compatible and does not require any further code changes; the new type requires `To`- and `FromProto` functions at least. + +##### Provider + +The [`Provider`](https://github.com/tendermint/tendermint/blob/7f30bc96f014b27fbe74a546ea912740eabdda74/light/provider/provider.go#L9-L26) should be changed to additionally provide the `DataAvailabilityHeader` to enable DAS light clients. +Implementations of the interface need to additionally retrieve the `DataAvailabilityHeader` for the [modified LightBlock](#lightblock). +Users of the provider need to indicate this to the provider. + +We could either augment the `LightBlock` method with a flag, add a new method solely for providing the `DataAvailabilityHeader`, or, we could introduce a new method for DAS light clients. + +The latter is preferable because it is the most explicit and clear, and it still keeps places where DAS is not used without any code changes. 
+ +Hence: + +```diff +Index: light/provider/provider.go +=================================================================== +diff --git a/light/provider/provider.go b/light/provider/provider.go +--- a/light/provider/provider.go (revision 7d06ae28196e8765c9747aca9db7d2732f56cfc3) ++++ b/light/provider/provider.go (date 1620298115962) +@@ -21,6 +21,14 @@ + // error is returned. + LightBlock(ctx context.Context, height int64) (*types.LightBlock, error) + ++ // DASLightBlock returns the LightBlock containing the DataAvailabilityHeader. ++ // Other than including the DataAvailabilityHeader it behaves exactly the same ++ // as LightBlock. ++ // ++ // It can be used by DAS light clients. ++ DASLightBlock(ctx context.Context, height int64) (*types.LightBlock, error) ++ ++ + // ReportEvidence reports an evidence of misbehavior. + ReportEvidence(context.Context, types.Evidence) error + } +``` +Alternatively, with the exact same result, we could embed the existing `Provider` into a new interface: e.g. `DASProvider` that adds this method. +This is completely equivalent to the above, and which approach is better will become clearer when we have spent more time on the implementation. + +Regular light clients will call `LightBlock` and DAS light clients will call `DASLightBlock`. +In the first case the result will be the same as for vanilla Tendermint and in the second case the returned `LightBlock` will additionally contain the `DataAvailabilityHeader` of the requested height. + +#### Running an IPFS node + +We already have methods to [initialize](https://github.com/celestiaorg/celestia-core/blob/cbf1f1a4a0472373289a9834b0d33e0918237b7f/cmd/tendermint/commands/init.go#L116-L157) and [run](https://github.com/celestiaorg/celestia-core/blob/cbf1f1a4a0472373289a9834b0d33e0918237b7f/node/node.go#L1449-L1488) an IPFS node in place. +These need to be refactored such that they can effectively be used for the light client as well. +This means: +1. 
these methods need to be exported and available in a place that does not introduce interdependence of go packages +2. users should be able to run a light client with a single command and hence most of the initialization logic should be coupled with creating the actual IPFS node and [made independent](https://github.com/celestiaorg/celestia-core/blob/cbf1f1a4a0472373289a9834b0d33e0918237b7f/cmd/tendermint/commands/init.go#L119-L120) of the `tendermint init` command + +An example for 2. can be found in the IPFS [code](https://github.com/ipfs/go-ipfs/blob/cd72589cfd41a5397bb8fc9765392bca904b596a/cmd/ipfs/daemon.go#L239) itself. +We might want to provide a slightly different default initialization though (see how this is [overridable](https://github.com/ipfs/go-ipfs/blob/cd72589cfd41a5397bb8fc9765392bca904b596a/cmd/ipfs/daemon.go#L164-L165) in the ipfs daemon cmd). + +We note that for operating a fully functional light client the IPFS node could be running in client mode [`dht.ModeClient`](https://github.com/libp2p/go-libp2p-kad-dht/blob/09d923fcf68218181b5cd329bf5199e767bd33c3/dht_options.go#L29-L30) but we actually want light clients to also respond to incoming queries, e.g. from other light clients. +Hence, they should by default run in [`dht.ModeServer`](https://github.com/libp2p/go-libp2p-kad-dht/blob/09d923fcf68218181b5cd329bf5199e767bd33c3/dht_options.go#L31-L32). +In an environment where any bandwidth must be saved, or, where the network conditions do not allow the server mode, we make it easy to change the default behavior. + +##### Client + +We add another [`Option`](https://github.com/tendermint/tendermint/blob/a91680efee3653e3de620f24eb8ddca1c95ce8f9/light/client.go#L43-L117) to the [`Client`](https://github.com/tendermint/tendermint/blob/a91680efee3653e3de620f24eb8ddca1c95ce8f9/light/client.go#L173) that indicates that this client does DAS. + +This option indicates: +1. to do sequential verification and +2. 
to request [`DASLightBlocks`](#lightblock) from the [provider](#provider). + +All other changes should affect unexported methods only. + +##### ValidateAvailability + +In order for the light clients to perform DAS to validate availability, they do not need to be aware of the fact that an IPFS node is run. +Instead, we can use the existing [`ValidateAvailability`](https://github.com/celestiaorg/celestia-core/blame/master/p2p/ipld/validate.go#L23-L28) function (as defined in [ADR 002](adr-002-ipld-da-sampling.md) and implemented in [#270](https://github.com/celestiaorg/celestia-core/pull/270)). +Note that this expects an ipfs core API object `CoreAPI` to be passed in. +Using that interface has the major benefit that we could even change the requirement that the light client itself runs the IPFS node without changing most of the validation logic. +E.g., the IPFS node (with our custom IPLD plugin) could run in a different process (or machine), and we could still just pass in that same `CoreAPI` interface. + +Orthogonal to this ADR, we also note that we could change all IPFS readonly methods to accept the minimal interface they actually use, namely something that implements `ResolveNode` (and maybe additionally a `NodeGetter`). + +`ValidateAvailability` needs to be called each time a header is validated. +A DAS light client will have to request the `DASLightBlock` for this as per above to be able to pass in a `DataAvailabilityHeader`. + +#### Testing + +Ideally, we add the DAS light client to the existing e2e tests. +It might be worth catching up with some relevant changes from tendermint upstream. +In particular, [tendermint/tendermint#6196](https://github.com/tendermint/tendermint/pull/6196) and previous changes that it depends on. + +Additionally, we should provide a simple example in the documentation that walks through the DAS light client. +It would be good if the light client logs some (info) output related to DAS to provide feedback to the user. 
+ +## Status + +Proposed + +## Consequences + +### Positive + +- simple to implement and understand +- familiar to tendermint / Cosmos devs +- allows trying out the MVP without relying on the [celestia-app](https://github.com/celestiaorg/celestia-app) (instead a simple abci app like a modified [KVStore](https://github.com/celestiaorg/celestia-core/blob/42e4e8b58ebc58ebd663c114d2bcd7ab045b1c55/abci/example/kvstore/README.md) app could be used to demo the DAS light client) + +### Negative + +- light client does not discover peers +- requires the light client that currently runs simple RPC requests only to run an IPFS node +- rpc makes it extremely easy to infer which light clients are doing DAS and which not +- the initial light client implementation might still be confusing to devs familiar to tendermint/Cosmos for the reason that it does DAS (and state fraud proofs) to get rid of the underlying honest majority assumption, but it will still do all checks related to that same honest majority assumption (e.g. download validator sets, Commits and validate that > 2/3 of them signed the header) + +### Neutral + +DAS light clients need to additionally obtain the DAHeader from the data root in the header to be able to actually do DAS. + +## References + +We have linked all references above inside the text already. diff --git a/docs/celestia-architecture/adr-005-decouple-blockid-and-partsetheader.md b/docs/celestia-architecture/adr-005-decouple-blockid-and-partsetheader.md new file mode 100644 index 0000000000..1bf8fa7416 --- /dev/null +++ b/docs/celestia-architecture/adr-005-decouple-blockid-and-partsetheader.md @@ -0,0 +1,47 @@ +# ADR 005: Decouple the PartSetHeader from the BlockID + +## Changelog + +- 2021-08-01: Initial Draft + +## Context + +Celestia has multiple commits to the block data via the `DataHash` and the `PartSetHeader` in the `BlockID`. 
As stated in [#184](https://github.com/celestiaorg/lazyledger-core/issues/184), we no longer need the `PartSetHeader` for this additional commitment to the block's data. However, we are still planning to use the `PartSetHeader` for block propagation during consensus in the short-medium term. This means that we will remove the `PartSetHeader` from as many places as possible, but keep it in the `Proposal` struct. + +## Alternative Approaches + +It’s worth noting that there are proposed changes to remove the `PartSetHeader` entirely, and instead use the already existing commitment to block data, the `DataAvailabilityHeader`, to propagate blocks in parallel during consensus. Discussions regarding the detailed differences entailed in each approach are documented in that ADR's PR. The current direction that is described in this ADR is significantly more conservative in its approach, but it is not strictly an alternative to other designs. This is because other designs would also require removal of the `PartSetHeader`, which is a project in and of itself due to the `BlockID`'s widespread usage throughout tendermint and the bugs that pop up when attempting to remove it. + +## Decision + +While we build other better designs to experiment with, we will continue to implement the design specified here as it is not orthogonal. 
https://github.com/celestiaorg/lazyledger-core/pull/434#issuecomment-869158788 + +## Detailed Design + +- [X] Decouple the BlockID and the PartSetHeader [#441](https://github.com/celestiaorg/lazyledger-core/pull/441) +- [ ] Remove the BlockID from every possible struct other than the `Proposal` + - [X] Stop signing over the `PartSetHeader` while voting [#457](https://github.com/celestiaorg/lazyledger-core/pull/457) + - [X] Remove the `PartSetHeader` from the Header [#457](https://github.com/celestiaorg/lazyledger-core/pull/457) + - [X] Remove the `PartSetHeader` from `VoteSetBits`, `VoteSetMaj23`, and `state.State` [#479](https://github.com/celestiaorg/lazyledger-core/pull/479) + - [ ] Remove the `PartSetHeader` from other structs + + +## Status + +Proposed + +### Positive + +- Conservative and easy to implement +- Acts as a stepping stone for other better designs +- Allows us to use 64kb sized chunks, which are well tested + +### Negative + +- Not an ideal design as we still have to include an extra commitment to the block's data in the proposal + +## References + +Alternative ADR [#434](https://github.com/celestiaorg/lazyledger-core/pull/434) +Alternative implementation [#427](https://github.com/celestiaorg/lazyledger-core/pull/427) and [#443](https://github.com/celestiaorg/lazyledger-core/pull/443) +[Comment](https://github.com/celestiaorg/lazyledger-core/pull/434#issuecomment-869158788) that summarizes decision \ No newline at end of file diff --git a/docs/celestia-architecture/adr-006-row-propagation.md b/docs/celestia-architecture/adr-006-row-propagation.md new file mode 100644 index 0000000000..6fd2f3652e --- /dev/null +++ b/docs/celestia-architecture/adr-006-row-propagation.md @@ -0,0 +1,202 @@ +# ADR 006: Consensus Block Gossiping with Rows + +## Changelog +* 24.06.2021 - Initial description +* 07.07.2021 - More important details were added +* 18.08.2021 - Mention alternative approaches briefly + +## Context +It's a long story of relations between 
Celestia, Tendermint, and consensus block gossiping. Celestia's team discussed +multiple ideas, several ADRs were made, and nothing yet was finalized. This ADR is another attempt to bring valuable +changes into block gossiping and hopefully successful. + +Currently, we inherit the following from Tendermint. Our codebase relies on the blocks Parts notion. Each Part is a +piece of an entire serialized block. Those Parts are gossiped between nodes in consensus and committed with +`PartSetHeader` containing a Merkle Root of the Parts. However, Parts gossiping wasn't designed for Celestia blocks. + +Celestia comes with a different block representation from Tendermint. It lays out Blocks as a table of data shares, +where Rows or Columns can be and should be gossiped instead of Parts, keeping only one system-wide commitment to data. + +## Alternative Approaches +### ["nah it works just don't touch it"](https://ahseeit.com//king-include/uploads/2020/11/121269295_375504380484919_2997236194077828589_n-6586327691.jpg) approach + +It turns out that we could fully treat the Tendermint consensus as a black box, keeping two data commitments: one for +consensus with `PartSetHeader` and another for the world outside the consensus with `DAHeader`. 
+ +#### Pros +* Less work + +### Others +* get rid of the PartsHeader from BlockID without changing block propagation at all (see [ADR 005](https://github.com/celestiaorg/celestia-core/blob/58a3901827afbf97852d807de34a2b66f93e0eb6/docs/lazy-adr/adr-005-decouple-blockid-and-partsetheader.md#adr-005-decouple-the-partsetheader-from-the-blockid)) +* change block propagation to fixed-sized chunks but based on the ODS instead of how Parts are built currently (for this we have empirical evidence of how it performs in practice) +* send the block as a whole (only works with smaller blocks) +* block propagation-based on sending the header and Tx-IDs and then requesting the Tx/Messages that are missing from the local mempool of a node on demand + +#### Cons +* Pulls two data commitments to Celestia's specs +* Brings ambiguity to data integrity verification +* Controversial from software design perspective +* Brings DOSing vector for big Blocks. Every Block would need to be represented in two formats in RAM +* Wastes more resources on building and verifying additional + +## Decision +The decision is to still treat Tendermint's consensus as a black box, but with few amendments to gossiping mechanism: +* Introduce `RowSet` that mimics `PartSet`. + + `RowSet` is a helper structure that wraps DAHeader and tracks received Rows with their integrity against DAHeader and + tells its user when the block is complete and/or can be recovered. Mostly it is a helper and is not a high-level + concept. +* Replace `PartSet` with `RowSet` within consensus. +* Keep `DAHeader` in `Proposal` +* Remove `PartSetHeader` from `Proposal` + +The changes above are required to implement the decision. 
At later point, other changes listed below are +likely to be implemented as a clean-up: +* Entirely removing `PartSetHeader`, as redundant data commitment +* Removing `PartSet` +* Relying on `DAHeader` instead of `PartSetHeader` + +## Detailed Design +The detailed design section demonstrates the design and supporting changes package by package. Fortunately, the +design does not affect any public API and changes are solely internal. + +### `types` +#### RowSet and Row +First and essential part is to implement `RowSet` and `Row`, fully mimicking semantics of `PartSet` and `Part` to +decrease the number of required changes. Below, implementation semantics are presented: + +```go +// Row represents a blob of multiple ExtendedDataSquare shares. +// Practically, it is half of an extended row, as other half can be recomputed. +type Row struct { +// Index is an top-to-bottom index of a Row in ExtendedDataSquare. +// NOTE: Row Index is unnecessary, as we can determine it's Index by hash from DAHeader. However, Index removal +// would bring more changes to Consensus Reactor with arguable pros of less bandwidth usage. +Index int +// The actual share blob. +Data []byte +} + +// NewRow creates new Row from flattened shares and index. +func NewRow(idx int, row [][]byte) *Row + +// RowSet wraps DAHeader and tracks added Rows with their integrity against DAHeader. +// It allows user to check whenever rsmt2d.ExtendedDataSquare can be recovered. +// +// RowSet tracks the whole ExtendedDataSquare, Where Q0 is the original block data: +// ---- ---- +// | Q0 || Q1 | +// ---- ---- +// | Q2 || Q3 | +// ---- ---- +// +// But its AddRow and GetRow methods accepts and returns only half of the Rows - Q0 and Q2. Q1 and Q3 are recomputed. +// ---- +// | Q0 | +// ---- +// | Q2 | +// ---- +// +type RowSet interface { +// NOTE: The RowSet is defined as an interface for simplicity. In practice it should be a struct with one and only +// implementation. + +// AddRow adds a Row to the set. 
It returns true with nil error in case Row was successfully added. +// The logic for Row is: +// * Check if it was already added +// * Verify its size corresponds to DAHeader +// * Extend it with erasure coding and compute a NMT Root over it +// * Verify that the NMT Root corresponds to DAHeader Root under its Index +// * Finally add it to set and mark as added. +// +AddRow(*Row) (bool, error) + +// GetRow returns a Row by its index, if it exists. +GetRow(i int) *Row + +// Square checks if enough rows were added and returns recomputed ExtendedDataSquare if enough +Square() (*rsmt2d.ExtendedDataSquare, error) + +// other helper methods are omitted +} + +// NewRowSet creates full RowSet from rsmt2d.ExtendedDataSquare to gossip it to others through GetRow. +func NewRowSet(eds *rsmt2d.ExtendedDataSquare) *RowSet + +// NewRowSetFromHeader creates empty RowSet from a DAHeader to receive and verify gossiped Rows against the DAHeader +// with AddRow. +func NewRowSetFromHeader(dah *ipld.DataAvailabilityHeader) *RowSet +``` + +#### Vote +`Vote` should include a commitment to data. Previously, it relied on `PartSetHeader` in `BlockID`; now it relies on the +added `DAHeader`. Protobuf schema is updated accordingly. + +#### Proposal +`Proposal` is extended with `NumOriginalDataShares`. This is an optimization that +helps Validators to populate Header without counting original data shares themselves from a block received from a +Proposer. Potentially, that introduces a vulnerability by which a Proposer can send a wrong value, leaving the populated +Header of Validators wrong. This part of the decision is optional. + +### `consensus` +#### Reactor +##### Messages +The decision affects two messages on consensus reactor: +* `BlockPartMessage` -> `BlockRowMessage` + * Instead of `Part` it carries `Row` defined above. 
+* `NewValidBlockMessage` + * Instead of `PartSetHeader` it carries `DAHeader` + * `BitArray` of `RowSet` instead of `PartSet` + Protobuf schema for both is updated accordingly. + +##### PeerRoundState +`PeerRoundState` tracks state of each known peer in a round, specifically what commitment it has for a Block and what +chunks peer holds. The decision changes it to track `DAHeader` instead of `PartSetHeader`, along with `BitArray` of +`RowSet` instead of `PartSet`. + +##### BlockCatchup +The Reactor helps its peers to catchup if they go out of sync. Instead of sending random `Part` it now sends random +`Row` by `BlockRowMessage`. Unfortunately, that requires the Reactor to load whole Block from store. As an optimization, +an ability to load Row only from the store could be introduced at later point. + +#### State +##### RoundState +The RoundState keeps Proposal, Valid and Lock Block's data. Along with an entire Block and its Parts, the RoundState +also keeps Rows using `RowSet`. At later point, `PartSet` that tracks part can be removed. + +##### Proposal Stage +Previously, the State in proposal stage waited for all Parts to assemble the entire Block. Instead, the State waits for +the half of all Rows from a proposer and/or peers to recompute the Block's data and notifies them back that no more +needs to be sent. Also, through Rows, only minimally required amount of information is gossiped. Everything else to +assemble the full Block is collected from own chain State and Proposal. + +## Status +Proposed + +## Consequences +### Positive +* Hardening of consensus gossiping with erasure coding +* Blocks exceeding the size limit are immediately rejected on Proposal, without the need to download an entire Block. +* More control over Row message size during consensus, comparing to Part message, as last part of the block always has + unpredictable size. `DAHeader`, on the other hand, allows knowing precisely the size of Row messages. 
+* Less bandwidth usage + * Only required Block's data is gossiped. + * Merkle proofs of Parts are not sent on the wire +* Only one system-wide block data commitment schema +* We don't abandon the work we were doing for months and taking profits out of it + * PR [#287](https://github.com/celestiaorg/lazyledger-core/pull/287) + * PR [#312](https://github.com/celestiaorg/lazyledger-core/pull/312) + * PR [#427](https://github.com/celestiaorg/lazyledger-core/pull/427) + * and merged others + +### Negative +* We invest some more time(~1.5 weeks). + * Most of the work is done. Only few changes left in the implementation along with peer reviews. + +### Neutral +* Rows vs Parts on the wire + * Previously, parts were propagated with max size of 64KiB. Let's now take a Row of the largest 128x128 block in + comparison. The actual data size in such a case for the Row would be 128x256(shares_per_row*share_size)=32KiB, which + is exactly two times smaller than a Part. +* Gossiped chunks are no longer constant size. Instead, their size is proportional to the size of Block's data. +* Another step back from original Tendermint's codebases \ No newline at end of file diff --git a/docs/celestia-architecture/adr-007-minimal-changes-to-tendermint.md b/docs/celestia-architecture/adr-007-minimal-changes-to-tendermint.md new file mode 100644 index 0000000000..87ab7bd539 --- /dev/null +++ b/docs/celestia-architecture/adr-007-minimal-changes-to-tendermint.md @@ -0,0 +1,237 @@ +# ADR 007: From Ukraine, with Love + +## Changelog + +- 2021-08-20: Initial Description +- 2022-05-03: Update pointing to ADR 008 + +## Context + +Currently, our fork of tendermint includes changes to how to erasure block data, minor changes to the header to commit +to that data, additions to serve data availability sampling, along with some miscellaneous modification to adhere to the +spec. 
Instead of incorporating all of these changes into our fork of tendermint, we will only make the strictly +necessary changes and move the other services and their code to the new celestia-node repo. Notably, we will also refactor +some of the remaining necessary changes to be more isolated from the rest of the tendermint codebase. Both of these +strategies should significantly streamline pulling updates from upstream, and allow us to iterate faster since most +changes will be isolated to celestia-node. + +Update: many of the changes described below have since been minimized or removed. Please see ADR 008 for a summarized list of changes. Notably, we removed intermediate state roots, adopted two new methods from ABCI++ instead of PreprocessTxs, and are still signing over the PartSetHeader. + +## Decision + +Treat tendermint more as a "black box". + +## Detailed Design + +### Overview + +We keep the bare-minimum changes to tendermint in our fork, celestia-core. Where necessary and possible we augment the +tendermint node in a separate process, via celestia-node, which communicates with the tendermint node via RPC. All data +availability sampling logic, including all Celestia-specific networking logic not already provided by tendermint, is +moved into celestia-node: + +![core node relation](./img/core-node-relation.jpg) + +The detailed design of celestia-node will be defined in the repository itself. 
+ +### Necessary changes to tendermint + +#### Changing the repo import names to celestiaorg + +- Rebrand (https://github.com/celestiaorg/celestia-core/pull/476) + +#### Changes to the README.md other basic things + +- update github templates (https://github.com/celestiaorg/celestia-core/pull/405) +- update README.md (https://github.com/celestiaorg/celestia-core/pull/10) + +#### Adding the extra types of block data + +- Update core data types (https://github.com/celestiaorg/celestia-core/pull/17) + - Create the Message/Messages types + - Proto and the tendermint version + - Create the IntermediateStateRoots type + - Proto and the tendermint version +- Data availability for evidence (https://github.com/celestiaorg/celestia-core/pull/19) + - Add both types to `types.Data` + - Modify proto + - Add `EvidenceData` to `types.Data` + +#### Add the HeaderHash to the Commit + +- Add header hash to commit(https://github.com/celestiaorg/celestia-core/pull/198) + +#### Adding the consts package in types + +#### Remove iavl as a dependency + +- remove iavl as a dependency (https://github.com/celestiaorg/celestia-core/pull/129) + +#### Using the `DataAvailabilityHeader` to calculate the DataHash + +The `DataAvailabilityHeader` struct will be used by celestia-core as well as by the celestia-node. It might make sense +to (eventually) move the struct together with all the DA-related code into a separate repository and go-module. +@Wondertan explored this as part of [#427](https://github.com/celestiaorg/celestia-core/pull/427#issue-674512464). This +way all client implementations can depend on that module without running into circular dependencies. 
Hence, we only +describe how to hash the block data here: + +- Update core types (https://github.com/celestiaorg/celestia-core/pull/17) + - Replace the `Data.Hash()` with `DAH.Hash()` + - Use DAH to fill DataHash when filling the header + - Fill the DAH when making a block to generate the data hash + +#### Add availableDataOriginalSharesUsed to the header + +- Add availableDataOriginalSharesUsed to the header (https://github.com/celestiaorg/celestia-core/pull/262) + +#### Reap some number of transactions probably using the app or some other mech + +- Enforce a minimum square size (https://github.com/celestiaorg/celestia-core/pull/282) +- Use squares with a width that is a power of two (https://github.com/celestiaorg/celestia-core/pull/331) +- Adopt reaping from the mempool to max square size (https://github.com/celestiaorg/celestia-core/issues/77) +- Proposal: Decide on a mech to pick square size and communicate that to the + app (https://github.com/celestiaorg/celestia-core/issues/454) +- Also see ABCI++ for a less hacky solution + +#### Filling the DAH using share merging and splitting + +- Compute Shares (not merged) (https://github.com/celestiaorg/celestia-core/pull/60) + - part II (not merged) (https://github.com/celestiaorg/celestia-core/pull/63) + - while this was not merged, we will need some function to compute the shares that make up the block data +- Share Splitting (https://github.com/celestiaorg/celestia-core/pull/246) + - Serialize each constituent of block data + - Split into shares + - Txs (contiguous) + - Messages (not contiguous) + - Evidence (contiguous) + - IntermediateStateRoots (contiguous) +- Combine shares into original square +- ExtendBlockData +- Generate nmt root of each row and col +- Use those roots to generate the DataHash +- Share Merging (https://github.com/celestiaorg/celestia-core/pull/261) + - Sort by namespace + - Parse each reserved type + - Parse remaining messages + +#### Add the wrapper around nmt to erasure namespaces + +- 
Implement rsmt tree wrapper for nmt (https://github.com/celestiaorg/celestia-core/pull/238) + +#### Add PreprocessTxs to ABCI + +- Add PreprocessTxs method to ABCI (https://github.com/celestiaorg/celestia-core/pull/110) +- Add method to ABCI interface +- Create sync and async versions +- Add sync version to the CreateProposalBlock method of BlockExecutor + +#### Fill the DAH while making the block + +- Basic DA functionality (https://github.com/celestiaorg/celestia-core/pull/83) + +#### Only produce blocks on some interval + +- Control block times (https://github.com/tendermint/tendermint/issues/5911) + +#### Stop signing over the PartSetHeader + +- Replace canonical blockID with just a hash in the CanonicalVote +- Replace the LastBlockID in the header with just a hash + +#### Optionally remove some unused code + +- Removing misc unused code (https://github.com/celestiaorg/celestia-core/pull/208) +- Remove docs deployment (https://github.com/celestiaorg/celestia-core/pull/134) +- Start deleting docs (https://github.com/celestiaorg/celestia-core/pull/209) +- Remove tendermint-db in favor of badgerdb (https://github.com/celestiaorg/celestia-core/pull/241) +- Delete blockchain 2 until further notice (https://github.com/celestiaorg/celestia-core/pull/309) +- We don’t need to support using out of process apps + +#### Nice to Haves + +- More efficient hashing (https://github.com/celestiaorg/celestia-core/pull/351) + +We should also take this opportunity to refactor as many additions to tendermint into their own package as possible. +This will hopefully make updating to future versions of tendermint easier. For example, when we fill the data +availability header, instead of using a method on `Block`, it could be handled by a function that takes `types.Data` as +input and returns the DAH, the number of shares used in the square, along with the obligatory error. 
+ +```go +func FillDataAvailabilityHeader(data types.Data) (types.DataAvailabilityHeader, numOrigDataShares, error) +``` + +We could perform a similar treatment to the `splitIntoShares` methods and their helper method `ComputeShares`. Instead +of performing the share splitting logic in those methods, we could keep it in a different package and instead call the +equivalent function to compute the shares. + +Beyond refactoring and some minor additions, we will also have to remove and revert quite a few changes to get to the +minimum desired changes specified above. + +### Changes that will need to be reverted + +#### IPLD Plugin + +- Introduction (https://github.com/celestiaorg/celestia-core/pull/144) +- Initial integration (https://github.com/celestiaorg/celestia-core/pull/152) +- Custom Multihash (https://github.com/celestiaorg/celestia-core/pull/155) +- Puting data during proposal (https://github.com/celestiaorg/celestia-core/pull/178) +- Module name (https://github.com/celestiaorg/celestia-core/pull/151) +- Update rsmt2d (https://github.com/celestiaorg/celestia-core/pull/290) +- Make plugin a package (https://github.com/celestiaorg/celestia-core/pull/294) + +#### Adding DAH to Stuff + +- Adding DAH to Proposal (https://github.com/celestiaorg/celestia-core/pull/248/files) +- Blockmeta (https://github.com/celestiaorg/celestia-core/pull/372) + +#### Embedding DAS + +- GetLeafData (https://github.com/celestiaorg/celestia-core/pull/212) +- RetrieveBlockData (https://github.com/celestiaorg/celestia-core/pull/232) +- ValidateAvailability (https://github.com/celestiaorg/celestia-core/pull/270) +- Prevent double writes to IPFS (https://github.com/celestiaorg/celestia-core/pull/271) +- Stop Pinning (https://github.com/celestiaorg/celestia-core/pull/276) +- Rework IPFS Node (https://github.com/celestiaorg/celestia-core/pull/334) +- Refactor for putting the block (https://github.com/celestiaorg/celestia-core/pull/338) +- Config for IPFS node 
(https://github.com/celestiaorg/celestia-core/pull/340) +- IPLD Dag instead of CoreAPI (https://github.com/celestiaorg/celestia-core/pull/352) +- Adding the DAG to the blockstore (https://github.com/celestiaorg/celestia-core/pull/356) +- Saving and Loading using IPFS (https://github.com/celestiaorg/celestia-core/pull/374) +- Manual Providing (https://github.com/celestiaorg/celestia-core/pull/375) +- Refactor node provider (https://github.com/celestiaorg/celestia-core/pull/400) +- DAS in light client workaround (https://github.com/celestiaorg/celestia-core/pull/413) + +#### BlockID and PartSetHeader + +- Decouple PartSetHeader from BlockID (https://github.com/celestiaorg/celestia-core/pull/441) +- Stop Signing over the PartSetHeader (https://github.com/celestiaorg/celestia-core/pull/457) +- We still don’t want to sign over the PartSetHeader, but we will not be able to use the same mechanism used in the + linked PR, as that way requires decoupling of the PSH from the BlockID +- Remove PSH from some consensus messages (https://github.com/celestiaorg/celestia-core/pull/479) + +Note: This ADR overrides ADR 005 Decouple BlockID and the PartSetHeader. The PartSetHeader and the BlockID will mostly +remain the same. 
This will make pulling changes from upstream much easier + +## Status + +Accepted + +## Consequences + +### Positive + +- Pulling changes from upstream is streamlined +- Separation of functionality will help us iterate faster +- Creates a great opportunity for reconsidering past design choices without fully starting from scratch +- Prepare for future designs +- Don’t have to have two p2p stacks in a single repo + +### Negative + +- Perform some computation multiple times +- Running multiple nodes instead of a single node is less convenient for node operators (but only in the case the full + celestia-node wants to participate in the consensus protocol) + +## References + +Tracking Issue #491 diff --git a/docs/celestia-architecture/adr-008-updating-to-tendermint-v0.35.x.md b/docs/celestia-architecture/adr-008-updating-to-tendermint-v0.35.x.md new file mode 100644 index 0000000000..276358c418 --- /dev/null +++ b/docs/celestia-architecture/adr-008-updating-to-tendermint-v0.35.x.md @@ -0,0 +1,53 @@ +# ADR 008: Updating to tendermint v0.35.x + +## Changelog + +- 2022-05-03: Initial document describing changes to tendermint v0.35.x + +## Context + +Building off of ADR 007, we have further distilled the necessary changes to tendermint and continued to move added logic to other repos. Specifically, we have moved generation of the data hash, efficient construction of the data square, and a message inclusion check to celestia-app via adopting two new methods from ABCI++. This document is to serve as a guide for the remaining changes made on top of tendermint v0.35.4. 
+ +### Changes to tendermint + +#### Misc + +- [update github templates](https://github.com/celestiaorg/celestia-core/pull/405) +- [update README.md](https://github.com/celestiaorg/celestia-core/pull/737/commits/be9039d4e0f5d876ec3d8d4521be3374172d7992) +- [updating to go 1.17](https://github.com/celestiaorg/celestia-core/pull/737/commits/6094b7338082d106f81da987dffa56eb540a675e) +- [adding the consts package](https://github.com/celestiaorg/celestia-core/pull/737/commits/fea8528b0177230b7e75396ae05f7a9b5da23741) + +#### Changing the way the data hash is calculated + +To enable data availability sampling, Celestia uses a proprietary data square format to encode its block data. The data hash is generated from this data square by calculating namespace merkle tree root over each row and column. In the following changes, we implement encoding and decoding of block data to the data square format and tooling to generate the data hash. More details over this design can be found in our (archived but still very useful) [specs repo](https://github.com/celestiaorg/celestia-specs) + +- [Adding the Data Availability Header](https://github.com/celestiaorg/celestia-core/pull/737/commits/116b7af4000920030a373363487ef9a9f084e066) +- [Adding a wrapper for namespaced merkle trees](https://github.com/celestiaorg/celestia-core/pull/737/commits/eee8f352cb6a1687a9f6b470abe28bbd4eb66413) +- [Adding Messages and Evidence to the block data](https://github.com/celestiaorg/celestia-core/pull/737/commits/86df6529a7c0cc1112c34b6bf1b5364aa0518dec) +- [Adding share splitting and merging for block encoding](https://github.com/celestiaorg/celestia-core/pull/737/commits/bf2d8b46c1caf1fed52e7db9bf8aa6a9847d84ab) +- [Modifying MakeBlock to also accept Messages](https://github.com/celestiaorg/celestia-core/pull/737/commits/bb970a417356ab030c934ccd2bd39c9641af45f8) + +#### Adding PrepareProposal and ProcessProposal ABCI methods from ABCI++ + +- 
[PrepareProposal](https://github.com/celestiaorg/celestia-core/pull/737/commits/07f9a05444db763c44ff81f564e7350ddf57e5a4) +- [ProcessProposal](https://github.com/celestiaorg/celestia-core/pull/737/commits/2c9552db09841f2bbebc1ec34653b2441def9f13) + +more details on how we use these new methods in the app can be found in the [ABCI++ Adoption ADR](https://github.com/celestiaorg/celestia-app/blob/master/docs/architecture/ADR-001-ABCI%2B%2B.md). + +#### Wrapping Malleated Transactions + +Tendermint and the cosmos-sdk were not built to handle malleated transactions (txs that are submitted by the user, but modified by the block producer before being included in a block). While not a final solution, we have resorted to adding the hash of the original transaction (the one that is not modified by the block producer) to the modified one. This allows us to track the transaction in the event system and mempool. + +- [Index malleated Txs](https://github.com/celestiaorg/celestia-core/pull/737/commits/a54e3599a5ef6b2ba8b63f586aed8185a3f59e4d) + +#### Create NMT Inclusion Proofs for Transactions + +Since the block data that is committed over is encoded as a data square and we use namespaced merkle trees to generate the row and column roots of that square, we have to create transaction inclusion proofs also using nmts and a data square. The problem is that the block data isn't stored as a square, so in order to generate the inclusion proofs, we have to regenerate a portion of the square. We do that here. + +- [Create namespace merkle tree inclusion proofs for transactions included in the block](https://github.com/celestiaorg/celestia-core/pull/737/commits/01051aa5fef0693bf3bda801e39c80e5746b9c33) + +#### Adding the DataCommitment RPC endpoint + +This RPC endpoint is used by quantum gravity bridge orchestrators to create a commitment over the block data of a range of blocks. 
+- [Adding the DataCommitment RPC endpoint](https://github.com/celestiaorg/celestia-core/pull/737/commits/134eeefb7af41afe760d4adc5b22a9d55e36bc3e) \ No newline at end of file diff --git a/docs/celestia-architecture/adr-template.md b/docs/celestia-architecture/adr-template.md new file mode 100644 index 0000000000..c36879bcec --- /dev/null +++ b/docs/celestia-architecture/adr-template.md @@ -0,0 +1,72 @@ +# ADR {ADR-NUMBER}: {TITLE} + +## Changelog + +- {date}: {changelog} + +## Context + +> This section contains all the context one needs to understand the current state, and why there is a problem. It should be as succinct as possible and introduce the high level idea behind the solution. + +## Alternative Approaches + +> This section contains information around alternative options that are considered before making a decision. It should contain an explanation of why the alternative approach(es) were not chosen. + +## Decision + +> This section records the decision that was made. +> It is best to record as much info as possible from the discussion that happened. This aids in not having to go back to the Pull Request to get the needed information. + +## Detailed Design + +> This section does not need to be filled in at the start of the ADR, but must be completed prior to the merging of the implementation. +> +> Here are some common questions that get answered as part of the detailed design: +> +> - What are the user requirements? +> +> - What systems will be affected? +> +> - What new data structures are needed, what data structures will be changed? +> +> - What new APIs will be needed, what APIs will be changed? +> +> - What are the efficiency considerations (time/space)? +> +> - What are the expected access patterns (load/throughput)? +> +> - Are there any logging, monitoring or observability needs? +> +> - Are there any security considerations? +> +> - Are there any privacy considerations? +> +> - How will the changes be tested? 
+> +> - If the change is large, how will the changes be broken up for ease of review? +> +> - Will these changes require a breaking (major) release? +> +> - Does this change require coordination with the Celestia fork of the SDK or celestia-app? + +## Status + +> A decision may be "proposed" if it hasn't been agreed upon yet, or "accepted" once it is agreed upon. Once the ADR has been implemented mark the ADR as "implemented". If a later ADR changes or reverses a decision, it may be marked as "deprecated" or "superseded" with a reference to its replacement. + +{Deprecated|Proposed|Accepted|Declined} + +## Consequences + +> This section describes the consequences, after applying the decision. All consequences should be summarized here, not just the "positive" ones. + +### Positive + +### Negative + +### Neutral + +## References + +> Are there any relevant PR comments, issues that led up to this, or articles referenced for why we made the given design choice? If so link them here! + +- {reference link} diff --git a/docs/celestia-architecture/assets/user-request.png b/docs/celestia-architecture/assets/user-request.png new file mode 100644 index 0000000000..3d04fad734 Binary files /dev/null and b/docs/celestia-architecture/assets/user-request.png differ diff --git a/docs/celestia-architecture/celestia-logo.png b/docs/celestia-architecture/celestia-logo.png new file mode 100644 index 0000000000..dce8b0b34d Binary files /dev/null and b/docs/celestia-architecture/celestia-logo.png differ diff --git a/docs/celestia-architecture/img/core-node-relation.jpg b/docs/celestia-architecture/img/core-node-relation.jpg new file mode 100644 index 0000000000..8c93640633 Binary files /dev/null and b/docs/celestia-architecture/img/core-node-relation.jpg differ diff --git a/docs/celestia-architecture/img/extended_square.png b/docs/celestia-architecture/img/extended_square.png new file mode 100644 index 0000000000..8bbf469505 Binary files /dev/null and 
b/docs/celestia-architecture/img/extended_square.png differ diff --git a/evidence/pool_test.go b/evidence/pool_test.go index 16c9317331..3033104c60 100644 --- a/evidence/pool_test.go +++ b/evidence/pool_test.go @@ -19,6 +19,7 @@ import ( sm "github.com/tendermint/tendermint/state" smmocks "github.com/tendermint/tendermint/state/mocks" "github.com/tendermint/tendermint/store" + "github.com/tendermint/tendermint/test/factory" "github.com/tendermint/tendermint/types" "github.com/tendermint/tendermint/version" ) @@ -192,7 +193,7 @@ func TestEvidencePoolUpdate(t *testing.T) { ev := types.NewMockDuplicateVoteEvidenceWithValidator(height, defaultEvidenceTime.Add(21*time.Minute), val, evidenceChainID) lastCommit := makeCommit(height, val.PrivKey.PubKey().Address()) - block := types.MakeBlock(height+1, []types.Tx{}, []types.Evidence{ev}, nil, lastCommit) + block := types.MakeBlock(height+1, factory.MakeData([]types.Tx{}, []types.Evidence{ev}, nil), lastCommit) // update state (partially) state.LastBlockHeight = height + 1 @@ -404,7 +405,7 @@ func initializeBlockStore(db dbm.DB, state sm.State, valAddr []byte) *store.Bloc for i := int64(1); i <= state.LastBlockHeight; i++ { lastCommit := makeCommit(i-1, valAddr) - block, _ := state.MakeBlock(i, []types.Tx{}, nil, nil, lastCommit, + block, _ := state.MakeBlock(i, factory.MakeData([]types.Tx{}, nil, nil), lastCommit, state.Validators.GetProposer().Address) block.Header.Time = defaultEvidenceTime.Add(time.Duration(i) * time.Minute) block.Header.Version = tmversion.Consensus{Block: version.BlockProtocol, App: 1} diff --git a/go.mod b/go.mod index 3e5682b733..2ebc10bd1a 100644 --- a/go.mod +++ b/go.mod @@ -9,7 +9,7 @@ require ( github.com/adlio/schema v1.1.13 github.com/btcsuite/btcd v0.21.0-beta github.com/btcsuite/btcutil v1.0.2 - github.com/celestiaorg/nmt v0.8.0 + github.com/celestiaorg/nmt v0.10.0 github.com/celestiaorg/rsmt2d v0.5.0 github.com/fortytw2/leaktest v1.3.0 github.com/go-kit/kit v0.12.0 diff --git a/go.sum 
b/go.sum index 133776674d..03c3485090 100644 --- a/go.sum +++ b/go.sum @@ -144,8 +144,8 @@ github.com/celestiaorg/go-leopard v0.1.0 h1:28z2EkvKJIez5J9CEaiiUEC+OxalRLtTGJJ1 github.com/celestiaorg/go-leopard v0.1.0/go.mod h1:NtO/rjlB8dw2aq7jr06vZFKGvryQcTDXaNHelmPNOAM= github.com/celestiaorg/merkletree v0.0.0-20210714075610-a84dc3ddbbe4 h1:CJdIpo8n5MFP2MwK0gSRcOVlDlFdQJO1p+FqdxYzmvc= github.com/celestiaorg/merkletree v0.0.0-20210714075610-a84dc3ddbbe4/go.mod h1:fzuHnhzj1pUygGz+1ZkB3uQbEUL4htqCGJ4Qs2LwMZA= -github.com/celestiaorg/nmt v0.8.0 h1:wtX7GRouLbmBe+ffnc8+cOg2UbWteM+Y1imZuZ/EeqU= -github.com/celestiaorg/nmt v0.8.0/go.mod h1:3bqzTj8xKj0DgQUpOgZzoxvtNkC3MS/hTbQ6dn8SIa0= +github.com/celestiaorg/nmt v0.10.0 h1:HLfVWvpagHz5+uiE0QSjzv350wLhhnybNmrxq9NHLKc= +github.com/celestiaorg/nmt v0.10.0/go.mod h1:3bqzTj8xKj0DgQUpOgZzoxvtNkC3MS/hTbQ6dn8SIa0= github.com/celestiaorg/rsmt2d v0.5.0 h1:Wa0uNZUXl8lIMJnSunjoD65ktqBedXZD0z2ZU3xKYYw= github.com/celestiaorg/rsmt2d v0.5.0/go.mod h1:EZ+O2KdCq8xI7WFwjATLdhtMdrdClmAs2w7zENDr010= github.com/cenkalti/backoff v2.2.1+incompatible h1:tNowT99t7UNflLxfYYSlKYsBpXdEet03Pg2g16Swow4= diff --git a/mempool/v1/mempool.go b/mempool/v1/mempool.go index fab3fc9105..b89944407c 100644 --- a/mempool/v1/mempool.go +++ b/mempool/v1/mempool.go @@ -1,6 +1,7 @@ package v1 import ( + "crypto/sha256" "fmt" "runtime" "sort" @@ -410,7 +411,13 @@ func (txmp *TxMempool) Update( } // Regardless of success, remove the transaction from the mempool. 
- _ = txmp.removeTxByKey(tx.Key()) + if err := txmp.removeTxByKey(tx.Key()); err != nil { + if originalHash, _, isMalleated := types.UnwrapMalleatedTx(tx); isMalleated { + var originalKey [sha256.Size]byte + copy(originalKey[:], originalHash) + _ = txmp.removeTxByKey(types.TxKey(originalKey)) + } + } } txmp.purgeExpiredTxs(blockHeight) diff --git a/mempool/v1/mempool_test.go b/mempool/v1/mempool_test.go index 48949402cd..f7b840f4cb 100644 --- a/mempool/v1/mempool_test.go +++ b/mempool/v1/mempool_test.go @@ -2,6 +2,7 @@ package v1 import ( "bytes" + "crypto/sha256" "errors" "fmt" "math/rand" @@ -652,3 +653,30 @@ func TestTxMempool_CheckTxPostCheckError(t *testing.T) { }) } } + +func TestMalleatedTxRemoval(t *testing.T) { + txmp := setup(t, 500) + originalTx := []byte{1, 2, 3, 4} + malleatedTx := []byte{1, 2} + originalHash := sha256.Sum256(originalTx) + + // create the wrapped child transaction + wTx, err := types.WrapMalleatedTx(originalHash[:], malleatedTx) + require.NoError(t, err) + + // add the parent transaction to the mempool + err = txmp.CheckTx(originalTx, nil, mempool.TxInfo{}) + require.NoError(t, err) + + // remove the parent from the mempool using the wrapped child tx + err = txmp.Update(1, []types.Tx{wTx}, abciResponses(1, abci.CodeTypeOK), nil, nil) + require.NoError(t, err) +} + +func abciResponses(n int, code uint32) []*abci.ResponseDeliverTx { + responses := make([]*abci.ResponseDeliverTx, 0, n) + for i := 0; i < n; i++ { + responses = append(responses, &abci.ResponseDeliverTx{Code: code}) + } + return responses +} diff --git a/pkg/da/data_availability_header.go b/pkg/da/data_availability_header.go index 61e1f3c0c3..714d634c99 100644 --- a/pkg/da/data_availability_header.go +++ b/pkg/da/data_availability_header.go @@ -181,10 +181,7 @@ var tailPaddingShare = append( // MinDataAvailabilityHeader returns the minimum valid data availability header. 
// It is equal to the data availability header for an empty block func MinDataAvailabilityHeader() DataAvailabilityHeader { - shares := make([][]byte, consts.MinSharecount) - for i := 0; i < consts.MinSharecount; i++ { - shares[i] = tailPaddingShare - } + shares := GenerateEmptyShares(consts.MinSharecount) eds, err := ExtendShares(consts.MinSquareSize, shares) if err != nil { panic(err) @@ -193,6 +190,15 @@ func MinDataAvailabilityHeader() DataAvailabilityHeader { return dah } +// GenerateEmptyShares returns a slice of size empty shares, each set to tailPaddingShare. +func GenerateEmptyShares(size int) [][]byte { + shares := make([][]byte, size) + for i := 0; i < size; i++ { + shares[i] = tailPaddingShare + } + return shares +} + // validateHash returns an error if the hash is not empty, but its // size != tmhash.Size. copy pasted from `types` package as to not import func validateHash(h []byte) error { diff --git a/pkg/prove/proof.go b/pkg/prove/proof.go index 7e358ed9a7..80f2e109e3 100644 --- a/pkg/prove/proof.go +++ b/pkg/prove/proof.go @@ -17,22 +17,21 @@ import ( // of a data square, and then using those shares to creates nmt inclusion proofs // It is possible that a transaction spans more than one row. In that case, we // have to return two proofs. 
-func TxInclusion(codec rsmt2d.Codec, data types.Data, origSquareSize, txIndex uint) (types.TxProof, error) { +func TxInclusion(codec rsmt2d.Codec, data types.Data, txIndex uint64) (types.TxProof, error) { // calculate the index of the shares that contain the tx startPos, endPos, err := txSharePosition(data.Txs, txIndex) if err != nil { return types.TxProof{}, err } - if (endPos - startPos) > 1 { - return types.TxProof{}, errors.New("transaction spanned more than two shares, this is not yet supported") - } // use the index of the shares and the square size to determine the row that // contains the tx we need to prove - startRow := startPos / origSquareSize - endRow := endPos / origSquareSize + startRow := startPos / data.OriginalSquareSize + endRow := endPos / data.OriginalSquareSize + startLeaf := startPos % data.OriginalSquareSize + endLeaf := endPos % data.OriginalSquareSize - rowShares, err := genRowShares(codec, data, origSquareSize, startRow, endRow) + rowShares, err := genRowShares(codec, data, startRow, endRow) if err != nil { return types.TxProof{}, err } @@ -42,7 +41,7 @@ func TxInclusion(codec rsmt2d.Codec, data types.Data, origSquareSize, txIndex ui var rowRoots []tmbytes.HexBytes //nolint:prealloc // rarely will this contain more than a single root for i, row := range rowShares { // create an nmt to use to generate a proof - tree := wrapper.NewErasuredNamespacedMerkleTree(uint64(origSquareSize)) + tree := wrapper.NewErasuredNamespacedMerkleTree(data.OriginalSquareSize) for j, share := range row { tree.Push( share, @@ -53,16 +52,21 @@ func TxInclusion(codec rsmt2d.Codec, data types.Data, origSquareSize, txIndex ui ) } - var pos uint - if i == 0 { - pos = startPos - (startRow * origSquareSize) - } else { - pos = endPos - (endRow * origSquareSize) + startLeafPos := startLeaf + endLeafPos := endLeaf + + // if this is not the first row, then start with the first leaf + if i > 0 { + startLeafPos = 0 + } + // if this is not the last row, then select for the 
rest of the row + if i != (len(rowShares) - 1) { + endLeafPos = data.OriginalSquareSize - 1 } - shares = append(shares, row[pos]) + shares = append(shares, row[startLeafPos:endLeafPos+1]...) - proof, err := tree.Prove(int(pos)) + proof, err := tree.Tree().ProveRange(int(startLeafPos), int(endLeafPos+1)) if err != nil { return types.TxProof{}, err } @@ -87,22 +91,22 @@ func TxInclusion(codec rsmt2d.Codec, data types.Data, origSquareSize, txIndex ui } // txSharePosition returns the share that a given transaction is included in. -// returns -1 if index is greater than that of the provided txs. -func txSharePosition(txs types.Txs, txIndex uint) (startSharePos, endSharePos uint, err error) { - if txIndex >= uint(len(txs)) { +// returns an error if index is greater than that of the provided txs. +func txSharePosition(txs types.Txs, txIndex uint64) (startSharePos, endSharePos uint64, err error) { + if txIndex >= uint64(len(txs)) { return startSharePos, endSharePos, errors.New("transaction index is greater than the number of txs") } totalLen := 0 - for i := uint(0); i < txIndex; i++ { + for i := uint64(0); i < txIndex; i++ { txLen := len(txs[i]) totalLen += (delimLen(txLen) + txLen) } txLen := len(txs[txIndex]) - startSharePos = uint((totalLen) / consts.TxShareSize) - endSharePos = uint((totalLen + txLen + delimLen(txLen)) / consts.TxShareSize) + startSharePos = uint64((totalLen) / consts.TxShareSize) + endSharePos = uint64((totalLen + txLen + delimLen(txLen)) / consts.TxShareSize) return startSharePos, endSharePos, nil } @@ -113,13 +117,13 @@ func delimLen(txLen int) int { } // genRowShares progessively generates data square rows from block data -func genRowShares(codec rsmt2d.Codec, data types.Data, origSquareSize, startRow, endRow uint) ([][][]byte, error) { - if endRow > origSquareSize { +func genRowShares(codec rsmt2d.Codec, data types.Data, startRow, endRow uint64) ([][][]byte, error) { + if endRow > data.OriginalSquareSize { return nil, errors.New("cannot generate 
row shares past the original square size") } origRowShares := splitIntoRows( - origSquareSize, - genOrigRowShares(data, origSquareSize, startRow, endRow), + data.OriginalSquareSize, + genOrigRowShares(data, startRow, endRow), ) encodedRowShares := make([][][]byte, len(origRowShares)) @@ -142,18 +146,20 @@ func genRowShares(codec rsmt2d.Codec, data types.Data, origSquareSize, startRow, // genOrigRowShares progressively generates data square rows for the original // data square, meaning the rows only half the full square length, as there is // not erasure data -func genOrigRowShares(data types.Data, originalSquareSize, startRow, endRow uint) [][]byte { - wantLen := (endRow + 1) * originalSquareSize - startPos := startRow * originalSquareSize +func genOrigRowShares(data types.Data, startRow, endRow uint64) [][]byte { + wantLen := (endRow + 1) * data.OriginalSquareSize + startPos := startRow * data.OriginalSquareSize shares := data.Txs.SplitIntoShares() // return if we have enough shares - if uint(len(shares)) >= wantLen { + if uint64(len(shares)) >= wantLen { return shares[startPos:wantLen].RawShares() } - shares = append(shares, data.Evidence.SplitIntoShares()...) - if uint(len(shares)) >= wantLen { + evdShares := data.Evidence.SplitIntoShares() + + shares = append(shares, evdShares...) 
+ if uint64(len(shares)) >= wantLen { return shares[startPos:wantLen].RawShares() } @@ -165,7 +171,7 @@ func genOrigRowShares(data types.Data, originalSquareSize, startRow, endRow uint shares = types.AppendToShares(shares, m.NamespaceID, rawData) // return if we have enough shares - if uint(len(shares)) >= wantLen { + if uint64(len(shares)) >= wantLen { return shares[startPos:wantLen].RawShares() } } @@ -177,10 +183,10 @@ func genOrigRowShares(data types.Data, originalSquareSize, startRow, endRow uint } // splitIntoRows splits shares into rows of a particular square size -func splitIntoRows(origSquareSize uint, shares [][]byte) [][][]byte { - rowCount := uint(len(shares)) / origSquareSize +func splitIntoRows(origSquareSize uint64, shares [][]byte) [][][]byte { + rowCount := uint64(len(shares)) / origSquareSize rows := make([][][]byte, rowCount) - for i := uint(0); i < rowCount; i++ { + for i := uint64(0); i < rowCount; i++ { rows[i] = shares[i*origSquareSize : (i+1)*origSquareSize] } return rows diff --git a/pkg/prove/proof_test.go b/pkg/prove/proof_test.go index 3ac0b261f0..081beaac1a 100644 --- a/pkg/prove/proof_test.go +++ b/pkg/prove/proof_test.go @@ -3,7 +3,6 @@ package prove import ( "bytes" "fmt" - "math" "math/rand" "sort" "strings" @@ -11,28 +10,67 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + tmrand "github.com/tendermint/tendermint/libs/rand" "github.com/tendermint/tendermint/pkg/consts" "github.com/tendermint/tendermint/pkg/da" "github.com/tendermint/tendermint/types" ) func TestTxInclusion(t *testing.T) { - txCount := 100 typicalBlockData := types.Data{ - Txs: generateRandomlySizedContiguousShares(txCount, 200), - Messages: generateRandomlySizedMessages(10, 150), + Txs: generateRandomlySizedContiguousShares(100, 500), + Messages: generateRandomlySizedMessages(40, 16000), + OriginalSquareSize: 64, + } + lotsOfTxsNoMessages := types.Data{ + Txs: generateRandomlySizedContiguousShares(1000, 500), + 
OriginalSquareSize: 64, + } + overlappingSquareSize := 16 + overlappingRowsBlockData := types.Data{ + Txs: types.ToTxs( + [][]byte{ + tmrand.Bytes(consts.TxShareSize*overlappingSquareSize + 1), + tmrand.Bytes(10000), + }, + ), + OriginalSquareSize: uint64(overlappingSquareSize), + } + overlappingRowsBlockDataWithMessages := types.Data{ + Txs: types.ToTxs( + [][]byte{ + tmrand.Bytes(consts.TxShareSize*overlappingSquareSize + 1), + tmrand.Bytes(10000), + }, + ), + Messages: generateRandomlySizedMessages(8, 400), + OriginalSquareSize: uint64(overlappingSquareSize), } - // compute the data availability header - shares, _, err := typicalBlockData.ComputeShares(0) - require.NoError(t, err) - - squareSize := uint(math.Sqrt(float64(len(shares)))) + type test struct { + data types.Data + } + tests := []test{ + { + typicalBlockData, + }, + { + lotsOfTxsNoMessages, + }, + { + overlappingRowsBlockData, + }, + { + overlappingRowsBlockDataWithMessages, + }, + } - for i := 0; i < txCount; i++ { - txProof, err := TxInclusion(consts.DefaultCodec(), typicalBlockData, squareSize, uint(i)) - require.NoError(t, err) - assert.True(t, txProof.VerifyProof()) + for _, tt := range tests { + for i := 0; i < len(tt.data.Txs); i++ { + txProof, err := TxInclusion(consts.DefaultCodec(), tt.data, uint64(i)) + require.NoError(t, err) + assert.True(t, txProof.VerifyProof()) + } } } @@ -66,13 +104,13 @@ func TestTxSharePosition(t *testing.T) { } type startEndPoints struct { - start, end uint + start, end uint64 } for _, tt := range tests { positions := make([]startEndPoints, len(tt.txs)) for i := 0; i < len(tt.txs); i++ { - start, end, err := txSharePosition(tt.txs, uint(i)) + start, end, err := txSharePosition(tt.txs, uint64(i)) require.NoError(t, err) positions[i] = startEndPoints{start: start, end: end} } @@ -97,37 +135,35 @@ func TestTxSharePosition(t *testing.T) { } func Test_genRowShares(t *testing.T) { + squareSize := uint64(16) typicalBlockData := types.Data{ - Txs: 
generateRandomlySizedContiguousShares(120, 200), - Messages: generateRandomlySizedMessages(10, 1000), + Txs: generateRandomlySizedContiguousShares(10, 200), + Messages: generateRandomlySizedMessages(20, 1000), + OriginalSquareSize: squareSize, } - // compute the data availability header - allShares, _, err := typicalBlockData.ComputeShares(0) - require.NoError(t, err) - rawShares := allShares.RawShares() - - originalSquareSize := uint(math.Sqrt(float64(len(rawShares)))) - - eds, err := da.ExtendShares(uint64(originalSquareSize), rawShares) - require.NoError(t, err) - - eds.ColRoots() - + // note: we should be able to compute row shares from raw data + // this quickly tests this by computing the row shares before + // computing the shares in the normal way. rowShares, err := genRowShares( consts.DefaultCodec(), typicalBlockData, - originalSquareSize, 0, - originalSquareSize-1, + squareSize, ) require.NoError(t, err) - for i := uint(0); i < originalSquareSize; i++ { - row := eds.Row(i) + allShares, _, _ := typicalBlockData.ComputeShares(squareSize) + rawShares := allShares.RawShares() + + eds, err := da.ExtendShares(squareSize, rawShares) + require.NoError(t, err) + + for i := uint64(0); i < squareSize; i++ { + row := eds.Row(uint(i)) assert.Equal(t, row, rowShares[i], fmt.Sprintf("row %d", i)) // also test fetching individual rows - secondSet, err := genRowShares(consts.DefaultCodec(), typicalBlockData, originalSquareSize, i, i) + secondSet, err := genRowShares(consts.DefaultCodec(), typicalBlockData, i, i) require.NoError(t, err) assert.Equal(t, row, secondSet[0], fmt.Sprintf("row %d", i)) } @@ -135,17 +171,18 @@ func Test_genRowShares(t *testing.T) { func Test_genOrigRowShares(t *testing.T) { txCount := 100 + squareSize := uint64(16) typicalBlockData := types.Data{ - Txs: generateRandomlySizedContiguousShares(txCount, 200), - Messages: generateRandomlySizedMessages(10, 1500), + Txs: generateRandomlySizedContiguousShares(txCount, 200), + Messages: 
generateRandomlySizedMessages(10, 1500), + OriginalSquareSize: squareSize, } - // compute the data availability header - allShares, _, err := typicalBlockData.ComputeShares(0) + allShares, _, err := typicalBlockData.ComputeShares(squareSize) require.NoError(t, err) rawShares := allShares.RawShares() - genShares := genOrigRowShares(typicalBlockData, 8, 0, 7) + genShares := genOrigRowShares(typicalBlockData, 0, 15) require.Equal(t, len(allShares), len(genShares)) assert.Equal(t, rawShares, genShares) @@ -196,7 +233,9 @@ func generateRandomlySizedMessages(count, maxMsgSize int) types.Messages { msgs = nil } - return types.Messages{MessagesList: msgs} + messages := types.Messages{MessagesList: msgs} + messages.SortMessages() + return messages } func generateRandomMessage(size int) types.Message { diff --git a/rpc/core/tx.go b/rpc/core/tx.go index 60b134211e..3c7ae86466 100644 --- a/rpc/core/tx.go +++ b/rpc/core/tx.go @@ -43,8 +43,7 @@ func Tx(ctx *rpctypes.Context, hash []byte, proveTx bool) (*ctypes.ResultTx, err txProof, err = prove.TxInclusion( consts.DefaultCodec(), block.Data, - uint(block.Data.OriginalSquareSize), - uint(r.Index), + uint64(r.Index), ) if err != nil { return nil, err @@ -128,7 +127,7 @@ func TxSearch( var proof types.TxProof if proveTx { block := env.BlockStore.LoadBlock(r.Height) - proof, err = prove.TxInclusion(consts.DefaultCodec(), block.Data, uint(block.Data.OriginalSquareSize), uint(r.Index)) + proof, err = prove.TxInclusion(consts.DefaultCodec(), block.Data, uint64(r.Index)) if err != nil { return nil, err } diff --git a/state/execution.go b/state/execution.go index 7d83504c25..92ec0629e1 100644 --- a/state/execution.go +++ b/state/execution.go @@ -161,9 +161,7 @@ func (blockExec *BlockExecutor) CreateProposalBlock( return state.MakeBlock( height, - newData.Txs, - newData.Evidence.Evidence, - newData.Messages.MessagesList, + newData, commit, proposerAddr, ) diff --git a/state/execution_test.go b/state/execution_test.go index 
a2e9e1a237..2299cf60d1 100644 --- a/state/execution_test.go +++ b/state/execution_test.go @@ -104,9 +104,7 @@ func TestBeginBlockValidators(t *testing.T) { // block for height 2 block, _ := state.MakeBlock( 2, - factory.MakeTenTxs(2), - nil, - nil, + factory.MakeData(factory.MakeTenTxs(2), nil, nil), lastCommit, state.Validators.GetProposer().Address, ) diff --git a/state/helpers_test.go b/state/helpers_test.go index 2232a71152..bf3d4e6d64 100644 --- a/state/helpers_test.go +++ b/state/helpers_test.go @@ -57,9 +57,7 @@ func makeAndApplyGoodBlock(state sm.State, height int64, lastCommit *types.Commi blockExec *sm.BlockExecutor, evidence []types.Evidence) (sm.State, types.BlockID, error) { block, _ := state.MakeBlock( height, - factory.MakeTenTxs(height), - evidence, - nil, + factory.MakeData(factory.MakeTenTxs(height), evidence, nil), lastCommit, proposerAddr, ) @@ -142,8 +140,7 @@ func makeState(nVals, height int) (sm.State, dbm.DB, map[string]types.PrivValida func makeBlock(state sm.State, height int64) *types.Block { block, _ := state.MakeBlock( height, - makeTxs(state.LastBlockHeight), - nil, nil, + factory.MakeData(makeTxs(state.LastBlockHeight), nil, nil), new(types.Commit), state.Validators.GetProposer().Address, ) diff --git a/state/state.go b/state/state.go index bd62698a89..a76c19734c 100644 --- a/state/state.go +++ b/state/state.go @@ -233,15 +233,13 @@ func FromProto(pb *tmstate.State) (*State, error) { //nolint:golint // track rounds, and hence does not know the correct proposer. TODO: fix this! func (state State) MakeBlock( height int64, - txs []types.Tx, - evidence []types.Evidence, - messages []types.Message, + data types.Data, commit *types.Commit, proposerAddress []byte, ) (*types.Block, *types.PartSet) { // Build base block with block data. - block := types.MakeBlock(height, txs, evidence, messages, commit) + block := types.MakeBlock(height, data, commit) // Set time. 
var timestamp time.Time diff --git a/state/test/factory/block.go b/state/test/factory/block.go index 2101a3af41..80808b5daa 100644 --- a/state/test/factory/block.go +++ b/state/test/factory/block.go @@ -40,15 +40,31 @@ func MakeBlocks(n int, state *sm.State, privVal types.PrivValidator) []*types.Bl func MakeBlock(state sm.State, height int64, c *types.Commit) *types.Block { block, _ := state.MakeBlock( height, - factory.MakeTenTxs(state.LastBlockHeight), - nil, - nil, + MakeData(factory.MakeTenTxs(state.LastBlockHeight), nil, nil), c, state.Validators.GetProposer().Address, ) return block } +func MakeData(txs []types.Tx, evd []types.Evidence, msgs []types.Message) types.Data { + return types.Data{ + Txs: txs, + Evidence: types.EvidenceData{ + Evidence: evd, + }, + Messages: types.Messages{ + MessagesList: msgs, + }, + } +} + +func MakeDataFromTxs(txs []types.Tx) types.Data { + return types.Data{ + Txs: txs, + } +} + func makeBlockAndPartSet(state sm.State, lastBlock *types.Block, lastBlockMeta *types.BlockMeta, privVal types.PrivValidator, height int64) (*types.Block, *types.PartSet) { @@ -64,7 +80,7 @@ func makeBlockAndPartSet(state sm.State, lastBlock *types.Block, lastBlockMeta * lastBlockMeta.BlockID, []types.CommitSig{vote.CommitSig()}) } - return state.MakeBlock(height, []types.Tx{}, nil, nil, lastCommit, state.Validators.GetProposer().Address) + return state.MakeBlock(height, MakeDataFromTxs([]types.Tx{}), lastCommit, state.Validators.GetProposer().Address) } func MakeVote( diff --git a/state/validation_test.go b/state/validation_test.go index 910d2586ae..0a64a8eed7 100644 --- a/state/validation_test.go +++ b/state/validation_test.go @@ -79,7 +79,7 @@ func TestValidateBlockHeader(t *testing.T) { Invalid blocks don't pass */ for _, tc := range testCases { - block, _ := state.MakeBlock(height, makeTxs(height), nil, nil, lastCommit, proposerAddr) + block, _ := state.MakeBlock(height, factory.MakeData(makeTxs(height), nil, nil), lastCommit, proposerAddr) 
tc.malleateBlock(block) err := blockExec.ValidateBlock(state, block) require.Error(t, err, tc.name) @@ -96,7 +96,7 @@ func TestValidateBlockHeader(t *testing.T) { nextHeight := validationTestsStopHeight block, _ := state.MakeBlock( nextHeight, - factory.MakeTenTxs(nextHeight), nil, nil, + factory.MakeData(factory.MakeTenTxs(nextHeight), nil, nil), lastCommit, state.Validators.GetProposer().Address, ) @@ -146,7 +146,12 @@ func TestValidateBlockCommit(t *testing.T) { state.LastBlockID, []types.CommitSig{wrongHeightVote.CommitSig()}, ) - block, _ := state.MakeBlock(height, factory.MakeTenTxs(height), nil, nil, wrongHeightCommit, proposerAddr) + block, _ := state.MakeBlock( + height, + factory.MakeData(factory.MakeTenTxs(height), nil, nil), + wrongHeightCommit, + proposerAddr, + ) err = blockExec.ValidateBlock(state, block) _, isErrInvalidCommitHeight := err.(types.ErrInvalidCommitHeight) require.True(t, isErrInvalidCommitHeight, "expected ErrInvalidCommitHeight at height %d but got: %v", height, err) @@ -154,7 +159,12 @@ func TestValidateBlockCommit(t *testing.T) { /* #2589: test len(block.LastCommit.Signatures) == state.LastValidators.Size() */ - block, _ = state.MakeBlock(height, factory.MakeTenTxs(height), nil, nil, wrongSigsCommit, proposerAddr) + block, _ = state.MakeBlock( + height, + factory.MakeData(factory.MakeTenTxs(height), nil, nil), + wrongSigsCommit, + proposerAddr, + ) err = blockExec.ValidateBlock(state, block) _, isErrInvalidCommitSignatures := err.(types.ErrInvalidCommitSignatures) require.True(t, isErrInvalidCommitSignatures, @@ -261,7 +271,12 @@ func TestValidateBlockEvidence(t *testing.T) { evidence = append(evidence, newEv) currentBytes += int64(len(newEv.Bytes())) } - block, _ := state.MakeBlock(height, factory.MakeTenTxs(height), evidence, nil, lastCommit, proposerAddr) + block, _ := state.MakeBlock( + height, + factory.MakeData(factory.MakeTenTxs(height), evidence, nil), + lastCommit, + proposerAddr, + ) err := blockExec.ValidateBlock(state, 
block) if assert.Error(t, err) { _, ok := err.(*types.ErrEvidenceOverflow) diff --git a/store/store_test.go b/store/store_test.go index 19fc530d7f..72c1039630 100644 --- a/store/store_test.go +++ b/store/store_test.go @@ -21,6 +21,7 @@ import ( tmstore "github.com/tendermint/tendermint/proto/tendermint/store" tmversion "github.com/tendermint/tendermint/proto/tendermint/version" sm "github.com/tendermint/tendermint/state" + "github.com/tendermint/tendermint/state/test/factory" "github.com/tendermint/tendermint/types" tmtime "github.com/tendermint/tendermint/types/time" "github.com/tendermint/tendermint/version" @@ -50,7 +51,12 @@ func makeTxs(height int64) (txs []types.Tx) { } func makeBlock(height int64, state sm.State, lastCommit *types.Commit) *types.Block { - block, _ := state.MakeBlock(height, makeTxs(height), nil, nil, lastCommit, state.Validators.GetProposer().Address) + block, _ := state.MakeBlock( + height, + factory.MakeDataFromTxs(makeTxs(height)), + lastCommit, + state.Validators.GetProposer().Address, + ) return block } diff --git a/test/factory/tx.go b/test/factory/tx.go index c97aeefc96..b6dc0b64d1 100644 --- a/test/factory/tx.go +++ b/test/factory/tx.go @@ -14,3 +14,15 @@ func MakeTxs(height int64, num int) (txs []types.Tx) { func MakeTenTxs(height int64) (txs []types.Tx) { return MakeTxs(height, 10) } + +func MakeData(txs []types.Tx, evd []types.Evidence, msgs []types.Message) types.Data { + return types.Data{ + Txs: txs, + Evidence: types.EvidenceData{ + Evidence: evd, + }, + Messages: types.Messages{ + MessagesList: msgs, + }, + } +} diff --git a/types/block.go b/types/block.go index 39019d6640..e774b1d7b2 100644 --- a/types/block.go +++ b/types/block.go @@ -321,18 +321,14 @@ func MaxDataBytesNoEvidence(maxBytes int64, valsCount int) int64 { // It populates the same set of fields validated by ValidateBasic. 
func MakeBlock( height int64, - txs []Tx, evidence []Evidence, messages []Message, + data Data, lastCommit *Commit) *Block { block := &Block{ Header: Header{ Version: tmversion.Consensus{Block: version.BlockProtocol, App: 0}, Height: height, }, - Data: Data{ - Txs: txs, - Evidence: EvidenceData{Evidence: evidence}, - Messages: Messages{MessagesList: messages}, - }, + Data: data, LastCommit: lastCommit, } block.fillHeader() @@ -1167,7 +1163,7 @@ type Messages struct { func (msgs Messages) SplitIntoShares() NamespacedShares { shares := make([]NamespacedShare, 0) - msgs.sortMessages() + msgs.SortMessages() for _, m := range msgs.MessagesList { rawData, err := m.MarshalDelimited() if err != nil { @@ -1178,7 +1174,7 @@ func (msgs Messages) SplitIntoShares() NamespacedShares { return shares } -func (msgs *Messages) sortMessages() { +func (msgs *Messages) SortMessages() { sort.Slice(msgs.MessagesList, func(i, j int) bool { return bytes.Compare(msgs.MessagesList[i].NamespaceID, msgs.MessagesList[j].NamespaceID) < 0 }) diff --git a/types/block_test.go b/types/block_test.go index ea9468eed0..f899fa0c02 100644 --- a/types/block_test.go +++ b/types/block_test.go @@ -45,7 +45,7 @@ func TestBlockAddEvidence(t *testing.T) { ev := NewMockDuplicateVoteEvidenceWithValidator(h, time.Now(), vals[0], "block-test-chain") evList := []Evidence{ev} - block := MakeBlock(h, txs, evList, nil, commit) + block := MakeBlock(h, makeData(txs, evList, nil), commit) require.NotNil(t, block) require.Equal(t, 1, len(block.Data.Evidence.Evidence)) require.NotNil(t, block.EvidenceHash) @@ -96,7 +96,7 @@ func TestBlockValidateBasic(t *testing.T) { tc := tc i := i t.Run(tc.testName, func(t *testing.T) { - block := MakeBlock(h, txs, evList, nil, commit) + block := MakeBlock(h, makeData(txs, evList, nil), commit) block.ProposerAddress = valSet.GetProposer().Address tc.malleateBlock(block) err = block.ValidateBasic() @@ -107,13 +107,13 @@ func TestBlockValidateBasic(t *testing.T) { func TestBlockHash(t 
*testing.T) { assert.Nil(t, (*Block)(nil).Hash()) - assert.Nil(t, MakeBlock(int64(3), []Tx{Tx("Hello World")}, nil, nil, nil).Hash()) + assert.Nil(t, MakeBlock(int64(3), makeData([]Tx{Tx("Hello World")}, nil, nil), nil).Hash()) } func TestBlockMakePartSet(t *testing.T) { assert.Nil(t, (*Block)(nil).MakePartSet(2)) - partSet := MakeBlock(int64(3), []Tx{Tx("Hello World")}, nil, nil, nil).MakePartSet(1024) + partSet := MakeBlock(int64(3), makeData([]Tx{Tx("Hello World")}, nil, nil), nil).MakePartSet(1024) assert.NotNil(t, partSet) assert.EqualValues(t, 1, partSet.Total()) } @@ -131,7 +131,7 @@ func TestBlockMakePartSetWithEvidence(t *testing.T) { ev := NewMockDuplicateVoteEvidenceWithValidator(h, time.Now(), vals[0], "block-test-chain") evList := []Evidence{ev} - partSet := MakeBlock(h, []Tx{Tx("Hello World")}, evList, nil, commit).MakePartSet(512) + partSet := MakeBlock(h, makeData([]Tx{Tx("Hello World")}, evList, nil), commit).MakePartSet(512) assert.NotNil(t, partSet) assert.EqualValues(t, 4, partSet.Total()) } @@ -148,7 +148,7 @@ func TestBlockHashesTo(t *testing.T) { ev := NewMockDuplicateVoteEvidenceWithValidator(h, time.Now(), vals[0], "block-test-chain") evList := []Evidence{ev} - block := MakeBlock(h, []Tx{Tx("Hello World")}, evList, nil, commit) + block := MakeBlock(h, makeData([]Tx{Tx("Hello World")}, evList, nil), commit) block.ValidatorsHash = valSet.Hash() assert.False(t, block.HashesTo([]byte{})) assert.False(t, block.HashesTo([]byte("something else"))) @@ -156,7 +156,7 @@ func TestBlockHashesTo(t *testing.T) { } func TestBlockSize(t *testing.T) { - size := MakeBlock(int64(3), []Tx{Tx("Hello World")}, nil, nil, nil).Size() + size := MakeBlock(int64(3), makeData([]Tx{Tx("Hello World")}, nil, nil), nil).Size() if size <= 0 { t.Fatal("Size of the block is zero or negative") } @@ -167,7 +167,7 @@ func TestBlockString(t *testing.T) { assert.Equal(t, "nil-Block", (*Block)(nil).StringIndented("")) assert.Equal(t, "nil-Block", (*Block)(nil).StringShort()) - 
block := MakeBlock(int64(3), []Tx{Tx("Hello World")}, nil, nil, nil) + block := MakeBlock(int64(3), makeData([]Tx{Tx("Hello World")}, nil, nil), nil) assert.NotEqual(t, "nil-Block", block.String()) assert.NotEqual(t, "nil-Block", block.StringIndented("")) assert.NotEqual(t, "nil-Block", block.StringShort()) @@ -620,16 +620,16 @@ func TestBlockIDValidateBasic(t *testing.T) { func TestBlockProtoBuf(t *testing.T) { h := tmrand.Int63() c1 := randCommit(time.Now()) - b1 := MakeBlock(h, []Tx{Tx([]byte{1})}, []Evidence{}, nil, &Commit{Signatures: []CommitSig{}}) + b1 := MakeBlock(h, makeData([]Tx{Tx([]byte{1})}, []Evidence{}, nil), &Commit{Signatures: []CommitSig{}}) b1.ProposerAddress = tmrand.Bytes(crypto.AddressSize) evidenceTime := time.Date(2019, 1, 1, 0, 0, 0, 0, time.UTC) evi := NewMockDuplicateVoteEvidence(h, evidenceTime, "block-test-chain") - b2 := MakeBlock(h, []Tx{Tx([]byte{1})}, []Evidence{evi}, nil, c1) + b2 := MakeBlock(h, makeData([]Tx{Tx([]byte{1})}, []Evidence{evi}, nil), c1) b2.ProposerAddress = tmrand.Bytes(crypto.AddressSize) b2.Data.Evidence.ByteSize() - b3 := MakeBlock(h, []Tx{}, []Evidence{}, nil, c1) + b3 := MakeBlock(h, makeData([]Tx{}, []Evidence{}, nil), c1) b3.ProposerAddress = tmrand.Bytes(crypto.AddressSize) testCases := []struct { msg string diff --git a/types/event_bus_test.go b/types/event_bus_test.go index 58630bf306..92387b9ef7 100644 --- a/types/event_bus_test.go +++ b/types/event_bus_test.go @@ -127,7 +127,7 @@ func TestEventBusPublishEventNewBlock(t *testing.T) { } }) - block := MakeBlock(0, []Tx{}, []Evidence{}, nil, nil) + block := MakeBlock(0, makeData([]Tx{}, []Evidence{}, nil), nil) // blockID := BlockID{Hash: block.Hash(), PartSetHeader: block.MakePartSet(BlockPartSizeBytes).Header()} resultBeginBlock := abci.ResponseBeginBlock{ Events: []abci.Event{ @@ -280,7 +280,7 @@ func TestEventBusPublishEventNewBlockHeader(t *testing.T) { } }) - block := MakeBlock(0, []Tx{}, []Evidence{}, nil, nil) + block := MakeBlock(0, 
makeData([]Tx{}, []Evidence{}, nil), nil) resultBeginBlock := abci.ResponseBeginBlock{ Events: []abci.Event{ {Type: "testType", Attributes: []abci.EventAttribute{{Key: []byte("baz"), Value: []byte("1")}}}, diff --git a/types/shares_test.go b/types/shares_test.go index d2f42b1565..7d8881413b 100644 --- a/types/shares_test.go +++ b/types/shares_test.go @@ -527,7 +527,7 @@ func generateRandomlySizedMessages(count, maxMsgSize int) Messages { } messages := Messages{MessagesList: msgs} - messages.sortMessages() + messages.SortMessages() return messages } diff --git a/types/test_util.go b/types/test_util.go index 367fd06317..eb3cdaec85 100644 --- a/types/test_util.go +++ b/types/test_util.go @@ -78,3 +78,15 @@ func MakeVote( return vote, nil } + +func makeData(txs []Tx, evd []Evidence, msgs []Message) Data { + return Data{ + Txs: txs, + Evidence: EvidenceData{ + Evidence: evd, + }, + Messages: Messages{ + MessagesList: msgs, + }, + } +} diff --git a/types/tx.go b/types/tx.go index 3b3955c2bd..22fc5048e3 100644 --- a/types/tx.go +++ b/types/tx.go @@ -148,6 +148,7 @@ func (tp TxProof) Validate() error { } func (tp *TxProof) VerifyProof() bool { + cursor := int32(0) for i, proof := range tp.Proofs { nmtProof := nmt.NewInclusionProof( int(proof.Start), @@ -155,15 +156,17 @@ func (tp *TxProof) VerifyProof() bool { proof.Nodes, true, ) + sharesUsed := proof.End - proof.Start valid := nmtProof.VerifyInclusion( consts.NewBaseHashFunc(), consts.TxNamespaceID, - tp.Data[i], + tp.Data[cursor:sharesUsed+cursor], tp.RowRoots[i], ) if !valid { return false } + cursor += sharesUsed } return true } diff --git a/types/tx_test.go b/types/tx_test.go index ade5b794ab..254b62e603 100644 --- a/types/tx_test.go +++ b/types/tx_test.go @@ -51,17 +51,25 @@ func TestUnwrapMalleatedTx(t *testing.T) { _, _, ok := UnwrapMalleatedTx(tx) require.False(t, ok) + data := Data{ + Txs: []Tx{tx}, + Evidence: EvidenceData{ + Evidence: nil, + }, + Messages: Messages{ + MessagesList: []Message{ + { + 
NamespaceID: []byte{1, 2, 3, 4, 5, 6, 7, 8}, + Data: []byte{1, 2, 3, 4, 5, 6, 7, 8, 9}, + }, + }, + }, + } + // create a proto message that used to be decoded when it shouldn't have randomBlock := MakeBlock( 1, - []Tx{tx}, - nil, - []Message{ - { - NamespaceID: []byte{1, 2, 3, 4, 5, 6, 7, 8}, - Data: []byte{1, 2, 3, 4, 5, 6, 7, 8, 9}, - }, - }, + data, &Commit{}, )