From bfa805000aef5ff4116ad9c2f4c841d3e83e9657 Mon Sep 17 00:00:00 2001 From: Brian W Bush Date: Fri, 11 Oct 2024 16:57:50 -0600 Subject: [PATCH] First full draft of throughput cps. --- CPS-0THR/.obsidian/app.json | 4 + CPS-0THR/.obsidian/appearance.json | 3 + .../.obsidian/core-plugins-migration.json | 30 +++ CPS-0THR/.obsidian/core-plugins.json | 20 ++ CPS-0THR/.obsidian/workspace.json | 161 ++++++++++++++++ CPS-0THR/README.md | 122 ++++++++++++ CPS-0THR/images/block-run.svg | 175 ++++++++++++++++++ CPS-0THR/images/block-size-1blk.svg | 167 +++++++++++++++++ CPS-0THR/images/block-size-6hr.svg | 159 ++++++++++++++++ CPS-0THR/images/block-size-6min.svg | 158 ++++++++++++++++ CPS-0THR/images/block-size.svg | 72 +++++++ 11 files changed, 1071 insertions(+) create mode 100644 CPS-0THR/.obsidian/app.json create mode 100644 CPS-0THR/.obsidian/appearance.json create mode 100644 CPS-0THR/.obsidian/core-plugins-migration.json create mode 100644 CPS-0THR/.obsidian/core-plugins.json create mode 100644 CPS-0THR/.obsidian/workspace.json create mode 100644 CPS-0THR/README.md create mode 100644 CPS-0THR/images/block-run.svg create mode 100644 CPS-0THR/images/block-size-1blk.svg create mode 100644 CPS-0THR/images/block-size-6hr.svg create mode 100644 CPS-0THR/images/block-size-6min.svg create mode 100644 CPS-0THR/images/block-size.svg diff --git a/CPS-0THR/.obsidian/app.json b/CPS-0THR/.obsidian/app.json new file mode 100644 index 0000000000..d60be2985e --- /dev/null +++ b/CPS-0THR/.obsidian/app.json @@ -0,0 +1,4 @@ +{ + "readableLineLength": false, + "showInlineTitle": true +} \ No newline at end of file diff --git a/CPS-0THR/.obsidian/appearance.json b/CPS-0THR/.obsidian/appearance.json new file mode 100644 index 0000000000..c8c365d89b --- /dev/null +++ b/CPS-0THR/.obsidian/appearance.json @@ -0,0 +1,3 @@ +{ + "accentColor": "" +} \ No newline at end of file diff --git a/CPS-0THR/.obsidian/core-plugins-migration.json b/CPS-0THR/.obsidian/core-plugins-migration.json new file mode 
100644 index 0000000000..436f43cf56 --- /dev/null +++ b/CPS-0THR/.obsidian/core-plugins-migration.json @@ -0,0 +1,30 @@ +{ + "file-explorer": true, + "global-search": true, + "switcher": true, + "graph": true, + "backlink": true, + "canvas": true, + "outgoing-link": true, + "tag-pane": true, + "properties": false, + "page-preview": true, + "daily-notes": true, + "templates": true, + "note-composer": true, + "command-palette": true, + "slash-command": false, + "editor-status": true, + "bookmarks": true, + "markdown-importer": false, + "zk-prefixer": false, + "random-note": false, + "outline": true, + "word-count": true, + "slides": false, + "audio-recorder": false, + "workspaces": false, + "file-recovery": true, + "publish": false, + "sync": false +} \ No newline at end of file diff --git a/CPS-0THR/.obsidian/core-plugins.json b/CPS-0THR/.obsidian/core-plugins.json new file mode 100644 index 0000000000..9405bfdc22 --- /dev/null +++ b/CPS-0THR/.obsidian/core-plugins.json @@ -0,0 +1,20 @@ +[ + "file-explorer", + "global-search", + "switcher", + "graph", + "backlink", + "canvas", + "outgoing-link", + "tag-pane", + "page-preview", + "daily-notes", + "templates", + "note-composer", + "command-palette", + "editor-status", + "bookmarks", + "outline", + "word-count", + "file-recovery" +] \ No newline at end of file diff --git a/CPS-0THR/.obsidian/workspace.json b/CPS-0THR/.obsidian/workspace.json new file mode 100644 index 0000000000..42cac7358c --- /dev/null +++ b/CPS-0THR/.obsidian/workspace.json @@ -0,0 +1,161 @@ +{ + "main": { + "id": "9ad733bf9d40292a", + "type": "split", + "children": [ + { + "id": "bb4c19fff6b445a7", + "type": "tabs", + "children": [ + { + "id": "ac95d6fb4b91c182", + "type": "leaf", + "state": { + "type": "markdown", + "state": { + "file": "README.md", + "mode": "preview", + "source": false + } + } + } + ] + } + ], + "direction": "vertical" + }, + "left": { + "id": "42fbb70d97ae7ece", + "type": "split", + "children": [ + { + "id": "9b1e9aff63612985", 
+ "type": "tabs", + "children": [ + { + "id": "2fb879ee63e10993", + "type": "leaf", + "state": { + "type": "file-explorer", + "state": { + "sortOrder": "alphabetical" + } + } + }, + { + "id": "ebf78146969b3b77", + "type": "leaf", + "state": { + "type": "search", + "state": { + "query": "", + "matchingCase": false, + "explainSearch": false, + "collapseAll": false, + "extraContext": false, + "sortOrder": "alphabetical" + } + } + }, + { + "id": "60dabc9a555b1fdb", + "type": "leaf", + "state": { + "type": "bookmarks", + "state": {} + } + } + ] + } + ], + "direction": "horizontal", + "width": 300, + "collapsed": true + }, + "right": { + "id": "26dcbb9d83205ff9", + "type": "split", + "children": [ + { + "id": "dedf04c63de7f0f2", + "type": "tabs", + "children": [ + { + "id": "d2f6819e85d58d71", + "type": "leaf", + "state": { + "type": "backlink", + "state": { + "file": "README.md", + "collapseAll": false, + "extraContext": false, + "sortOrder": "alphabetical", + "showSearch": false, + "searchQuery": "", + "backlinkCollapsed": false, + "unlinkedCollapsed": true + } + } + }, + { + "id": "c9ae1bb55a5727d5", + "type": "leaf", + "state": { + "type": "outgoing-link", + "state": { + "file": "README.md", + "linksCollapsed": false, + "unlinkedCollapsed": true + } + } + }, + { + "id": "e6119c8364900217", + "type": "leaf", + "state": { + "type": "tag", + "state": { + "sortOrder": "frequency", + "useHierarchy": true + } + } + }, + { + "id": "70bcc48cad065d66", + "type": "leaf", + "state": { + "type": "outline", + "state": { + "file": "README.md" + } + } + } + ] + } + ], + "direction": "horizontal", + "width": 300, + "collapsed": true + }, + "left-ribbon": { + "hiddenItems": { + "switcher:Open quick switcher": false, + "graph:Open graph view": false, + "canvas:Create new canvas": false, + "daily-notes:Open today's daily note": false, + "templates:Insert template": false, + "command-palette:Open command palette": false + } + }, + "active": "ac95d6fb4b91c182", + "lastOpenFiles": [ + 
"README.md~", + "images/block-run.svg", + "images/block-size.svg", + "images/block-size-6min.svg", + "images/block-size-6hr.svg", + "images/block-size-1blk.svg", + "images", + "README.md" + ] +} \ No newline at end of file diff --git a/CPS-0THR/README.md b/CPS-0THR/README.md new file mode 100644 index 0000000000..05a7021805 --- /dev/null +++ b/CPS-0THR/README.md @@ -0,0 +1,122 @@ +--- +CPS: 0THR +Title: Greater Transaction Throughput +Category: Consensus +Status: Open +Authors: + - Arnaud Bailly + - Brian W. Bush + - Hans Lahe +Implementors: Intersect +Discussions: +Created: 2024-10-11 +License: Apache-2.0 +--- + +# CIP-???? Greater Transaction Throughput + +> [!NOTE] +> The structure of a CPS file is summarized in the table below: +> +> | Name | Description | +> | ------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +> | Preamble | Headers containing metadata about the CPS. | +> | Abstract | A short (\~200 word) description of the target goals and the technical obstacles to those goals. | +> | Problem | A more detailed description of the problem and its context. This section should explain what motivates the writing of the CPS document. | +> | Use cases | A concrete set of examples written from a user's perspective, describing what and why they are trying to do. When they exist, this section should give a sense of the current alternatives and highlight why they are unsuitable. | +> | Goals | A list of goals and non-goals a project is pursuing, ranked by importance. 
These goals should help understand the design space for the solution and what the underlying project is ultimately trying to achieve.

Goals may also contain requirements for the project. For example, they may include anything from a deadline to a budget (in terms of complexity or time) to security concerns.

Finally, goals may also serve as evaluation metrics to assess how good a proposed solution is. | +> | Open Questions | A set of questions to which any proposed solution should find an answer. Questions should help guide solutions design by highlighting some foreseen vulnerabilities or design flaws. Solutions in the form of CIP should thereby include these questions as part of their _'Rationale'_ section and provide an argued answer to each. | +> | _optional sections_ | If necessary, these sections may also be included in any order:
**References**
**Appendices**
**Acknowledgements**
Do not add material in an optional section if it pertains to one of the standard sections. | +> | Copyright | The CPS must be explicitly licensed under acceptable copyright terms. | + +## Abstract + +The Cardano mainnet occasionally experiences congestion where there are too many transactions in the memory pool to be included in the next block or in the next few blocks. Sometimes the block utilization peaks above 90% for an extended period of time. This not only impacts general user experience but it can also severely impact use cases such as airdrops, oracles, partner chains, DEXes, and Dapps. Emerging use cases and application deployments promise to accelerate the need for high throughput on Cardano. Applied research on several fronts is needed to propose and provide evidence for techniques that increase throughput measured in terms of transactions, transaction size, and script execution units. Such work should be based on a clear understanding of stakeholder requirements. + +## Motivation + +The Cardano mainnet occasionally encounters periods of congestion, where the number of pending transactions in the memory pool exceeds the network's capacity to include them in the upcoming block or even the next several blocks. During these times, block utilization can consistently peak above 90%, sometimes for an extended duration. This high level of congestion not only degrades the general user experience by causing delays in transaction processing but also poses significant challenges for specific use cases. For instance, activities such as airdrops, which require efficient processing of large numbers of transactions, can be significantly hampered. Similarly, oracles that depend on timely data updates might face disruptions, while partner chains could experience slower cross-chain interactions. 
The impact is also felt by decentralized exchanges (DEXes) that need fast transaction confirmations to maintain liquidity and decentralized applications (DApps) whose performance and user interactions are affected. + +Moreover, the ongoing evolution of the Cardano ecosystem is expected to amplify these demands. New and emerging use cases, along with an increasing number of application deployments, are likely to accelerate the need for higher throughput and improved scalability on the network. As the ecosystem expands to support more diverse and sophisticated use cases, such as real-time financial applications, gaming, or supply chain solutions, the pressure on Cardano's infrastructure to handle larger transaction volumes efficiently will continue to grow. Addressing these scaling challenges will be essential to ensure a seamless experience for users and to maintain Cardano’s competitive position in the rapidly evolving blockchain space. + +Cardano's current throughput (measured both in data rate and available script execution units) is usually (but not universally) adequate for the current demand. There is also some protocol-parameter opportunity to increase the block sizes and script execution limits to meet emerging future demands for increased network capacity. There are however fundamental limits to how far the block size and the script execution budget can be pushed, while maintaining system security. + +In Ouroboros Praos, maintaining the security of the system requires that blocks be distributed reliably across the network within a specified time frame, known as $\Delta$, which is set at five seconds on the Cardano mainnet. The process of relaying blocks is inherently sequential: blocks are transmitted from one block producer node to the next through a series of intermediary relay nodes. 
The time required for this process depends on the number of network hops between consecutive block producers and the network latency associated with each hop, considering that these hops often span the entire globe. Since this entire operation must consistently be completed within a five-second window, it imposes strict limitations on the maximum block size and the amount of time available for validating transactions and scripts. + +To significantly scale beyond these limitations, fundamental changes to the overall blockchain algorithm are necessary. The potential for scaling is substantial, as the network and computational resources of most nodes are largely underutilized, remaining almost idle for much of the time. By adopting a different algorithm, these resources could be leveraged more effectively to increase the total bandwidth of the blockchain. Such improvements could enable the system to handle a higher volume of transactions while maintaining security and efficiency, addressing current limitations and unlocking new levels of scalability for the Cardano network. As the blockchain continues to evolve, optimizing the utilization of network and computational resources will be crucial to supporting future growth and expanding the capabilities of the platform. + +Additionally, certain applications demand predictability or specific quality-of-service guarantees to function optimally. These applications might not necessarily need high levels of sustained throughput, but they are particularly sensitive to fluctuations in how quickly a transaction can be processed and included in a block after entering the memory pool. In such cases, even small delays or variances in the time it takes for a transaction to move from the memory pool into a confirmed block can significantly impact the performance, reliability, and user experience associated with these applications. 
+ +For example, in financial services, delays in processing transactions could disrupt trading activities, arbitrage opportunities, or other time-sensitive financial operations where precise timing is critical. Similarly, gaming applications or real-time auctions require transactions to be confirmed quickly to maintain a seamless user experience or to uphold the integrity of the bidding process. Predictable block times are also important for supply chain applications, where time-sensitive tracking and updates must be performed in real-time to reflect changes in inventory or shipments. + +Quality-of-service guarantees can also be crucial for smart contracts that rely on external data feeds (oracles). These contracts might need a high degree of predictability in transaction processing to ensure that data updates happen within specific timeframes, thereby maintaining the accuracy of the contract's execution. The lack of consistency in transaction inclusion times could lead to issues such as missed deadlines, inconsistent states, or degraded performance for automated processes. + +Thus, while the need for raw throughput is one aspect of blockchain performance, the ability to ensure a predictable and stable processing time for transactions is equally important for many applications. Addressing this challenge involves optimizing the underlying network protocol, enhancing transaction prioritization mechanisms, or implementing features that can deliver the necessary guarantees for latency-sensitive use cases. As more sophisticated applications continue to emerge on blockchain platforms, meeting these requirements will be essential to ensuring that the technology can support a diverse range of real-world use cases effectively. + +## Problem + +Historical data indicates that an appreciable fraction of the blocks on the Cardano mainnet have been nearly full and that periods of high utilization can last for minutes. 
In such situations it is likely that additional transactions queue up in the nodes' memory pool, awaiting inclusion in a future block. Needless to say, block congestion correlates directly with transaction throughput. With the current average block-production rate of one per twenty seconds, that queuing can translate into unacceptably long waits for a transaction to be included in a block and receive its first confirmation. Even without the additional demand anticipated when new projects come online in the future, there are sometimes periods where user experience is degraded by limited throughput. + +The plots below illustrate the significant frequency of blocks that are nearly full. (The maximum block size since Epoch 335 has been 90,112 bytes.) The six-minute average block size also indicates the presence of full blocks, but the block-size limit does not appear significant in the six-hour average. On the epoch-average level there are periodic peaks in block size. Note that when interpreting these diagrams, it is important to consider that transactions will not exactly fill a block. Also, some blocks hit their limit on Plutus execution cost and memory while still well below the maximum-size limit: they are, nevertheless, fully utilized. + +| | | +| ---------------------------------------------------------- | ---------------------------------------------------------- | +| ![Distribution of block sizes](images/block-size-1blk.svg) | ![Distribution of block sizes](images/block-size-6min.svg) | +| ![Distribution of block sizes](images/block-size-6hr.svg) | ![Distribution of block sizes](images/block-size.svg) | + +> [!NOTE] +> It's quite a bit more work to repeat these plots for Plutus execution units, but that might be motivating. + +Of particular interest is the following plot that shows the distribution of the length of runs of consecutive blocks that are all larger than 80 kB. 
Occasionally, there are stretches of more than ten blocks being almost full, and in one case there was a series of 194 almost-full blocks. These long periods of nearly full blocks may be correlated with long waits between the time a user submits a transaction to the memory pool and the time it appears in a block. + +![Runs of nearly-full blocks](images/block-run.svg) + +> [!NOTE] +> The log scale on the plot above hides the rare cases in the tail. Consider using a square-root or other custom scale so that viewers can see the whole dynamic range of the data. + +## Use cases + +Even with the existing rate of transactions on the Cardano mainnet, there are periods where throughput limits delay the inclusion of transactions in blocks and hamper settlement. Growing and emerging use cases will exacerbate the situation. + +- Time-sensitive applications like DEXes and DApps require prompt inclusion of their transactions on the blockchain, and any delay also translates to a delay in settlement. See also [CPS-???? Faster Settlement](https://github.com/cardano-foundation/CIPs/pull/922). +- Newly released high-profile Cardano applications tend to create congestion as many users experiment and transact with the new capabilities shortly after they become available. Greater transaction throughput will improve the initial experience of new users of those applications, and some of those new users may be new to Cardano. *First impressions are important.* +- Partner chains, bridges, and oracles rely on quality-of-service guarantees that support a regular and predictable rhythm of their transactions being included in blocks. Delays in such transactions' inclusion in blocks can cascade to DApps that interact with such services. Delays on oracles result in stale data being provided to DApps or in DApps having to wait for the updated oracle state to be posted. Delays on partner chains or bridges result in bottlenecks in the transfer of funds or information between chains. 
+- Transaction "scoopers" and "batchifiers" work most efficiently when high throughput is possible. +- Airdrops are well known to have caused spikes in network load and block utilization. +- Any of the above use cases that also involve executing Plutus scripts add a requirement for execution-unit throughput on top of transaction-size throughput. Applications that do complex validation encounter this extra dimension of resource usage. + +> [!NOTE] +> Specific existing or anticipated projects might want to chime in here with the details of their use cases and with metrics for desired throughput. + +## Goals + +1. Develop precise requirements for transaction and script-execution throughput for Cardano mainnet, categorized by use case and metrics for quality of service. +2. Increase transaction throughput in terms of number, size, and execution units and provide evidence that the proposed techniques meet stakeholder requirements. +3. Investigate and semi-quantitatively compare throughput techniques such as input endorsers, zero-knowledge technologies, transaction prioritization, offloading work (Mithril, partner chains, etc.), and protocol-parameter changes. +4. Propose methods for guaranteeing specific levels of throughput, including priority tiers and reservations. + +In addition to pursuing the goals above, solutions should respect the following constraints: + +1. Avoid approaches with long development timelines or high opportunity costs. +2. Do not weaken Ouroboros security or substantially enlarge its attack surface. +3. Minimize changes that increase the resource usage of Cardano nodes or the cost of operating them. +4. Guard against protocol alterations that adversely impact other scaling metrics such as settlement time. + +## Open questions + +- How much larger can existing Ouroboros Praos blocks be made without affecting Cardano mainnet safety or performance? 
+- How much can the block-production rate (the active-slot coefficient) be increased without affecting Cardano mainnet safety or performance? +- What fraction of theoretical global network bandwidth can techniques like input endorsers efficiently utilize? +- Are zero-knowledge techniques a viable option for increasing transaction throughput? +- How much will implementing greater transaction throughput impact the hardware requirements for and the cost of operating a Cardano stakepool? +- Will changes to the memory pool be necessary to support greater transaction throughput? +- Will increasing throughput adversely affect other performance metrics such as settlement time? +- Will higher throughput open Cardano to a broader spectrum of denial-of-service and other attacks? +- To what extent is the Plutus execution budget for blocks a more limiting constraint than the size budget for blocks? What statistics support this? What types of applications hit this constraint, and how often? +- Can high-throughput solutions simplify the operation of transaction scoopers and batchifiers? +- Does [Ouroboros Leios](https://iohk.io/en/research/library/papers/high-throughput-blockchain-consensus-under-realistic-network-assumptions/) satisfy stakeholder requirements for greater throughput? Would simpler solutions be adequate in the short term? +- How much can pay-for-priority schemes alleviate throughput concerns for high-value applications that are particularly sensitive to throughput? + +## Copyright + +This CPS is licensed under [Apache-2.0](http://www.apache.org/licenses/LICENSE-2.0). 
diff --git a/CPS-0THR/images/block-run.svg b/CPS-0THR/images/block-run.svg new file mode 100644 index 0000000000..51cce0301a --- /dev/null +++ b/CPS-0THR/images/block-run.svg @@ -0,0 +1,175 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +1e+01 +1e+03 +1e+05 + + + + + + + + +0 +50 +100 +150 +200 +Consecutive Blocks >= 80 kB +Number of Occurrences +Runs of Nearly-Full Blocks (since Epoch 335) + + diff --git a/CPS-0THR/images/block-size-1blk.svg b/CPS-0THR/images/block-size-1blk.svg new file mode 100644 index 0000000000..b2d06f1561 --- /dev/null +++ b/CPS-0THR/images/block-size-1blk.svg @@ -0,0 +1,167 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +0 +50000 +100000 +150000 +200000 + + + + + + + + + +0 +25000 +50000 +75000 +Size [B] +Number of Blocks +Distribution of Block Size (since Epoch 335) + + diff --git a/CPS-0THR/images/block-size-6hr.svg b/CPS-0THR/images/block-size-6hr.svg new file mode 100644 index 0000000000..fd3fa39788 --- /dev/null +++ b/CPS-0THR/images/block-size-6hr.svg @@ -0,0 +1,159 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +0 +25 +50 +75 +100 +125 + + + + + + + + + +25000 +50000 +75000 +Mean Size [B] +Number of Blocks +Distribution of Six-Hourly Mean Block Size (since Epoch 335) + + diff --git a/CPS-0THR/images/block-size-6min.svg b/CPS-0THR/images/block-size-6min.svg new file mode 100644 
index 0000000000..4771764534 --- /dev/null +++ b/CPS-0THR/images/block-size-6min.svg @@ -0,0 +1,158 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +0 +2000 +4000 + + + + + + + +0 +25000 +50000 +75000 +Mean Size [B] +Number of Blocks +Distribution of Six-Minute Mean Block Size (since Epoch 335) + + diff --git a/CPS-0THR/images/block-size.svg b/CPS-0THR/images/block-size.svg new file mode 100644 index 0000000000..d8b6d7a795 --- /dev/null +++ b/CPS-0THR/images/block-size.svg @@ -0,0 +1,72 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +0 +25000 +50000 +75000 + + + + + + + + +350 +400 +450 +500 +Epoch No +Mean Size [B] +Epoch-Mean Block Size (since Epoch 335) + +