From a880c0055493d2811779faf2ff6a7f20559cf159 Mon Sep 17 00:00:00 2001 From: Ynon Flum Date: Tue, 28 Feb 2023 16:58:41 +0200 Subject: [PATCH] Merge master [don't auto-bump] --- Makefile | 13 ++-- bftengine/include/bftengine/CryptoManager.hpp | 6 +- .../bcstatetransfer/AsyncStateTransferCRE.hpp | 1 + bftengine/src/bcstatetransfer/BCStateTran.cpp | 4 +- bftengine/src/bftengine/CryptoManager.cpp | 3 - .../src/bftengine/KeyExchangeManager.cpp | 4 +- bftengine/src/bftengine/ReplicaBase.hpp | 12 ---- bftengine/src/bftengine/ReplicaImp.cpp | 2 +- bftengine/src/bftengine/ReplicaImp.hpp | 5 +- bftengine/src/bftengine/SigManager.cpp | 35 +++++----- bftengine/src/bftengine/SigManager.hpp | 2 +- .../ValidationOnlyIdentityManager.cpp | 1 + .../ValidationOnlyIdentityManager.hpp | 3 +- .../messages/PreProcessResultMsg_test.cpp | 1 + .../preprocessor/tests/preprocessor_test.cpp | 3 +- .../clientsManager/ClientsManager_test.cpp | 1 + bftengine/tests/messages/helper.cpp | 2 +- bftengine/tests/messages/helper.hpp | 2 +- .../threshsign/ThresholdSignaturesTypes.cpp | 2 +- tests/apollo/test_skvbc_dbsnapshot.py | 3 - tests/apollo/test_skvbc_reconfiguration.py | 15 ++-- tests/apollo/test_skvbc_state_transfer.py | 11 +-- tests/apollo/util/bft.py | 70 +++++++++++-------- tests/apollo/util/eliot_logging.py | 6 +- tests/apollo/util/skvbc.py | 8 ++- tests/apollo/util/test_base.py | 33 +++++---- 26 files changed, 128 insertions(+), 120 deletions(-) diff --git a/Makefile b/Makefile index 3565a06e86..6e12f8709f 100644 --- a/Makefile +++ b/Makefile @@ -168,6 +168,10 @@ CONCORD_BFT_ADDITIONAL_RUN_PARAMS+=\ --network host endif +CONCORD_BFT_ADDITIONAL_CTEST_RUN_PARAMS="--output-on-failure" +FAILED_CASES_FILE=/concord-bft/build/apollogs/latest/failed_cases.txt +PRINT_FAILED_APOLLO_CASES=([ -s ${FAILED_CASES_FILE} ] && echo Failed apollo cases: && cat -n ${FAILED_CASES_FILE} && exit 1) + ifneq (${APOLLO_LOG_STDOUT},) CONCORD_BFT_ADDITIONAL_RUN_PARAMS+=--env APOLLO_LOG_STDOUT=TRUE CONCORD_BFT_ADDITIONAL_CTEST_RUN_PARAMS+=-V @@ -189,8 +193,7 @@ BASIC_RUN_PARAMS?=-it --init --rm --privileged=true \ --name="${CONCORD_BFT_DOCKER_CONTAINER}" \ --workdir=${CONCORD_BFT_TARGET_SOURCE_PATH} \ --mount type=bind,source=${CURDIR},target=${CONCORD_BFT_TARGET_SOURCE_PATH}${CONCORD_BFT_CONTAINER_MOUNT_CONSISTENCY} \ - ${CONCORD_BFT_ADDITIONAL_RUN_PARAMS} \ - ${CONCORD_BFT_DOCKER_IMAGE_FULL_PATH} + ${CONCORD_BFT_ADDITIONAL_RUN_PARAMS} ${CONCORD_BFT_DOCKER_IMAGE_FULL_PATH} .DEFAULT_GOAL:=build @@ -336,7 +339,7 @@ test: ## Run all tests ${CONCORD_BFT_CONTAINER_SHELL} -c \ "mkdir -p ${CONCORD_BFT_CORE_DIR} && \ cd ${CONCORD_BFT_BUILD_DIR} && \ - ctest ${CONCORD_BFT_ADDITIONAL_CTEST_RUN_PARAMS} --timeout ${CONCORD_BFT_CTEST_TIMEOUT} --output-on-failure" + ctest ${CONCORD_BFT_ADDITIONAL_CTEST_RUN_PARAMS} --timeout ${CONCORD_BFT_CTEST_TIMEOUT} || ${PRINT_FAILED_APOLLO_CASES}" .PHONY: simple-test simple-test: ## Run Simple Test @@ -350,7 +353,7 @@ test-range: ## Run all tests in the range [START,END], inclusive: `make test-ran ${CONCORD_BFT_CONTAINER_SHELL} -c \ "mkdir -p ${CONCORD_BFT_CORE_DIR} && \ cd ${CONCORD_BFT_BUILD_DIR} && \ - ctest ${CONCORD_BFT_ADDITIONAL_CTEST_RUN_PARAMS} -I ${START},${END}" + ctest ${CONCORD_BFT_ADDITIONAL_CTEST_RUN_PARAMS} -I ${START},${END} || ${PRINT_FAILED_APOLLO_CASES}" # ctest allows repeating tests, but not with the exact needed behavior below. .PHONY: test-single-suite @@ -362,7 +365,7 @@ test-single-suite: ## Run a single test `make test-single-suite TEST_NAME= @@ -57,8 +56,6 @@ std::unique_ptr& CryptoManager::getLatestCryptoSystem() const { */ concord::crypto::SignatureAlgorithm CryptoManager::getLatestSignatureAlgorithm() const { const std::unordered_map typeToAlgorithm{ - {MULTISIG_BLS_SCHEME, concord::crypto::SignatureAlgorithm::BLS}, - {THRESHOLD_BLS_SCHEME, concord::crypto::SignatureAlgorithm::BLS}, {MULTISIG_EDDSA_SCHEME, concord::crypto::SignatureAlgorithm::EdDSA}, }; auto currentType = getLatestCryptoSystem()->getType(); diff --git a/bftengine/src/bftengine/KeyExchangeManager.cpp b/bftengine/src/bftengine/KeyExchangeManager.cpp index 77bca63157..3c4448ca29 100644 --- a/bftengine/src/bftengine/KeyExchangeManager.cpp +++ b/bftengine/src/bftengine/KeyExchangeManager.cpp @@ -89,9 +89,9 @@ void KeyExchangeManager::registerNewKeyPair(uint16_t repID, candidate_private_keys_.generated.clear(); // erasing seqnum from the map seq_candidate_map_.erase(sn); - ConcordAssert(private_keys_.key_data().generated.pub == kemsg.pubkey); + ConcordAssert(private_keys_.key_data().generated.pub == pubkey); private_keys_.onKeyExchange(cid, sn); - for (auto e : registryToExchange_) e->onPrivateKeyExchange(private_keys_.key_data().keys[sn], kemsg.pubkey, sn); + for (auto e : registryToExchange_) e->onPrivateKeyExchange(private_keys_.key_data().keys[sn], pubkey, sn); metrics_->self_key_exchange_counter++; } diff --git a/bftengine/src/bftengine/ReplicaBase.hpp b/bftengine/src/bftengine/ReplicaBase.hpp index 86cd8d0f81..4fb4406d24 100644 --- a/bftengine/src/bftengine/ReplicaBase.hpp +++ b/bftengine/src/bftengine/ReplicaBase.hpp @@ -99,18 +99,6 @@ class ReplicaBase { } } - /*bool validateMessage(MessageBase* msg) { - try { - if (config_.debugStatisticsEnabled) DebugStatistics::onReceivedExMessage(msg->type()); - - msg->validate(*repsInfo); - return true; - } catch (std::exception& e) { - onReportAboutInvalidMessage(msg, e.what()); - return false; - } - }*/ - protected: static const uint16_t ALL_OTHER_REPLICAS = UINT16_MAX; diff --git a/bftengine/src/bftengine/ReplicaImp.cpp b/bftengine/src/bftengine/ReplicaImp.cpp index 72de4e28dd..ca79da296e 100644 --- a/bftengine/src/bftengine/ReplicaImp.cpp +++ b/bftengine/src/bftengine/ReplicaImp.cpp @@ -4437,7 +4437,7 @@ ReplicaImp::ReplicaImp(bool firstTime, concord::crypto::KeyFormat::PemFormat, {{repsInfo->getIdOfOperator(), ReplicaConfig::instance().getOperatorPublicKey(), - concord::crypto::KeyFormat::PemFormat}} + concord::crypto::KeyFormat::PemFormat}}, *repsInfo); viewsManager = new ViewsManager(repsInfo); } else { diff --git a/bftengine/src/bftengine/ReplicaImp.hpp b/bftengine/src/bftengine/ReplicaImp.hpp index 3ff80a84e5..98d243a9fb 100644 --- a/bftengine/src/bftengine/ReplicaImp.hpp +++ b/bftengine/src/bftengine/ReplicaImp.hpp @@ -371,9 +371,6 @@ class ReplicaImp : public InternalReplicaApi, public ReplicaForStateTransfer { void recoverRequests(); - template - bool validateMessage(MessageType* msg); - std::function getMessageValidator(); // InternalReplicaApi @@ -633,7 +630,7 @@ class ReplicaImp : public InternalReplicaApi, public ReplicaForStateTransfer { void addTimers(); void startConsensusProcess(PrePrepareMsgUPtr pp, bool isCreatedEarlier); void startConsensusProcess(PrePrepareMsgUPtr pp); - void clearClientRequestQueue(); + size_t clearClientRequestQueue(); /** * Updates both seqNumInfo and slow_path metric * @param seqNumInfo diff --git a/bftengine/src/bftengine/SigManager.cpp b/bftengine/src/bftengine/SigManager.cpp index 955e3d7444..4e22bf787b 100644 --- a/bftengine/src/bftengine/SigManager.cpp +++ b/bftengine/src/bftengine/SigManager.cpp @@ -49,14 +49,15 @@ SigManager* SigManager::instance() { void SigManager::reset(std::shared_ptr other) { s_sm = other; } -std::shared_ptr SigManager::init(ReplicaId myId, - const Key& mySigPrivateKey, - const std::set>& publicKeysOfReplicas, - KeyFormat replicasKeysFormat, - const std::set>>* publicKeysOfClients, - KeyFormat clientsKeysFormat, - const std::optional>& operatorKey, - const ReplicasInfo& replicasInfo) { +std::shared_ptr SigManager::init( + ReplicaId myId, + const Key& mySigPrivateKey, + const std::set>& publicKeysOfReplicas, + KeyFormat replicasKeysFormat, + const std::set>>* publicKeysOfClients, + KeyFormat clientsKeysFormat, + const std::optional>& operatorKey, + const ReplicasInfo& replicasInfo) { vector> publickeys; map publicKeysMapping; size_t lowBound, highBound; @@ -102,14 +103,14 @@ std::shared_ptr SigManager::init(ReplicaId myId, } LOG_INFO(GL, "Done Compute Start ctor for SigManager with " << KVLOG(publickeys.size(), publicKeysMapping.size())); - auto ret = std::shared_ptr{new SigManager( - myId, - make_pair(mySigPrivateKey, replicasKeysFormat), - publickeys, - publicKeysMapping, - ((ReplicaConfig::instance().clientTransactionSigningEnabled) && (publicKeysOfClients != nullptr)), - operatorKey, - replicasInfo)}; + auto ret = std::shared_ptr{ + new SigManager(myId, + make_pair(mySigPrivateKey, replicasKeysFormat), + publickeys, + publicKeysMapping, + ((ReplicaConfig::instance().clientTransactionSigningEnabled) && (publicKeysOfClients != nullptr)), + operatorKey, + replicasInfo)}; reset(ret); return ret; @@ -120,7 +121,7 @@ SigManager::SigManager(PrincipalId myId, const vector>& publickeys, const map& publicKeysMapping, bool clientTransactionSigningEnabled, - const std::optional>& operatorKey + const std::optional>& operatorKey, const ReplicasInfo& replicasInfo) : myId_(myId), clientTransactionSigningEnabled_(clientTransactionSigningEnabled), diff --git a/bftengine/src/bftengine/SigManager.hpp b/bftengine/src/bftengine/SigManager.hpp index 28bf9d9105..89bdc9afdb 100644 --- a/bftengine/src/bftengine/SigManager.hpp +++ b/bftengine/src/bftengine/SigManager.hpp @@ -14,10 +14,10 @@ #include "PrimitiveTypes.hpp" #include "util/assertUtils.hpp" #include "util/Metrics.hpp" +#include "util/memory.hpp" #include "crypto/crypto.hpp" #include "crypto/signer.hpp" #include "crypto/verifier.hpp" -#include "memory.hpp" #include "SysConsts.hpp" #include #include diff --git a/bftengine/src/bftengine/ValidationOnlyIdentityManager.cpp b/bftengine/src/bftengine/ValidationOnlyIdentityManager.cpp index b8617382d9..cbec778b5e 100644 --- a/bftengine/src/bftengine/ValidationOnlyIdentityManager.cpp +++ b/bftengine/src/bftengine/ValidationOnlyIdentityManager.cpp @@ -24,6 +24,7 @@ ValidationOnlyIdentityManager::ValidationOnlyIdentityManager( publickeys, publicKeysMapping, false, + {}, replicasInfo) {} bool ValidationOnlyIdentityManager::verifySig( diff --git a/bftengine/src/bftengine/ValidationOnlyIdentityManager.hpp b/bftengine/src/bftengine/ValidationOnlyIdentityManager.hpp index 878251eefe..ace254a3e4 100644 --- a/bftengine/src/bftengine/ValidationOnlyIdentityManager.hpp +++ b/bftengine/src/bftengine/ValidationOnlyIdentityManager.hpp @@ -17,7 +17,8 @@ namespace bftEngine::impl { /* This class is a hack to enable the validation of replica signatures using fixed keys without initializing a CryptoManager object. Messages such as CheckpointMsg use a global singleton to expose their validation logic, - thus an instance of this class can be used to succeed in performing the validation. + thus an instance of this class can be registered as a global SigManager to succeed in performing CheckpointMsg + validations. */ class ValidationOnlyIdentityManager : public SigManager { public: diff --git a/bftengine/src/preprocessor/tests/messages/PreProcessResultMsg_test.cpp b/bftengine/src/preprocessor/tests/messages/PreProcessResultMsg_test.cpp index cd8fef6717..549e3d7340 100644 --- a/bftengine/src/preprocessor/tests/messages/PreProcessResultMsg_test.cpp +++ b/bftengine/src/preprocessor/tests/messages/PreProcessResultMsg_test.cpp @@ -58,6 +58,7 @@ std::shared_ptr createSigManagerWithTransactionSign replicasKeysFormat, publicKeysOfClients, concord::crypto::KeyFormat::HexaDecimalStrippedFormat, + {}, replicasInfo); } diff --git a/bftengine/src/preprocessor/tests/preprocessor_test.cpp b/bftengine/src/preprocessor/tests/preprocessor_test.cpp index 0ede21dddd..93c70f58a3 100644 --- a/bftengine/src/preprocessor/tests/preprocessor_test.cpp +++ b/bftengine/src/preprocessor/tests/preprocessor_test.cpp @@ -23,7 +23,7 @@ #include "ReplicaConfig.hpp" #include "IncomingMsgsStorageImp.hpp" #include "gtest/gtest.h" -#include "threshsign/eddsa/EdDSAMultisigFactory.h" +#include "crypto/threshsign/eddsa/EdDSAMultisigFactory.h" #include "CryptoManager.hpp" #include "tests/messages/helper.hpp" #include "tests/config/test_comm_config.hpp" @@ -216,6 +216,7 @@ void setUpConfiguration_4() { concord::crypto::KeyFormat::HexaDecimalStrippedFormat, nullptr, concord::crypto::KeyFormat::HexaDecimalStrippedFormat, + {}, *replicasInfo[i].get()); cryptoManager[i] = CryptoManager::init(std::make_unique(i, publicKeysVector, replicaPrivKeys.at(i))); diff --git a/bftengine/tests/clientsManager/ClientsManager_test.cpp b/bftengine/tests/clientsManager/ClientsManager_test.cpp index 58863a3c98..cb562a3653 100644 --- a/bftengine/tests/clientsManager/ClientsManager_test.cpp +++ b/bftengine/tests/clientsManager/ClientsManager_test.cpp @@ -151,6 +151,7 @@ static void resetSigManager() { kKeyFormatForTesting, &kInitialPublicKeysOfClientsForTesting, kKeyFormatForTesting, + {}, *sigManagerReplicasInfoForTesting); } diff --git a/bftengine/tests/messages/helper.cpp b/bftengine/tests/messages/helper.cpp index 0e01a16c40..d7e75f6f17 100644 --- a/bftengine/tests/messages/helper.cpp +++ b/bftengine/tests/messages/helper.cpp @@ -84,7 +84,7 @@ class TestSigManager : public bftEngine::impl::SigManager { const ReplicasInfo& replicasInfo, bool transactionSigningEnabled = false) : bftEngine::impl::SigManager( - myId, mySigPrivateKey, publickeys, publicKeysMapping, transactionSigningEnabled, replicasInfo) {} + myId, mySigPrivateKey, publickeys, publicKeysMapping, transactionSigningEnabled, {}, replicasInfo) {} static std::shared_ptr init(size_t myId, std::string& myPrivateKey, diff --git a/bftengine/tests/messages/helper.hpp b/bftengine/tests/messages/helper.hpp index 48eefcc25a..da31d405a9 100644 --- a/bftengine/tests/messages/helper.hpp +++ b/bftengine/tests/messages/helper.hpp @@ -26,7 +26,7 @@ #include "crypto/threshsign/IPublicKey.h" #include "CryptoManager.hpp" #include "SigManager.hpp" -#include "threshsign/eddsa/EdDSAMultisigFactory.h" +#include "crypto/threshsign/eddsa/EdDSAMultisigFactory.h" using bftEngine::impl::ReplicasInfo; diff --git a/libs/crypto/src/threshsign/ThresholdSignaturesTypes.cpp b/libs/crypto/src/threshsign/ThresholdSignaturesTypes.cpp index 57c64abe35..a10d06f28b 100644 --- a/libs/crypto/src/threshsign/ThresholdSignaturesTypes.cpp +++ b/libs/crypto/src/threshsign/ThresholdSignaturesTypes.cpp @@ -33,7 +33,7 @@ Cryptosystem::Cryptosystem(const std::string& sysType, subtype_(sysSubtype), numSigners_(sysNumSigners), threshold_(sysThreshold), - signerID_(NID), + signerID_(INVALID_SIGNER_ID), publicKey_("uninitialized") { if (!isValidCryptosystemSelection(sysType, sysSubtype, sysNumSigners, sysThreshold)) { throw std::runtime_error( diff --git a/tests/apollo/test_skvbc_dbsnapshot.py b/tests/apollo/test_skvbc_dbsnapshot.py index f4ee4bb67f..08501fb52a 100644 --- a/tests/apollo/test_skvbc_dbsnapshot.py +++ b/tests/apollo/test_skvbc_dbsnapshot.py @@ -595,9 +595,6 @@ async def test_db_checkpoint_creation_with_wedge(self, bft_network): op = operator.Operator( bft_network.config, client, bft_network.builddir) await op.wedge() - #from time import sleep - #sleep(1) - #return await bft_network.wait_for_stable_checkpoint( bft_network.all_replicas(), stable_seqnum = (checkpoint_before + 2) * 150) await self.validate_stop_on_wedge_point(bft_network, skvbc=skvbc, fullWedge=True) # verify that snapshot is created on wedge point diff --git a/tests/apollo/test_skvbc_reconfiguration.py b/tests/apollo/test_skvbc_reconfiguration.py index fd641058b0..6bd842a461 100644 --- a/tests/apollo/test_skvbc_reconfiguration.py +++ b/tests/apollo/test_skvbc_reconfiguration.py @@ -636,7 +636,7 @@ async def test_key_exchange(self, bft_network): client = bft_network.random_client() skvbc = kvbc.SimpleKVBCProtocol(bft_network) - await self.send_and_check_key_exchange(target_replica=0, bft_network=bft_network, client=client) + await self.send_and_wait_for_key_exchange_execution(target_replica=0, bft_network=bft_network, client=client) await skvbc.fill_and_wait_for_checkpoint(initial_nodes=bft_network.all_replicas(), num_of_checkpoints_to_add=3, @@ -667,7 +667,7 @@ async def test_key_exchange_with_restart(self, bft_network): num_of_checkpoints_to_add=2, verify_checkpoint_persistency=False) - await self.send_and_check_key_exchange(target_replica=1, bft_network=bft_network, client=client) + await self.send_and_wait_for_key_exchange_execution(target_replica=1, bft_network=bft_network, client=client) await skvbc.fill_and_wait_for_checkpoint(initial_nodes=bft_network.all_replicas(), num_of_checkpoints_to_add=2, @@ -714,7 +714,7 @@ async def test_key_exchange_with_file_backup(self, bft_network): num_of_checkpoints_to_add=2, verify_checkpoint_persistency=False) - await self.send_and_check_key_exchange(target_replica=1, bft_network=bft_network, client=client) + await self.send_and_wait_for_key_exchange_execution(target_replica=1, bft_network=bft_network, client=client) await skvbc.fill_and_wait_for_checkpoint(initial_nodes=bft_network.all_replicas(), num_of_checkpoints_to_add=2, @@ -722,7 +722,7 @@ async def test_key_exchange_with_file_backup(self, bft_network): #backup gen-sec.1 file copy2(bft_network.testdir + "/gen-sec.1" , bft_network.testdir + "/gen-sec.1.bak") - await self.send_and_check_key_exchange(target_replica=1, bft_network=bft_network, client=client) + await self.send_and_wait_for_key_exchange_execution(target_replica=1, bft_network=bft_network, client=client) bft_network.stop_replica(1) # restore gen-sec.1 file copy2(bft_network.testdir + "/gen-sec.1.bak" , bft_network.testdir + "/gen-sec.1") @@ -733,7 +733,7 @@ async def test_key_exchange_with_file_backup(self, bft_network): verify_checkpoint_persistency=False, assert_state_transfer_not_started=False) - async def send_and_check_key_exchange(self, target_replica, bft_network, client): + async def send_and_wait_for_key_exchange_execution(self, target_replica, bft_network, client): with log.start_action(action_type="send_and_check_key_exchange", target_replica=target_replica): sent_key_exchange_counter_before = 0 @@ -745,8 +745,7 @@ async def send_and_check_key_exchange(self, target_replica, bft_network, client) # public_key_exchange_for_peer_counter_before = await bft_network.metrics.get(0, *["KeyExchangeManager", "Counters", "public_key_exchange_for_peer"]) except: log.log_message(message_type=f"Replica {target_replica} was unable to query KeyExchangeMetrics, assuming zero") - - log.log_message(f"sending key exchange command to replica {target_replica}") + op = operator.Operator(bft_network.config, client, bft_network.builddir) await op.key_exchange([target_replica]) @@ -1736,7 +1735,7 @@ async def test_add_nodes_with_failures(self, bft_network): bft_network.restart_clients(generate_tx_signing_keys=True, restart_replicas=False) live_replicas = bft_network.all_replicas(without=replicas_for_st.union(crashed_replica)) - await bft_network.check_initital_key_exchange(stop_replicas=False, full_key_exchange=False, replicas_to_start=live_replicas) + await bft_network.check_initial_key_exchange(stop_replicas=False, full_key_exchange=False, replicas_to_start=live_replicas) await skvbc.send_n_kvs_sequentially(601, description=f'New epoch without {crashed_replica.union(replicas_for_st)}') bft_network.start_replicas(crashed_replica) await self.validate_initial_key_exchange(bft_network, crashed_replica, metrics_id="self_key_exchange", diff --git a/tests/apollo/test_skvbc_state_transfer.py b/tests/apollo/test_skvbc_state_transfer.py index 1795c21eef..4fd704d68d 100644 --- a/tests/apollo/test_skvbc_state_transfer.py +++ b/tests/apollo/test_skvbc_state_transfer.py @@ -343,13 +343,13 @@ async def test_state_transfer_rvt_root_validation_after_adding_blocks(self, bft_ op = operator.Operator(bft_network.config, client, bft_network.builddir) for i in range(6): - print(f'Iteration {i}') + log.log_message(message_type=f'Iteration {i}') await skvbc.fill_and_wait_for_checkpoint( bft_network.all_replicas(), num_of_checkpoints_to_add=2, verify_checkpoint_persistency=False, assert_state_transfer_not_started=False, - expected_starting_checkpoint=i * 2) + expected_starting_checkpoint_func=lambda cp: cp == i * 2) if i > 0 and i % 2 == 0: await op.latest_pruneable_block() @@ -359,16 +359,17 @@ async def test_state_transfer_rvt_root_validation_after_adding_blocks(self, bft_ for r in rsi_rep.values(): lpab = cmf_msgs.ReconfigurationResponse.deserialize(r)[0] latest_pruneable_blocks += [lpab.response] - print('Pruning...') + + log.log_message(message_type='Pruning...') await op.prune(latest_pruneable_blocks) await self.wait_for_pruning_to_complete(client, op, bft_network) restart = random.choice([0, 1]) if restart == 1: - print('Selecting a random replica to be restarted (the primary is excluded)...') + log.log_message(message_type='Selecting a random replica to be restarted (the primary is excluded)...') replica_to_restart = random.choice(bft_network.all_replicas(without={0})) - print(f'Replica {replica_to_restart} will be restarted.') + log.log_message(message_type=f'Replica {replica_to_restart} will be restarted.') bft_network.stop_replica(replica_to_restart, True) bft_network.start_replica(replica_to_restart) # Restarted replica should get enough time to load metric holding last-stored-checkpoint diff --git a/tests/apollo/util/bft.py b/tests/apollo/util/bft.py index c7e9585c6a..4373eae58e 100644 --- a/tests/apollo/util/bft.py +++ b/tests/apollo/util/bft.py @@ -30,11 +30,11 @@ import time from pathlib import Path from re import sub -from typing import Coroutine, Sequence, Callable, Optional +from typing import Coroutine, Sequence, Callable, Optional, Tuple, Dict, TextIO import re import trio -from util.test_base import repeat_test +from util.test_base import repeat_test, ApolloTest from util.consts import CHECKPOINT_SEQUENCES sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), "pyclient")) @@ -209,13 +209,17 @@ async def wrapper(*args, **kwargs): start_replica_cmd=start_replica_cmd, stop_replica_cmd=None, num_ro_replicas=num_ro_replicas) - with test_instance.subTest(config=f'{bft_config}'): + subtest_id = None + with test_instance.subTest(config=f'{bft_config}', + storage=f"v{os.environ.get('BLOCKCHAIN_VERSION', default='v1')}"): + subtest_id = test_instance._subtest.id() + ApolloTest.FAILED_CASES[subtest_id] = 'Case aborted by an external signal' async with trio.open_nursery() as background_nursery: @repeat_test(num_repeats, break_on_first_failure, break_on_first_success, test_name) async def test_with_bft_network(): with BftTestNetwork.new(config, background_nursery, with_cre=with_cre, use_unified_certs=use_unified_certs) as bft_network: bft_network.current_test = test_name - seed = args[0].test_seed + seed = test_instance.test_seed with log.start_task(action_type=f"{async_fn.__name__}", bft_config=f'{bft_config}_clients={config.num_clients}', seed=seed, repeats=num_repeats, @@ -232,6 +236,9 @@ async def test_with_bft_network(): bft_network.test_start_time = time.time() await async_fn(*args, **kwargs, bft_network=bft_network) await test_with_bft_network() + ApolloTest.FAILED_CASES.pop(subtest_id) + test_instance.register_errors() + return wrapper return decorator @@ -301,7 +308,7 @@ def __init__(self, is_existing, origdir, config, testdir, certdir, builddir, too self.builddir = builddir self.toolsdir = toolsdir self.procs = procs - self.subproc_monitors = dict() + self.subproc_monitors: Dict[int, Tuple[Thread, Event, TextIO]] = dict() self.replicas = replicas # Make sure that client order is deterministic so that # seeded random client choices are deterministic @@ -313,7 +320,6 @@ def __init__(self, is_existing, origdir, config, testdir, certdir, builddir, too if client_factory: self.client_factory = client_factory else: - log.log_message(message_type=f"Client class is {BFT_CLIENT_TYPE}") self.client_factory = partial(self._create_new_client, BFT_CLIENT_TYPE) self.open_fds = {} self.current_test = "" @@ -338,8 +344,8 @@ def __init__(self, is_existing, origdir, config, testdir, certdir, builddir, too def new(cls, config, background_nursery, client_factory=None, with_cre=False, use_unified_certs=False): builddir = os.path.abspath("../../build") toolsdir = os.path.join(builddir, "tools") - testdir = tempfile.mkdtemp() - certdir = tempfile.mkdtemp() + testdir = tempfile.mkdtemp(prefix='testdir_') + certdir = tempfile.mkdtemp(prefix='certdir_') cls.assert_dirs_exist([testdir, certdir, builddir, toolsdir]) bft_network = cls( is_existing=False, @@ -591,12 +597,14 @@ def copy_certs_from_server_to_clients(self, src): def _create_clients(self): start_id = self.config.n + self.config.num_ro_replicas - for client_id in range(start_id, start_id + self.config.num_clients): + last_id = start_id + self.config.num_clients + log_message(message_type=f"Creating clients", first_client_id=start_id, last_client_id=last_id, + communication=str(BFT_CLIENT_TYPE), config_template=self._bft_config('client_id')) + for client_id in range(start_id, last_id): self.clients[client_id] = self.client_factory(client_id) def _create_new_client(self, client_class, client_id): config = self._bft_config(client_id) - log_message(message_type=f"Creating client {client_id}", config=str(config)) ro_replicas = [r.id for r in self.ro_replicas] return client_class(config, self.replicas, self.background_nursery, ro_replicas=ro_replicas) @@ -650,7 +658,7 @@ def setup_txn_signing(self): self.principals_mapping = "" self.principals_to_participant_map = {} if self.txn_signing_enabled: - self.txn_signing_keys_base_path = tempfile.mkdtemp() + self.txn_signing_keys_base_path = tempfile.mkdtemp(prefix='txn_signing_keys') self.principals_mapping, self.principals_to_participant_map = self.create_principals_mapping() self.generate_txn_signing_keys(self.txn_signing_keys_base_path) @@ -842,7 +850,7 @@ def start_replica(self, replica_id): if keep_logs: test_name = os.environ.get('TEST_NAME') - if os.environ.get('BLOCKCHAIN_VERSION', default="1").lower() == "4" : + if os.environ.get('BLOCKCHAIN_VERSION', default="1").lower() == "4": test_name = test_name + "_v4" test_name = test_name if test_name else f"{self.current_test}" @@ -901,12 +909,19 @@ def start_replica(self, replica_id): env=my_env ) + # If we run with some debug tool, let the module process the triggered proc process, and set the process info + # according to the returned value. Some debug tools spawn multiple processes. + if self.debug_tool and self.debug_tool.name: + self.procs[replica_id] = self.debug_tool.process_pids_after_replica_started(proc, replica_id) + else: + self.procs[replica_id] = proc + if keep_logs: - for other_id, other_tuple in self.subproc_monitors.items(): - other_file = other_tuple[2] - if not other_file.closed: - other_file.write(f"################### Apollo starting replica {replica_id}\n") - other_file.flush() + for _, __, other_replica_stdout_file in self.subproc_monitors.values(): + if not other_replica_stdout_file.closed: + other_replica_stdout_file.write( + f"################### Apollo starting replica {replica_id}\n") + other_replica_stdout_file.flush() stop_event = Event() thread = Thread(target=BftTestNetwork.monitor_replica_subproc, args=(self.procs[replica_id], replica_id, stop_event, stdout_file)) @@ -914,12 +929,6 @@ def start_replica(self, replica_id): thread.start() self.verify_matching_replica_client_communication(replica_test_log_path) - # If we run with some debug tool, let the module process the triggered proc process, and set the process info - # according to the returned value. Some debug tools spawn multiple processes. - if self.debug_tool and self.debug_tool.name: - self.procs[replica_id] = self.debug_tool.process_pids_after_replica_started(proc, replica_id) - else: - self.procs[replica_id] = proc replica_for_perf = os.environ.get('PERF_REPLICA', None) if self.test_start_time and replica_for_perf and int(replica_for_perf) == replica_id: @@ -1510,7 +1519,9 @@ async def rvt_root_value_to_be_returned(): return await self.wait_for(rvt_root_value_to_be_returned, timeout, .5) - async def wait_for_replicas_to_checkpoint(self, replica_ids, expected_checkpoint_num=None, timeout=30): + async def wait_for_replicas_to_checkpoint(self, replica_ids: Sequence[int], + expected_checkpoint_num: Callable[[int], bool] = lambda _: True, + timeout=30): """ Wait for every replica in `replicas` to take a checkpoint. Check every .5 seconds and give fail after 30 seconds. @@ -1521,19 +1532,16 @@ async def wait_for_replicas_to_checkpoint(self, replica_ids, expected_checkpoint for replica_id in replica_ids: nursery.start_soon(self.wait_for_checkpoint, replica_id, expected_checkpoint_num) - async def wait_for_checkpoint(self, replica_id, expected_checkpoint_num=None): + async def wait_for_checkpoint(self, replica_id: int, + expected_checkpoint_num: Callable[[int], bool] = lambda _: True): """ Wait for a single replica to reach the expected_checkpoint_num. If none is provided, return the last stored checkpoint. """ with log.start_action(action_type="wait_for_checkpoint", replica=replica_id) as action: - if expected_checkpoint_num is None: - expected_checkpoint_num = lambda _: True - async def expected_checkpoint_to_be_reached(): key = ['bc_state_transfer', 'Gauges', 'last_stored_checkpoint'] last_stored_checkpoint = await self.retrieve_metric(replica_id, *key) - if last_stored_checkpoint is not None and expected_checkpoint_num(last_stored_checkpoint): action.log(message_type=f'[checkpoint] #{last_stored_checkpoint} reached by replica=#{replica_id}') action.add_success_fields(last_stored_checkpoint=last_stored_checkpoint) @@ -1814,7 +1822,7 @@ async def check_initial_key_exchange(self, stop_replicas=True, full_key_exchange """ required_exchanges = self.config.n - 1 if full_key_exchange else 2 * self.config.f + self.config.c - with log.start_action(action_type="check_initital_key_exchange", required_exchanges=required_exchanges): + with log.start_action(action_type="check_initial_key_exchange", required_exchanges=required_exchanges): replicas_to_start = [r for r in range(self.config.n)] if replicas_to_start == [] else replicas_to_start self.start_replicas(replicas_to_start) num_of_exchanged_replicas = 0 @@ -2079,5 +2087,5 @@ def restore_form_older_db_snapshot(self, snapshot_id, src_replica, dest_replicas @staticmethod def assert_dirs_exist(dirs): - for dir in dirs: + for dir in map(os.path.abspath, dirs): assert os.path.isdir(dir), f"{dir} must exist!" \ No newline at end of file diff --git a/tests/apollo/util/eliot_logging.py b/tests/apollo/util/eliot_logging.py index 7d687a025e..c9e6433149 100644 --- a/tests/apollo/util/eliot_logging.py +++ b/tests/apollo/util/eliot_logging.py @@ -13,8 +13,7 @@ def logdir_timestamp(): timestamp_file = Path(f'../../build/timestamp') if not timestamp_file.exists(): - raise - print(f"Timestamp file {timestamp_file} doesn't exist") + print(f"Timestamp file {timestamp_file} doesn't exist", file=sys.stderr) return datetime.now().strftime("%y-%m-%d_%H-%M-%S") test_dir_name = timestamp_file.read_text().split('\n')[0] assert len(test_dir_name) > 0 @@ -35,6 +34,9 @@ def set_file_destination(): now = logdir_timestamp() test_name = f"apollo_run_{now}" + if os.environ.get('BLOCKCHAIN_VERSION', default="1").lower() == "4": + test_name = test_name + "_v4" + relative_apollo_logs = 'tests/apollo/logs' relative_current_run_logs = f'{relative_apollo_logs}/{logdir_timestamp()}' logs_dir = f'../../build/{relative_current_run_logs}' diff --git a/tests/apollo/util/skvbc.py b/tests/apollo/util/skvbc.py index 6ff8a55b9b..8951816574 100644 --- a/tests/apollo/util/skvbc.py +++ b/tests/apollo/util/skvbc.py @@ -242,7 +242,7 @@ async def start_replicas_and_write_with_multiple_clients( False, ExitPolicy.TIME) await self.network_wait_for_checkpoint( initial_nodes, - expected_checkpoint_num=None, + expected_checkpoint_num=lambda _: True, verify_checkpoint_persistency=persistency_enabled, assert_state_transfer_not_started=True) @@ -267,7 +267,7 @@ async def fill_and_wait_for_checkpoint( assert_state_transfer_not_started=True, without_clients=None, timeout=30, - expected_starting_checkpoint=None): + expected_starting_checkpoint_func=lambda _: True): """ A helper function used by tests to fill a window with data and then checkpoint it. @@ -277,7 +277,9 @@ async def fill_and_wait_for_checkpoint( TODO: Make filling concurrent to speed up tests """ - checkpoint_before = await self.bft_network.wait_for_checkpoint(replica_id=random.choice(initial_nodes), expected_checkpoint_num=lambda cp: cp == expected_starting_checkpoint if expected_starting_checkpoint else None) + checkpoint_before = await self.bft_network.wait_for_checkpoint( + replica_id=random.choice(initial_nodes), + expected_checkpoint_num=expected_starting_checkpoint_func) request_count = 1 + num_of_checkpoints_to_add * CHECKPOINT_SEQUENCES with log.start_action(action_type="fill_and_wait_for_checkpoint", checkpoint_before=checkpoint_before, request_count=request_count): diff --git a/tests/apollo/util/test_base.py b/tests/apollo/util/test_base.py index f449bf2b57..24ce5f10a2 100644 --- a/tests/apollo/util/test_base.py +++ b/tests/apollo/util/test_base.py @@ -14,50 +14,56 @@ import random import traceback import unittest + import util.eliot_logging as log from util.eliot_logging import logdir from functools import wraps import itertools import atexit +import sys from pathlib import Path -def write_failures_file(): +def report_cases_failures(): if ApolloTest.FAILED_CASES: Path(logdir()).absolute().mkdir(parents=True, exist_ok=True) with open(os.path.abspath(f'{logdir()}/failed_cases.txt'), 'a+') as failed_cases_file: for test_name, msg in ApolloTest.FAILED_CASES.items(): print(f'{test_name} - {msg}', file=failed_cases_file) - -atexit.register(write_failures_file) +atexit.register(report_cases_failures) class ApolloTest(unittest.TestCase): FAILED_CASES = dict() + _SEED = None - def tearDown(self): + def register_errors(self): if hasattr(self._outcome, 'errors'): result = self.defaultTestResult() self._feedErrorsToResult(result, self._outcome.errors) else: result = self._outcome.result - for errors in (result.errors, result.failures): - for test, text in errors: - if test.id() not in ApolloTest.FAILED_CASES: - # the full traceback is in the variable `text` - msg = [x for x in text.split('\n')[1:] - if not x.startswith(' ')][0] - ApolloTest.FAILED_CASES[test.id()] = msg + all_errors = result.errors + result.failures + all_errors = [test_case for test_case in all_errors if isinstance(test_case[0], unittest.TestCase) and + test_case[0].id() not in ApolloTest.FAILED_CASES] + for test_case, traceback_text in all_errors: + msg = [x for x in traceback_text.split('\n')[1:] + if not x.startswith(' ')][0] + ApolloTest.FAILED_CASES[test_case.id()] = msg @property def test_seed(self): return self._test_seed + @classmethod + def setUpClass(cls): + cls._SEED = os.getenv('APOLLO_SEED', random.randint(0, 1 << 32)) + random.seed(cls._SEED) + def setUp(self): - self._test_seed = os.getenv('APOLLO_SEED', random.randint(0, 1 << 32)) - random.seed(self._test_seed) + self._test_seed = ApolloTest._SEED def parameterize(**parameterize_kwargs): @@ -81,6 +87,7 @@ async def wrapper(*args, **kwargs): return decorator + def repeat_test(max_repeats: int, break_on_first_failure: bool, break_on_first_success: bool, test_name=None): """ Runs a test max_repeats times when both break_on_first_failure and break_on_first_success et to False.