From 15f934270a204f90ca142aad656a9bd7be70e9de Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Mon, 3 Feb 2025 21:21:25 +0100 Subject: [PATCH 01/26] Make sure to set valid and collectionID for all links Feature parity with other collections --- include/podio/detail/LinkCollectionImpl.h | 4 ++++ tests/unittests/links.cpp | 9 +++++++++ 2 files changed, 13 insertions(+) diff --git a/include/podio/detail/LinkCollectionImpl.h b/include/podio/detail/LinkCollectionImpl.h index c4b9c20ce..559d63ea4 100644 --- a/include/podio/detail/LinkCollectionImpl.h +++ b/include/podio/detail/LinkCollectionImpl.h @@ -210,6 +210,10 @@ class LinkCollection : public podio::CollectionBase { void setID(unsigned id) override { m_collectionID = id; + if (!m_isSubsetColl) { + std::ranges::for_each(m_storage.entries, [id](auto* obj) { obj->id = {obj->id.index, id}; }); + } + m_isValid = true; } unsigned getID() const override { diff --git a/tests/unittests/links.cpp b/tests/unittests/links.cpp index eb80b076f..0bb939731 100644 --- a/tests/unittests/links.cpp +++ b/tests/unittests/links.cpp @@ -318,6 +318,15 @@ TEST_CASE("LinkCollection subset collection", "[links][subset-colls]") { TEST_CASE("LinkCollection basics", "[links]") { REQUIRE(podio::detail::linkCollTypeName() == "podio::LinkCollection"); + + auto links = TestLColl{}; + auto link = links.create(); + REQUIRE(link.id().collectionID == 0); + + links.setID(42); + for (auto l : links) { + REQUIRE(l.id().collectionID == 42); + } } auto createLinkCollections(const size_t nElements = 3u) { From 33f12d835f8f12b10298f5767f4ccad1e6561468 Mon Sep 17 00:00:00 2001 From: tmadlener Date: Wed, 11 Oct 2023 16:52:48 +0200 Subject: [PATCH 02/26] Add test case for checking whether reading limited set works --- tests/root_io/read_frame_root.cpp | 43 ++++++++++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/tests/root_io/read_frame_root.cpp b/tests/root_io/read_frame_root.cpp index 40f8cdccf..be26fb919 100644 --- a/tests/root_io/read_frame_root.cpp +++ b/tests/root_io/read_frame_root.cpp @@ -1,10 +1,51 @@ #include "read_frame.h" #include "read_frame_auxiliary.h" +#include "podio/Frame.h" #include "podio/ROOTReader.h" #include +#include #include +#include + +int test_read_frame_limited(const std::string& inputFile) { + auto reader = podio::ROOTReader(); + reader.openFile(inputFile); + const std::vector collsToRead = {"mcparticles", "clusters"}; + + const auto event = podio::Frame(reader.readNextEntry("events", collsToRead)); + + const auto& availColls = event.getAvailableCollections(); + + const bool validColls = + std::set(availColls.begin(), availColls.end()) != std::set(collsToRead.begin(), collsToRead.end()); + + if (validColls) { + std::cerr << "The available collections are not as expected" << std::endl; + return 1; + } + + if (!event.get("mcparticles")) { + std::cerr << "Collection 'mcparticles' should be available" << std::endl; + return 1; + } + + if (event.get("hits")) { + std::cerr << "Collection 'hits' is available, but should not be" << std::endl; + return 1; + } + + const auto& clusters = event.get("clusters"); + const auto clu0 = clusters[0]; + const auto hits = clu0.Hits(); + if (hits.size() != 1 || hits[0].isAvailable()) { + std::cerr << "Hit in clusters are available but shouldn't be" << std::endl; + return 1; + } + + return 0; +} int main(int argc, char* argv[]) { std::string inputFile = "example_frame.root"; @@ -19,5 +60,5 @@ int main(int argc, char* argv[]) { } return read_frames(inputFile, assertBuildVersion) + - test_frame_aux_info(inputFile); + test_frame_aux_info(inputFile) + test_read_frame_limited(inputFile); } From 933cd74ea05a69f07ecd645e9b9e05cc3e43eca6 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Mon, 6 Jan 2025 19:41:37 +0100 Subject: [PATCH 03/26] Also check auxiliary information for RNTuple --- tests/root_io/read_rntuple.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/root_io/read_rntuple.cpp b/tests/root_io/read_rntuple.cpp index ddd98202b..e730f7092 100644 --- a/tests/root_io/read_rntuple.cpp +++ b/tests/root_io/read_rntuple.cpp @@ -1,6 +1,10 @@ -#include "podio/RNTupleReader.h" #include "read_frame.h" +#include "read_frame_auxiliary.h" + +#include "podio/RNTupleReader.h" int main() { - return read_frames("example_rntuple.root"); + const std::string inputFile = "example_rntuple.root"; + + return read_frames(inputFile) + test_frame_aux_info(inputFile); } From eeb79d69cc1ab53c95cd0c12db4d74152542628d Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Mon, 6 Jan 2025 19:44:14 +0100 Subject: [PATCH 04/26] Move test function into header and template it --- tests/read_frame.h | 48 ++++++++++++++++++++++++++++--- tests/root_io/read_frame_root.cpp | 43 +-------------------------- 2 files changed, 45 insertions(+), 46 deletions(-) diff --git a/tests/read_frame.h b/tests/read_frame.h index 5055a4da8..3a4e36845 100644 --- a/tests/read_frame.h +++ b/tests/read_frame.h @@ -180,14 +180,14 @@ int read_frames(const std::string& filename, bool assertBuildVersion = true) { } if (reader.getEntries(podio::Category::Event) != 10) { - std::cerr << "Could not read back the number of events correctly. " - << "(expected:" << 10 << ", actual: " << reader.getEntries(podio::Category::Event) << ")" << std::endl; + std::cerr << "Could not read back the number of events correctly. " << "(expected:" << 10 + << ", actual: " << reader.getEntries(podio::Category::Event) << ")" << std::endl; return 1; } if (reader.getEntries(podio::Category::Event) != reader.getEntries("other_events")) { - std::cerr << "Could not read back the number of events correctly. " - << "(expected:" << 10 << ", actual: " << reader.getEntries("other_events") << ")" << std::endl; + std::cerr << "Could not read back the number of events correctly. " << "(expected:" << 10 + << ", actual: " << reader.getEntries("other_events") << ")" << std::endl; return 1; } @@ -269,4 +269,44 @@ int read_frames(const std::string& filename, bool assertBuildVersion = true) { return 0; } +// /// Check that reading only a subset of collections works as expected +template +int test_read_frame_limited(const std::string& inputFile) { + auto reader = ReaderT(); + reader.openFile(inputFile); + const std::vector collsToRead = {"mcparticles", "clusters"}; + + const auto event = podio::Frame(reader.readNextEntry("events", collsToRead)); + + const auto& availColls = event.getAvailableCollections(); + + const bool validColls = + std::set(availColls.begin(), availColls.end()) != std::set(collsToRead.begin(), collsToRead.end()); + + if (validColls) { + std::cerr << "The available collections are not as expected" << std::endl; + return 1; + } + + if (!event.get("mcparticles")) { + std::cerr << "Collection 'mcparticles' should be available" << std::endl; + return 1; + } + + if (event.get("hits")) { + std::cerr << "Collection 'hits' is available, but should not be" << std::endl; + return 1; + } + + const auto& clusters = event.get("clusters"); + const auto clu0 = clusters[0]; + const auto hits = clu0.Hits(); + if (hits.size() != 1 || hits[0].isAvailable()) { + std::cerr << "Hit in clusters are available but shouldn't be" << std::endl; + return 1; + } + + return 0; +} + #endif // PODIO_TESTS_READ_FRAME_H diff --git a/tests/root_io/read_frame_root.cpp b/tests/root_io/read_frame_root.cpp index be26fb919..fd70e15de 100644 --- a/tests/root_io/read_frame_root.cpp +++ b/tests/root_io/read_frame_root.cpp @@ -1,51 +1,10 @@ #include "read_frame.h" #include "read_frame_auxiliary.h" -#include "podio/Frame.h" #include "podio/ROOTReader.h" #include -#include #include -#include - -int test_read_frame_limited(const std::string& inputFile) { - auto reader = podio::ROOTReader(); - reader.openFile(inputFile); - const std::vector collsToRead = {"mcparticles", "clusters"}; - - const auto event = podio::Frame(reader.readNextEntry("events", collsToRead)); - - const auto& availColls = event.getAvailableCollections(); - - const bool validColls = - std::set(availColls.begin(), availColls.end()) != std::set(collsToRead.begin(), collsToRead.end()); - - if (validColls) { - std::cerr << "The available collections are not as expected" << std::endl; - return 1; - } - - if (!event.get("mcparticles")) { - std::cerr << "Collection 'mcparticles' should be available" << std::endl; - return 1; - } - - if (event.get("hits")) { - std::cerr << "Collection 'hits' is available, but should not be" << std::endl; - return 1; - } - - const auto& clusters = event.get("clusters"); - const auto clu0 = clusters[0]; - const auto hits = clu0.Hits(); - if (hits.size() != 1 || hits[0].isAvailable()) { - std::cerr << "Hit in clusters are available but shouldn't be" << std::endl; - return 1; - } - - return 0; -} int main(int argc, char* argv[]) { std::string inputFile = "example_frame.root"; @@ -60,5 +19,5 @@ int main(int argc, char* argv[]) { } return read_frames(inputFile, assertBuildVersion) + - test_frame_aux_info(inputFile) + test_read_frame_limited(inputFile); + test_frame_aux_info(inputFile) + test_read_frame_limited(inputFile); } From 2aa47890c5efa4a74d1c54a9f472ec9ee99dbbea Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Mon, 6 Jan 2025 19:44:33 +0100 Subject: [PATCH 05/26] Make ROOTReader capable of only reading a subset --- include/podio/ROOTReader.h | 13 ++++++++++--- src/ROOTReader.cc | 17 ++++++++++++----- 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/include/podio/ROOTReader.h b/include/podio/ROOTReader.h index 7167411ec..3a2147442 100644 --- a/include/podio/ROOTReader.h +++ b/include/podio/ROOTReader.h @@ -74,20 +74,26 @@ class ROOTReader { /// Read the next data entry for a given category. /// /// @param name The category name for which to read the next entry + /// @param collsToRead (optional) the collection names that should be read. If + /// not provided (or empty) all collections will be read /// /// @returns FrameData from which a podio::Frame can be constructed if the /// category exists and if there are still entries left to read. /// Otherwise a nullptr - std::unique_ptr readNextEntry(const std::string& name); + std::unique_ptr readNextEntry(const std::string& name, + const std::vector& collsToRead = {}); /// Read the desired data entry for a given category. /// /// @param name The category name for which to read the next entry /// @param entry The entry number to read + /// @param collsToRead (optional) the collection names that should be read. If + /// not provided (or empty) all collections will be read /// /// @returns FrameData from which a podio::Frame can be constructed if the /// category and the desired entry exist. Otherwise a nullptr - std::unique_ptr readEntry(const std::string& name, const unsigned entry); + std::unique_ptr readEntry(const std::string& name, const unsigned entry, + const std::vector& collsToRead = {}); /// Get the number of entries for the given name /// @@ -174,7 +180,8 @@ class ROOTReader { /// Read the data entry specified in the passed CategoryInfo, and increase the /// counter afterwards. In case the requested entry is larger than the /// available number of entries, return a nullptr. - std::unique_ptr readEntry(ROOTReader::CategoryInfo& catInfo); + std::unique_ptr readEntry(ROOTReader::CategoryInfo& catInfo, + const std::vector& collsToRead); /// Get / read the buffers at index iColl in the passed category information podio::CollectionReadBuffers getCollectionBuffers(CategoryInfo& catInfo, size_t iColl, bool reloadBranches, diff --git a/src/ROOTReader.cc b/src/ROOTReader.cc index 9bba247be..43931904d 100644 --- a/src/ROOTReader.cc +++ b/src/ROOTReader.cc @@ -12,6 +12,7 @@ #include "TChain.h" #include "TClass.h" +#include #include #include @@ -78,18 +79,21 @@ GenericParameters ROOTReader::readEntryParameters(ROOTReader::CategoryInfo& catI return params; } -std::unique_ptr ROOTReader::readNextEntry(const std::string& name) { +std::unique_ptr ROOTReader::readNextEntry(const std::string& name, + const std::vector& collsToRead) { auto& catInfo = getCategoryInfo(name); - return readEntry(catInfo); + return readEntry(catInfo, collsToRead); } -std::unique_ptr ROOTReader::readEntry(const std::string& name, const unsigned entNum) { +std::unique_ptr ROOTReader::readEntry(const std::string& name, const unsigned entNum, + const std::vector& collsToRead) { auto& catInfo = getCategoryInfo(name); catInfo.entry = entNum; - return readEntry(catInfo); + return readEntry(catInfo, collsToRead); } -std::unique_ptr ROOTReader::readEntry(ROOTReader::CategoryInfo& catInfo) { +std::unique_ptr ROOTReader::readEntry(ROOTReader::CategoryInfo& catInfo, + const std::vector& collsToRead) { if (!catInfo.chain) { return nullptr; } @@ -109,6 +113,9 @@ std::unique_ptr ROOTReader::readEntry(ROOTReader::CategoryInfo& c ROOTFrameData::BufferMap buffers; for (size_t i = 0; i < catInfo.storedClasses.size(); ++i) { + if (!collsToRead.empty() && std::ranges::find(collsToRead, catInfo.storedClasses[i].first) == collsToRead.end()) { + continue; + } buffers.emplace(catInfo.storedClasses[i].first, getCollectionBuffers(catInfo, i, reloadBranches, localEntry)); } From 0d6dbe5436dd90b2585dcb5ab253a802a47570c9 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Mon, 6 Jan 2025 19:56:14 +0100 Subject: [PATCH 06/26] Improve error message on failing tests --- tests/read_frame.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/read_frame.h b/tests/read_frame.h index 3a4e36845..9ef4fe00d 100644 --- a/tests/read_frame.h +++ b/tests/read_frame.h @@ -285,6 +285,16 @@ int test_read_frame_limited(const std::string& inputFile) { if (validColls) { std::cerr << "The available collections are not as expected" << std::endl; + std::cerr << "expected: "; + for (const auto& c : std::set(collsToRead.begin(), collsToRead.end())) { + std::cerr << c << " "; + } + std::cerr << "\nactual: "; + for (const auto& c : std::set(availColls.begin(), availColls.end())) { + std::cerr << c << " "; + } + std::cerr << std::endl; + return 1; } From 69671bd74445ab1ed69ef08d7ced0c6e00929b2d Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Mon, 6 Jan 2025 19:56:23 +0100 Subject: [PATCH 07/26] Add selective collection reading to RNTupleReader --- include/podio/RNTupleReader.h | 14 ++++++++------ src/RNTupleReader.cc | 11 ++++++++--- tests/root_io/read_rntuple.cpp | 3 ++- 3 files changed, 18 insertions(+), 10 deletions(-) diff --git a/include/podio/RNTupleReader.h b/include/podio/RNTupleReader.h index a6e106176..aaa926476 100644 --- a/include/podio/RNTupleReader.h +++ b/include/podio/RNTupleReader.h @@ -19,10 +19,6 @@ namespace podio { -/** -This class has the function to read available data from disk -and to prepare collections and buffers. -**/ /// The RNTupleReader can be used to read files that have been written with the /// RNTuple backend. /// @@ -61,20 +57,26 @@ class RNTupleReader { /// Read the next data entry for a given category. /// /// @param name The category name for which to read the next entry + /// @param collsToRead (optional) the collection names that should be read. If + /// not provided (or empty) all collections will be read /// /// @returns FrameData from which a podio::Frame can be constructed if the /// category exists and if there are still entries left to read. /// Otherwise a nullptr - std::unique_ptr readNextEntry(const std::string& name); + std::unique_ptr readNextEntry(const std::string& name, + const std::vector& collsToRead = {}); /// Read the desired data entry for a given category. /// /// @param name The category name for which to read the next entry /// @param entry The entry number to read + /// @param collsToRead (optional) the collection names that should be read. If + /// not provided (or empty) all collections will be read /// /// @returns FrameData from which a podio::Frame can be constructed if the /// category and the desired entry exist. Otherwise a nullptr - std::unique_ptr readEntry(const std::string& name, const unsigned entry); + std::unique_ptr readEntry(const std::string& name, const unsigned entry, + const std::vector& collsToRead = {}); /// Get the names of all the available Frame categories in the current file(s). /// diff --git a/src/RNTupleReader.cc b/src/RNTupleReader.cc index d04ae0345..92dcb9a41 100644 --- a/src/RNTupleReader.cc +++ b/src/RNTupleReader.cc @@ -138,11 +138,13 @@ std::vector RNTupleReader::getAvailableCategories() const { return cats; } -std::unique_ptr RNTupleReader::readNextEntry(const std::string& name) { - return readEntry(name, m_entries[name]); +std::unique_ptr RNTupleReader::readNextEntry(const std::string& name, + const std::vector& collsToRead) { + return readEntry(name, m_entries[name], collsToRead); } -std::unique_ptr RNTupleReader::readEntry(const std::string& category, const unsigned entNum) { +std::unique_ptr RNTupleReader::readEntry(const std::string& category, const unsigned entNum, + const std::vector& collsToRead) { if (m_totalEntries.find(category) == m_totalEntries.end()) { getEntries(category); } @@ -179,6 +181,9 @@ std::unique_ptr RNTupleReader::readEntry(const std::string& categ const auto& collInfo = m_collectionInfo[category]; for (size_t i = 0; i < collInfo.id.size(); ++i) { + if (!collsToRead.empty() && std::ranges::find(collsToRead, collInfo.name[i]) == collsToRead.end()) { + continue; + } const auto& collType = collInfo.type[i]; const auto& bufferFactory = podio::CollectionBufferFactory::instance(); auto maybeBuffers = diff --git a/tests/root_io/read_rntuple.cpp b/tests/root_io/read_rntuple.cpp index e730f7092..68ffe04f7 100644 --- a/tests/root_io/read_rntuple.cpp +++ b/tests/root_io/read_rntuple.cpp @@ -6,5 +6,6 @@ int main() { const std::string inputFile = "example_rntuple.root"; - return read_frames(inputFile) + test_frame_aux_info(inputFile); + return read_frames(inputFile) + test_frame_aux_info(inputFile) + + test_read_frame_limited(inputFile); } From 39da93581564c096840716f60feba4c1761286d9 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Mon, 6 Jan 2025 20:51:35 +0100 Subject: [PATCH 08/26] Add selective collection reading to SIO --- include/podio/SIOFrameData.h | 10 ++++++++-- include/podio/SIOReader.h | 22 ++++++++++++++++++++-- src/SIOFrameData.cc | 27 ++++++++++++++++++++++----- src/SIOReader.cc | 10 ++++++---- tests/sio_io/read_frame_sio.cpp | 2 +- 5 files changed, 57 insertions(+), 14 deletions(-) diff --git a/include/podio/SIOFrameData.h b/include/podio/SIOFrameData.h index dd2dddb20..25ac7120c 100644 --- a/include/podio/SIOFrameData.h +++ b/include/podio/SIOFrameData.h @@ -35,11 +35,13 @@ class SIOFrameData { /// tableBuffer containing the necessary information for unpacking the /// collections. The two size parameters denote the uncompressed size of the /// respective buffers. - SIOFrameData(sio::buffer&& collBuffers, std::size_t dataSize, sio::buffer&& tableBuffer, std::size_t tableSize) : + SIOFrameData(sio::buffer&& collBuffers, std::size_t dataSize, sio::buffer&& tableBuffer, std::size_t tableSize, + std::vector limitColls = {}) : m_recBuffer(std::move(collBuffers)), m_tableBuffer(std::move(tableBuffer)), m_dataSize(dataSize), - m_tableSize(tableSize) { + m_tableSize(tableSize), + m_limitColls(std::move(limitColls)) { } std::optional getCollectionBuffers(const std::string& name); @@ -79,6 +81,10 @@ class SIOFrameData { std::vector m_subsetCollectionBits{}; podio::GenericParameters m_parameters{}; + + /// The collections that should be made available for a Frame constructed from + /// this (if non-empty) + std::vector m_limitColls{}; }; } // namespace podio diff --git a/include/podio/SIOReader.h b/include/podio/SIOReader.h index f9f5ba02a..62585e1df 100644 --- a/include/podio/SIOReader.h +++ b/include/podio/SIOReader.h @@ -37,21 +37,39 @@ class SIOReader { /// Read the next data entry for a given category. /// + /// @note Given how the SIO files are currently layed out it is in fact not + /// possible to only read a subset of a Frame. Rather the subset of + /// collections to read will be an artificial limit on the returned + /// SIOFrameData. Limiting the collections to read will not improve I/O + /// performance. + /// /// @param name The category name for which to read the next entry + /// @param collsToRead (optional) the collection names that should be read. If + /// not provided (or empty) all collections will be read /// /// @returns FrameData from which a podio::Frame can be constructed if the /// category exists and if there are still entries left to read. /// Otherwise a nullptr - std::unique_ptr readNextEntry(const std::string& name); + std::unique_ptr readNextEntry(const std::string& name, + const std::vector& collsToRead = {}); /// Read the desired data entry for a given category. /// + /// @note Given how the SIO files are currently layed out it is in fact not + /// possible to only read a subset of a Frame. Rather the subset of + /// collections to read will be an artificial limit on the returned + /// SIOFrameData. Limiting the collections to read will not improve I/O + /// performance. + /// /// @param name The category name for which to read the next entry /// @param entry The entry number to read + /// @param collsToRead (optional) the collection names that should be read. If + /// not provided (or empty) all collections will be read /// /// @returns FrameData from which a podio::Frame can be constructed if the /// category and the desired entry exist. Otherwise a nullptr - std::unique_ptr readEntry(const std::string& name, const unsigned entry); + std::unique_ptr readEntry(const std::string& name, const unsigned entry, + const std::vector& collsToRead = {}); /// Get the number of entries for the given name /// diff --git a/src/SIOFrameData.cc b/src/SIOFrameData.cc index 3343bea52..fc34c17a2 100644 --- a/src/SIOFrameData.cc +++ b/src/SIOFrameData.cc @@ -18,6 +18,10 @@ std::optional SIOFrameData::getCollectionBuffers(c const auto nameIt = std::ranges::find(names, name); // collection indices start at 1! const auto index = std::distance(std::begin(names), nameIt) + 1; + // This collection is not available (artificially!) + if (m_availableBlocks[index] == 0) { + return std::nullopt; + } // Mark this block as consumed m_availableBlocks[index] = 0; @@ -38,11 +42,8 @@ std::vector SIOFrameData::getAvailableCollections() { std::vector collections; for (size_t i = 1; i < m_blocks.size(); ++i) { if (m_availableBlocks[i]) { - // We have to get the collID of this collection in the idTable as there is - // no guarantee that it coincides with the index in the blocks. - // Additionally, collection indices start at 1 - const auto collID = m_idTable.ids()[i - 1]; - collections.push_back(m_idTable.name(collID).value()); + const auto name = m_idTable.names()[i - 1]; + collections.push_back(name); } } @@ -67,6 +68,22 @@ void SIOFrameData::unpackBuffers() { sio::buffer uncBuffer{m_dataSize}; compressor.uncompress(m_recBuffer.span(), uncBuffer); sio::api::read_blocks(uncBuffer.span(), m_blocks); + + if (m_limitColls.empty()) { + return; + } + + // In order to save on memory and to not litter the rest of the implementation + // with similar checks, we immediately throw away all collections that should + // not become available + for (size_t i = 1; i < m_blocks.size(); ++i) { + const auto name = m_idTable.names()[i - 1]; + if (std::ranges::find(m_limitColls, name) == m_limitColls.end()) { + auto buffers = dynamic_cast(m_blocks[i].get())->getBuffers(); + buffers.deleteBuffers(buffers); + m_availableBlocks[i] = 0; + } + } } void SIOFrameData::createBlocks() { diff --git a/src/SIOReader.cc b/src/SIOReader.cc index a54e5670b..4d9147b6f 100644 --- a/src/SIOReader.cc +++ b/src/SIOReader.cc @@ -26,7 +26,8 @@ void SIOReader::openFile(const std::string& filename) { readEDMDefinitions(); // Potentially could do this lazily } -std::unique_ptr SIOReader::readNextEntry(const std::string& name) { +std::unique_ptr SIOReader::readNextEntry(const std::string& name, + const std::vector& collsToRead) { // Skip to where the next record of this name starts in the file, based on // how many times we have already read this name // @@ -44,14 +45,15 @@ std::unique_ptr SIOReader::readNextEntry(const std::string& name) m_nameCtr[name]++; return std::make_unique(std::move(dataBuffer), dataInfo._uncompressed_length, std::move(tableBuffer), - tableInfo._uncompressed_length); + tableInfo._uncompressed_length, collsToRead); } -std::unique_ptr SIOReader::readEntry(const std::string& name, const unsigned entry) { +std::unique_ptr SIOReader::readEntry(const std::string& name, const unsigned entry, + const std::vector& collsToRead) { // NOTE: Will create or overwrite the entry counter // All checks are done in the following function m_nameCtr[name] = entry; - return readNextEntry(name); + return readNextEntry(name, collsToRead); } std::vector SIOReader::getAvailableCategories() const { diff --git a/tests/sio_io/read_frame_sio.cpp b/tests/sio_io/read_frame_sio.cpp index 8a27743ac..34bad94d8 100644 --- a/tests/sio_io/read_frame_sio.cpp +++ b/tests/sio_io/read_frame_sio.cpp @@ -12,5 +12,5 @@ int main(int argc, char* argv[]) { } return read_frames(inputFile, assertBuildVersion) + - test_frame_aux_info(inputFile); + test_frame_aux_info(inputFile) + test_read_frame_limited(inputFile); } From 5c353ba494ebfd8abfe7d9a4b4a004b758da96db Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Mon, 6 Jan 2025 21:07:04 +0100 Subject: [PATCH 09/26] Enable selective reading in Reader interface --- include/podio/Reader.h | 38 +++++++++++++++--------- tests/read_frame.h | 25 ++++++++++++---- tests/root_io/read_interface_rntuple.cpp | 5 ++-- tests/root_io/read_interface_root.cpp | 10 +++---- tests/sio_io/read_interface_sio.cpp | 6 ++-- 5 files changed, 51 insertions(+), 33 deletions(-) diff --git a/include/podio/Reader.h b/include/podio/Reader.h index d8209197e..a2c5a2526 100644 --- a/include/podio/Reader.h +++ b/include/podio/Reader.h @@ -22,8 +22,8 @@ class Reader { struct ReaderConcept { virtual ~ReaderConcept() = default; - virtual podio::Frame readNextFrame(const std::string& name) = 0; - virtual podio::Frame readFrame(const std::string& name, size_t index) = 0; + virtual podio::Frame readNextFrame(const std::string& name, const std::vector&) = 0; + virtual podio::Frame readFrame(const std::string& name, size_t index, const std::vector&) = 0; virtual size_t getEntries(const std::string& name) const = 0; virtual podio::version::Version currentFileVersion() const = 0; virtual std::optional currentFileVersion(const std::string& name) const = 0; @@ -44,16 +44,17 @@ class Reader { ~ReaderModel() = default; - podio::Frame readNextFrame(const std::string& name) override { - auto maybeFrame = m_reader->readNextEntry(name); + podio::Frame readNextFrame(const std::string& name, const std::vector& collsToRead) override { + auto maybeFrame = m_reader->readNextEntry(name, collsToRead); if (maybeFrame) { return maybeFrame; } throw std::runtime_error("Failed reading category " + name + " (reading beyond bounds?)"); } - podio::Frame readFrame(const std::string& name, size_t index) override { - auto maybeFrame = m_reader->readEntry(name, index); + podio::Frame readFrame(const std::string& name, size_t index, + const std::vector& collsToRead) override { + auto maybeFrame = m_reader->readEntry(name, index, collsToRead); if (maybeFrame) { return maybeFrame; } @@ -105,46 +106,55 @@ class Reader { /// Read the next frame of a given category /// /// @param name The category name for which to read the next frame + /// @param collsToRead (optional) the collection names that should be read. If + /// not provided (or empty) all collections will be read /// /// @returns A fully constructed Frame with the contents read from file /// /// @throws std::invalid_argument in case the category is not available or in /// case no more entries are available - podio::Frame readNextFrame(const std::string& name) { - return m_self->readNextFrame(name); + podio::Frame readNextFrame(const std::string& name, const std::vector& collsToRead = {}) { + return m_self->readNextFrame(name, collsToRead); } /// Read the next frame of the "events" category /// + /// @param collsToRead (optional) the collection names that should be read. If + /// not provided (or empty) all collections will be read + /// /// @returns A fully constructed Frame with the contents read from file /// /// @throws std::invalid_argument in case no (more) events are available - podio::Frame readNextEvent() { - return readNextFrame(podio::Category::Event); + podio::Frame readNextEvent(const std::vector& collsToRead = {}) { + return readNextFrame(podio::Category::Event, collsToRead); } /// Read a specific frame for a given category /// /// @param name The category name for which to read the next entry /// @param index The entry number to read + /// @param collsToRead (optional) the collection names that should be read. If + /// not provided (or empty) all collections will be read /// /// @returns A fully constructed Frame with the contents read from file /// /// @throws std::invalid_argument in case the category is not available or in /// case the specified entry is not available - podio::Frame readFrame(const std::string& name, size_t index) { - return m_self->readFrame(name, index); + podio::Frame readFrame(const std::string& name, size_t index, const std::vector& collsToRead = {}) { + return m_self->readFrame(name, index, collsToRead); } /// Read a specific frame of the "events" category /// /// @param index The event number to read + /// @param collsToRead (optional) the collection names that should be read. If + /// not provided (or empty) all collections will be read /// /// @returns A fully constructed Frame with the contents read from file /// /// @throws std::invalid_argument in case the desired event is not available - podio::Frame readEvent(size_t index) { - return readFrame(podio::Category::Event, index); + podio::Frame readEvent(size_t index, const std::vector& collsToRead = {}) { + return readFrame(podio::Category::Event, index, collsToRead); } /// Get the number of entries for the given name diff --git a/tests/read_frame.h b/tests/read_frame.h index 9ef4fe00d..f37e4a796 100644 --- a/tests/read_frame.h +++ b/tests/read_frame.h @@ -15,6 +15,7 @@ #include "interface_extension_model/TestInterfaceLinkCollection.h" #include "podio/Frame.h" +#include "podio/Reader.h" #include @@ -269,14 +270,17 @@ int read_frames(const std::string& filename, bool assertBuildVersion = true) { return 0; } -// /// Check that reading only a subset of collections works as expected template -int test_read_frame_limited(const std::string& inputFile) { - auto reader = ReaderT(); - reader.openFile(inputFile); +int test_read_frame_limited(ReaderT& reader) { const std::vector collsToRead = {"mcparticles", "clusters"}; - const auto event = podio::Frame(reader.readNextEntry("events", collsToRead)); + const auto event = [&]() { + if constexpr (std::is_same_v) { + return podio::Frame(reader.readFrame("events", 1, collsToRead)); + } else { + return podio::Frame(reader.readEntry("events", 1, collsToRead)); + } + }(); const auto& availColls = event.getAvailableCollections(); @@ -308,7 +312,7 @@ int test_read_frame_limited(const std::string& inputFile) { return 1; } - const auto& clusters = event.get("clusters"); + const auto& clusters = event.template get("clusters"); const auto clu0 = clusters[0]; const auto hits = clu0.Hits(); if (hits.size() != 1 || hits[0].isAvailable()) { @@ -319,4 +323,13 @@ int test_read_frame_limited(const std::string& inputFile) { return 0; } +/// Check that reading only a subset of collections works as expected +template +int test_read_frame_limited(const std::string& inputFile) { + auto reader = ReaderT(); + reader.openFile(inputFile); + + return test_read_frame_limited(reader); +} + #endif // PODIO_TESTS_READ_FRAME_H diff --git a/tests/root_io/read_interface_rntuple.cpp b/tests/root_io/read_interface_rntuple.cpp index e2ee2a421..b5188203d 100644 --- a/tests/root_io/read_interface_rntuple.cpp +++ b/tests/root_io/read_interface_rntuple.cpp @@ -1,10 +1,9 @@ +#include "read_frame.h" #include "read_interface.h" int main(int, char**) { auto reader = podio::makeReader("example_from_rntuple_interface.root"); - if (read_frames(reader)) { - return 1; - } + return read_frames(reader) + test_read_frame_limited(reader); return 0; } diff --git a/tests/root_io/read_interface_root.cpp b/tests/root_io/read_interface_root.cpp index 8d33ec1c3..b43546ec0 100644 --- a/tests/root_io/read_interface_root.cpp +++ b/tests/root_io/read_interface_root.cpp @@ -1,11 +1,9 @@ +#include "read_frame.h" #include "read_interface.h" -int main(int, char**) { +#include "podio/Reader.h" +int main(int, char**) { auto reader = podio::makeReader("example_frame_interface.root"); - if (read_frames(reader)) { - return 1; - } - - return 0; + return read_frames(reader) + test_read_frame_limited(reader); } diff --git a/tests/sio_io/read_interface_sio.cpp b/tests/sio_io/read_interface_sio.cpp index e411f2a96..0695158ff 100644 --- a/tests/sio_io/read_interface_sio.cpp +++ b/tests/sio_io/read_interface_sio.cpp @@ -1,11 +1,9 @@ +#include "read_frame.h" #include "read_interface.h" int main(int, char**) { - auto readerSIO = podio::makeReader("example_frame_sio_interface.sio"); - if (read_frames(readerSIO)) { - return 1; - } + return read_frames(readerSIO) + test_read_frame_limited(readerSIO); return 0; } From 2f313094aefaf9f2c08026e01588d7d2c4cc0132 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Mon, 6 Jan 2025 21:22:17 +0100 Subject: [PATCH 10/26] Make sure that passing non-existent names doesn't break --- tests/read_frame.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tests/read_frame.h b/tests/read_frame.h index f37e4a796..58e16c197 100644 --- a/tests/read_frame.h +++ b/tests/read_frame.h @@ -320,6 +320,22 @@ int test_read_frame_limited(ReaderT& reader) { return 1; } + // Check that nothing breaks if we pass in an unavailable collection name + const auto emptyEvent = [&]() { + if constexpr (std::is_same_v) { + return podio::Frame(reader.readFrame("events", 1, {"no-collection-with-this-name", "orThisOne"})); + } else { + return podio::Frame(reader.readEntry("events", 1, {"no-collection-with-this-name", "orThisOne"})); + } + }(); + + if (!emptyEvent.getAvailableCollections().empty()) { + std::cerr << "Trying to read collection names that are unavailable should not break, but should also not give a " + "meaningful event" + << std::endl; + return 1; + } + return 0; } From 1dcead59fd01babd1d2a5dcc95ea89aee46066df Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Mon, 6 Jan 2025 21:31:49 +0100 Subject: [PATCH 11/26] Add simple test for reusing collection names It is possible that users want to "ignore" a certain collection when reading files. It should then still be possible to add a new collection with the same name to the event that has been created without breaking anything. --- tests/read_frame.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/tests/read_frame.h b/tests/read_frame.h index 58e16c197..bf3706dec 100644 --- a/tests/read_frame.h +++ b/tests/read_frame.h @@ -1,6 +1,7 @@ #ifndef PODIO_TESTS_READ_FRAME_H // NOLINT(llvm-header-guard): folder structure not suitable #define PODIO_TESTS_READ_FRAME_H // NOLINT(llvm-header-guard): folder structure not suitable +#include "datamodel/ExampleHitCollection.h" #include "datamodel/ExampleWithInterfaceRelationCollection.h" #include "datamodel/ExampleWithVectorMemberCollection.h" #include "read_test.h" @@ -274,7 +275,7 @@ template int test_read_frame_limited(ReaderT& reader) { const std::vector collsToRead = {"mcparticles", "clusters"}; - const auto event = [&]() { + auto event = [&]() { if constexpr (std::is_same_v) { return podio::Frame(reader.readFrame("events", 1, collsToRead)); } else { @@ -320,6 +321,17 @@ int test_read_frame_limited(ReaderT& reader) { return 1; } + const auto& newHits = [&]() -> auto const& { + auto mutHits = ExampleHitCollection(); + mutHits.create(); + return event.put(std::move(mutHits), "hits"); + }(); + + if (newHits.size() != 1) { + std::cerr << "Adding new collection with same name as available from data (but not read) doesn't work" << std::endl; + return 1; + } + // Check that nothing breaks if we pass in an unavailable collection name const auto emptyEvent = [&]() { if constexpr (std::is_same_v) { From e024485527204aa3457d8d9267269d8fdb1f531e Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Thu, 16 Jan 2025 15:15:27 +0100 Subject: [PATCH 12/26] Replace a pair with a struct Facilitates the usage of projections with range algorithms --- include/podio/ROOTReader.h | 16 ++++++++++------ src/ROOTReader.cc | 20 ++++++++++---------- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/include/podio/ROOTReader.h b/include/podio/ROOTReader.h index 3a2147442..c46d185b5 100644 --- a/include/podio/ROOTReader.h +++ b/include/podio/ROOTReader.h @@ -28,6 +28,10 @@ namespace detail { // vector using CollectionInfo = std::tuple; + struct NamedCollInfo { + std::string name{}; + CollectionInfo info{}; + }; } // namespace detail class CollectionBase; @@ -152,12 +156,12 @@ class ROOTReader { /// constructor from chain for more convenient map insertion CategoryInfo(std::unique_ptr&& c) : chain(std::move(c)) { } - std::unique_ptr chain{nullptr}; ///< The TChain with the data - unsigned entry{0}; ///< The next entry to read - std::vector> storedClasses{}; ///< The stored collections in this - ///< category - std::vector branches{}; ///< The branches for this category - std::shared_ptr table{nullptr}; ///< The collection ID table for this category + std::unique_ptr chain{nullptr}; ///< The TChain with the data + unsigned entry{0}; ///< The next entry to read + std::vector storedClasses{}; ///< The stored collections in this + ///< category + std::vector branches{}; ///< The branches for this category + std::shared_ptr table{nullptr}; ///< The collection ID table for this category }; /// Initialize the passed CategoryInfo by setting up the necessary branches, diff --git a/src/ROOTReader.cc b/src/ROOTReader.cc index 43931904d..6a3b6c5cd 100644 --- a/src/ROOTReader.cc +++ b/src/ROOTReader.cc @@ -18,11 +18,11 @@ namespace podio { -std::tuple, std::vector>> +std::tuple, std::vector> createCollectionBranches(TChain* chain, const podio::CollectionIDTable& idTable, const std::vector& collInfo); -std::tuple, std::vector>> +std::tuple, std::vector> createCollectionBranchesIndexBased(TChain* chain, const podio::CollectionIDTable& idTable, const std::vector& collInfo); @@ -113,10 +113,10 @@ std::unique_ptr ROOTReader::readEntry(ROOTReader::CategoryInfo& c ROOTFrameData::BufferMap buffers; for (size_t i = 0; i < catInfo.storedClasses.size(); ++i) { - if (!collsToRead.empty() && std::ranges::find(collsToRead, catInfo.storedClasses[i].first) == collsToRead.end()) { + if (!collsToRead.empty() && std::ranges::find(collsToRead, catInfo.storedClasses[i].name) == collsToRead.end()) { continue; } - buffers.emplace(catInfo.storedClasses[i].first, getCollectionBuffers(catInfo, i, reloadBranches, localEntry)); + buffers.emplace(catInfo.storedClasses[i].name, getCollectionBuffers(catInfo, i, reloadBranches, localEntry)); } auto parameters = readEntryParameters(catInfo, reloadBranches, localEntry); @@ -127,8 +127,8 @@ std::unique_ptr ROOTReader::readEntry(ROOTReader::CategoryInfo& c podio::CollectionReadBuffers ROOTReader::getCollectionBuffers(ROOTReader::CategoryInfo& catInfo, size_t iColl, bool reloadBranches, unsigned int localEntry) { - const auto& name = catInfo.storedClasses[iColl].first; - const auto& [collType, isSubsetColl, schemaVersion, index] = catInfo.storedClasses[iColl].second; + const auto& name = catInfo.storedClasses[iColl].name; + const auto& [collType, isSubsetColl, schemaVersion, index] = catInfo.storedClasses[iColl].info; auto& branches = catInfo.branches[index]; const auto& bufferFactory = podio::CollectionBufferFactory::instance(); @@ -314,14 +314,14 @@ std::vector ROOTReader::getAvailableCategories() const { return cats; } -std::tuple, std::vector>> +std::tuple, std::vector> createCollectionBranchesIndexBased(TChain* chain, const podio::CollectionIDTable& idTable, const std::vector& collInfo) { size_t collectionIndex{0}; std::vector collBranches; collBranches.reserve(collInfo.size() + 1); - std::vector> storedClasses; + std::vector storedClasses; storedClasses.reserve(collInfo.size()); for (const auto& [collID, collType, isSubsetColl, collSchemaVersion] : collInfo) { @@ -366,14 +366,14 @@ createCollectionBranchesIndexBased(TChain* chain, const podio::CollectionIDTable return {std::move(collBranches), storedClasses}; } -std::tuple, std::vector>> +std::tuple, std::vector> createCollectionBranches(TChain* chain, const podio::CollectionIDTable& idTable, const std::vector& collInfo) { size_t collectionIndex{0}; std::vector collBranches; collBranches.reserve(collInfo.size() + 1); - std::vector> storedClasses; + std::vector storedClasses; storedClasses.reserve(collInfo.size()); for (const auto& [collID, collType, isSubsetColl, collSchemaVersion] : collInfo) { From 93c9149cb907f0b5e355d1d4626d846d3ea90d6f Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Thu, 16 Jan 2025 15:35:38 +0100 Subject: [PATCH 13/26] Throw an exception instead of ignoring non-existant names --- include/podio/SIOFrameData.h | 11 +---------- src/RNTupleReader.cc | 12 ++++++++++-- src/ROOTReader.cc | 9 +++++++++ src/SIOFrameData.cc | 20 ++++++++++++++++++++ tests/read_frame.h | 27 +++++++++++++++++---------- 5 files changed, 57 insertions(+), 22 deletions(-) diff --git a/include/podio/SIOFrameData.h b/include/podio/SIOFrameData.h index 25ac7120c..9d5882026 100644 --- a/include/podio/SIOFrameData.h +++ b/include/podio/SIOFrameData.h @@ -4,13 +4,10 @@ #include "podio/CollectionBuffers.h" #include "podio/CollectionIDTable.h" #include "podio/GenericParameters.h" -#include "podio/SIOBlock.h" #include #include -#include -#include #include #include #include @@ -36,13 +33,7 @@ class SIOFrameData { /// collections. The two size parameters denote the uncompressed size of the /// respective buffers. SIOFrameData(sio::buffer&& collBuffers, std::size_t dataSize, sio::buffer&& tableBuffer, std::size_t tableSize, - std::vector limitColls = {}) : - m_recBuffer(std::move(collBuffers)), - m_tableBuffer(std::move(tableBuffer)), - m_dataSize(dataSize), - m_tableSize(tableSize), - m_limitColls(std::move(limitColls)) { - } + std::vector limitColls = {}); std::optional getCollectionBuffers(const std::string& name); diff --git a/src/RNTupleReader.cc b/src/RNTupleReader.cc index 92dcb9a41..1d75fa0f9 100644 --- a/src/RNTupleReader.cc +++ b/src/RNTupleReader.cc @@ -158,6 +158,16 @@ std::unique_ptr RNTupleReader::readEntry(const std::string& categ } } + const auto& collInfo = m_collectionInfo[category]; + // Make sure to not silently ignore non-existant but requested collections + if (!collsToRead.empty()) { + for (const auto& name : collsToRead) { + if (std::ranges::find(collInfo.name, name) == collInfo.name.end()) { + throw std::invalid_argument(name + " is not available from Frame"); + } + } + } + m_entries[category] = entNum + 1; // m_readerEntries contains the accumulated entries for all the readers @@ -178,8 +188,6 @@ std::unique_ptr RNTupleReader::readEntry(const std::string& categ auto dentry = m_readers[category][readerIndex]->GetModel()->GetDefaultEntry(); #endif - const auto& collInfo = m_collectionInfo[category]; - for (size_t i = 0; i < collInfo.id.size(); ++i) { if (!collsToRead.empty() && std::ranges::find(collsToRead, collInfo.name[i]) == collsToRead.end()) { continue; diff --git a/src/ROOTReader.cc b/src/ROOTReader.cc index 6a3b6c5cd..1164321b0 100644 --- a/src/ROOTReader.cc +++ b/src/ROOTReader.cc @@ -101,6 +101,15 @@ std::unique_ptr ROOTReader::readEntry(ROOTReader::CategoryInfo& c return nullptr; } + // Make sure to not silently ignore non-existant but requested collections + if (!collsToRead.empty()) { + for (const auto& name : collsToRead) { + if (std::ranges::find(catInfo.storedClasses, name, &detail::NamedCollInfo::name) == catInfo.storedClasses.end()) { + throw std::invalid_argument(name + " is not available from Frame"); + } + } + } + // After switching trees in the chain, branch pointers get invalidated so // they need to be reassigned. // NOTE: root 6.22/06 requires that we get completely new branches here, diff --git a/src/SIOFrameData.cc b/src/SIOFrameData.cc index fc34c17a2..1d083909e 100644 --- a/src/SIOFrameData.cc +++ b/src/SIOFrameData.cc @@ -7,6 +7,26 @@ #include namespace podio { + +SIOFrameData::SIOFrameData(sio::buffer&& collBuffers, std::size_t dataSize, sio::buffer&& tableBuffer, + std::size_t tableSize, std::vector limitColls) : + m_recBuffer(std::move(collBuffers)), + m_tableBuffer(std::move(tableBuffer)), + m_dataSize(dataSize), + m_tableSize(tableSize), + m_limitColls(std::move(limitColls)) { + readIdTable(); + // Assuming here that the idTable only contains the collections that are + // also available + if (!m_limitColls.empty()) { + for (const auto& name : m_limitColls) { + if (std::ranges::find(m_idTable.names(), name) == m_idTable.names().end()) { + throw std::invalid_argument(name + " is not available from Frame"); + } + } + } +} + std::optional SIOFrameData::getCollectionBuffers(const std::string& name) { unpackBuffers(); diff --git a/tests/read_frame.h b/tests/read_frame.h index bf3706dec..40f4051e6 100644 --- a/tests/read_frame.h +++ b/tests/read_frame.h @@ -19,6 +19,7 @@ #include "podio/Reader.h" #include +#include void processExtensions(const podio::Frame& event, int iEvent, podio::version::Version) { const auto& extColl = event.get("extension_Contained"); @@ -332,19 +333,25 @@ int test_read_frame_limited(ReaderT& reader) { return 1; } - // Check that nothing breaks if we pass in an unavailable collection name - const auto emptyEvent = [&]() { + // Check that we get the expected exception if trying to read a non-existant + // collection + try { if constexpr (std::is_same_v) { - return podio::Frame(reader.readFrame("events", 1, {"no-collection-with-this-name", "orThisOne"})); + reader.readFrame("events", 1, {"no-collection-with-this-name", "orThisOne"}); } else { - return podio::Frame(reader.readEntry("events", 1, {"no-collection-with-this-name", "orThisOne"})); + reader.readEntry("events", 1, {"no-collection-with-this-name", "orThisOne"}); } - }(); - - if (!emptyEvent.getAvailableCollections().empty()) { - std::cerr << "Trying to read collection names that are unavailable should not break, but should also not give a " - "meaningful event" - << std::endl; + // if we haven't gotten an exception before we fail + std::cerr << "Attempting to read non-existant collections should result in an exception" << std::endl; + return 1; + } catch (std::invalid_argument& ex) { + if (ex.what() != std::string("no-collection-with-this-name is not available from Frame")) { + std::cerr << "Exception message for attempting to limit to non-existant collections not as expected: " + << ex.what() << std::endl; + return 1; + } + } catch (...) { + std::cerr << "Attempting to read a non-existant collection didn't yield the correct exception" << std::endl; return 1; } From 35c8fb0391f7a69e26f14f41e82af7aed6288d6c Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Thu, 16 Jan 2025 15:40:16 +0100 Subject: [PATCH 14/26] Remove obsolete checks --- include/podio/SIOFrameData.h | 6 +++--- src/SIOFrameData.cc | 4 ---- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/include/podio/SIOFrameData.h b/include/podio/SIOFrameData.h index 9d5882026..97e4aa85f 100644 --- a/include/podio/SIOFrameData.h +++ b/include/podio/SIOFrameData.h @@ -32,15 +32,15 @@ class SIOFrameData { /// tableBuffer containing the necessary information for unpacking the /// collections. The two size parameters denote the uncompressed size of the /// respective buffers. + /// + /// In case the limitColls contain a collection name that is not available + /// from the idTable names this throws an exception SIOFrameData(sio::buffer&& collBuffers, std::size_t dataSize, sio::buffer&& tableBuffer, std::size_t tableSize, std::vector limitColls = {}); std::optional getCollectionBuffers(const std::string& name); podio::CollectionIDTable getIDTable() { - if (m_idTable.empty()) { - readIdTable(); - } return {m_idTable.ids(), m_idTable.names()}; } diff --git a/src/SIOFrameData.cc b/src/SIOFrameData.cc index 1d083909e..ca77b0a0a 100644 --- a/src/SIOFrameData.cc +++ b/src/SIOFrameData.cc @@ -78,10 +78,6 @@ void SIOFrameData::unpackBuffers() { return; } - if (m_idTable.empty()) { - readIdTable(); - } - createBlocks(); sio::zlib_compression compressor; From 4476bd51d569aef84af1a2311d5eabc67830e3b0 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Thu, 16 Jan 2025 16:24:47 +0100 Subject: [PATCH 15/26] Update docstrings to inform about the exception --- include/podio/RNTupleReader.h | 6 ++++++ include/podio/ROOTReader.h | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/include/podio/RNTupleReader.h b/include/podio/RNTupleReader.h index aaa926476..672df4b58 100644 --- a/include/podio/RNTupleReader.h +++ b/include/podio/RNTupleReader.h @@ -63,6 +63,9 @@ class RNTupleReader { /// @returns FrameData from which a podio::Frame can be constructed if the /// category exists and if there are still entries left to read. /// Otherwise a nullptr + /// + /// @throws std::invalid_argument in case collsToRead contains collection + /// names that are not available std::unique_ptr readNextEntry(const std::string& name, const std::vector& collsToRead = {}); @@ -75,6 +78,9 @@ class RNTupleReader { /// /// @returns FrameData from which a podio::Frame can be constructed if the /// category and the desired entry exist. Otherwise a nullptr + /// + /// @throws std::invalid_argument in case collsToRead contains collection + /// names that are not available std::unique_ptr readEntry(const std::string& name, const unsigned entry, const std::vector& collsToRead = {}); diff --git a/include/podio/ROOTReader.h b/include/podio/ROOTReader.h index c46d185b5..5f8112301 100644 --- a/include/podio/ROOTReader.h +++ b/include/podio/ROOTReader.h @@ -84,6 +84,9 @@ class ROOTReader { /// @returns FrameData from which a podio::Frame can be constructed if the /// category exists and if there are still entries left to read. /// Otherwise a nullptr + /// + /// @throws std::invalid_argument in case collsToRead contains collection + /// names that are not available std::unique_ptr readNextEntry(const std::string& name, const std::vector& collsToRead = {}); @@ -96,6 +99,9 @@ class ROOTReader { /// /// @returns FrameData from which a podio::Frame can be constructed if the /// category and the desired entry exist. Otherwise a nullptr + /// + /// @throws std::invalid_argument in case collsToRead contains collection + /// names that are not available std::unique_ptr readEntry(const std::string& name, const unsigned entry, const std::vector& collsToRead = {}); From 8a7cc0cd06d495640a78e4ce650e069f6c5fc681 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Thu, 16 Jan 2025 17:03:05 +0100 Subject: [PATCH 16/26] Add basic checks for roundtripping --- tests/root_io/CMakeLists.txt | 11 +- .../selected_colls_roundtrip_rntuple.cpp | 9 ++ .../root_io/selected_colls_roundtrip_root.cpp | 8 ++ tests/selected_colls_roundtrip.h | 101 ++++++++++++++++++ tests/sio_io/CMakeLists.txt | 2 + tests/sio_io/selected_colls_roundtrip_sio.cpp | 8 ++ 6 files changed, 138 insertions(+), 1 deletion(-) create mode 100644 tests/root_io/selected_colls_roundtrip_rntuple.cpp create mode 100644 tests/root_io/selected_colls_roundtrip_root.cpp create mode 100644 tests/selected_colls_roundtrip.h create mode 100644 tests/sio_io/selected_colls_roundtrip_sio.cpp diff --git a/tests/root_io/CMakeLists.txt b/tests/root_io/CMakeLists.txt index b32889e49..1b0d5fb1d 100644 --- a/tests/root_io/CMakeLists.txt +++ b/tests/root_io/CMakeLists.txt @@ -8,6 +8,7 @@ set(root_dependent_tests read_and_write_frame_root.cpp write_interface_root.cpp read_interface_root.cpp + selected_colls_roundtrip_root.cpp ) if(ENABLE_RNTUPLE) set(root_dependent_tests @@ -17,6 +18,7 @@ if(ENABLE_RNTUPLE) read_python_frame_rntuple.cpp write_interface_rntuple.cpp read_interface_rntuple.cpp + selected_colls_roundtrip_rntuple.cpp ) endif() if(ENABLE_DATASOURCE) @@ -39,13 +41,20 @@ set_tests_properties( read_frame_root read_frame_root_multiple read_and_write_frame_root + selected_colls_roundtrip_root PROPERTIES DEPENDS write_frame_root ) if(ENABLE_RNTUPLE) - set_property(TEST read_rntuple PROPERTY DEPENDS write_rntuple) + set_tests_properties( + read_rntuple + selected_colls_roundtrip_rntuple + + PROPERTIES + DEPENDS write_rntuple + ) set_property(TEST read_interface_rntuple PROPERTY DEPENDS write_interface_rntuple) endif() diff --git a/tests/root_io/selected_colls_roundtrip_rntuple.cpp b/tests/root_io/selected_colls_roundtrip_rntuple.cpp new file mode 100644 index 000000000..1074131a6 --- /dev/null +++ b/tests/root_io/selected_colls_roundtrip_rntuple.cpp @@ -0,0 +1,9 @@ +#include "selected_colls_roundtrip.h" + +#include "podio/RNTupleReader.h" +#include "podio/RNTupleWriter.h" + +int main() { + return do_roundtrip("example_rntuple.root", + "selected_example_rntuple.root"); +} diff --git a/tests/root_io/selected_colls_roundtrip_root.cpp b/tests/root_io/selected_colls_roundtrip_root.cpp new file mode 100644 index 000000000..d2268cc21 --- /dev/null +++ b/tests/root_io/selected_colls_roundtrip_root.cpp @@ -0,0 +1,8 @@ +#include "selected_colls_roundtrip.h" + +#include "podio/ROOTReader.h" +#include "podio/ROOTWriter.h" + +int main() { + return do_roundtrip("example_frame.root", "selected_example_frame.root"); +} diff --git a/tests/selected_colls_roundtrip.h b/tests/selected_colls_roundtrip.h new file mode 100644 index 000000000..210349997 --- /dev/null +++ b/tests/selected_colls_roundtrip.h @@ -0,0 +1,101 @@ +#include + +#include +#include +#include +#include + +/// Roundtrip test for ensuring that files that have been written with Frames +/// that have been read from file selecting only a few collections to be read +/// are still properly usable when read again. +/// +/// The flow is +/// +/// - Open the original file (using one that has been produced by other I/O +/// tests) +/// - Read a Frame selecting only a few of the available collections +/// - Write this frame into the outputFile without doing any further checks +/// (that is done by other tests) +/// - Read back that file and check that it can still be used +/// - Reading full frames +/// - and frames with a selection of collections +/// +/// do_roundrip ties everything together and is the only function that needs to +/// be called by backend specific tests + +/// The collection names that will be written into the new file +const std::vector collectionSelection = {"mcparticles", "links", "userInts", "hits"}; + +/// The collections to select in the second round +const std::vector roundTripSelection = {"hits", "userInts"}; + +/// Write a new file containing only a few selected collections and only one +/// event +template +void writeSelectedFile(const std::string& originalFile, const std::string& outputFile) { + auto reader = ReaderT(); + reader.openFile(originalFile); + const auto event = podio::Frame(reader.readEntry("events", 0, collectionSelection)); + + auto writer = WriterT(outputFile); + writer.writeFrame(event, "events"); +} + +bool compareUnordered(std::vector lhs, std::vector rhs) { + std::ranges::sort(lhs); + std::ranges::sort(rhs); + return std::ranges::equal(lhs, rhs); +} + +std::ostream& operator<<(std::ostream& os, const std::vector& vec) { + os << "["; + std::string delim = ""; + for (const auto& v : vec) { + os << std::exchange(delim, ", ") << v; + } + return os << "]"; +} + +/// Read the file that has been created and check all available collections +template +int readSelectedFileFull(const std::string& filename) { + auto reader = ReaderT(); + reader.openFile(filename); + + const auto event = podio::Frame(reader.readEntry("events", 0)); + + if (!compareUnordered(event.getAvailableCollections(), collectionSelection)) { + std::cerr + << "Collection names that are available from the selected collections file are not as expected (expected: " + << collectionSelection << ", actual " << event.getAvailableCollections() << ")" << std::endl; + return 1; + } + + return 0; +} + +/// Read the file that has been created and check whether selecting on that file +/// again also works +template +int readSelectedFilePartial(const std::string& filename) { + auto reader = ReaderT(); + reader.openFile(filename); + + const auto event = podio::Frame(reader.readEntry("events", 0, roundTripSelection)); + + if (!compareUnordered(event.getAvailableCollections(), roundTripSelection)) { + std::cerr + << "Collection names that are available from the selected collections file are not as expected (expected: " + << roundTripSelection << ", actual " << event.getAvailableCollections() << ")" << std::endl; + return 1; + } + + return 0; +} + +template +int do_roundtrip(const std::string& originalFile, const std::string& outputFile) { + writeSelectedFile(originalFile, outputFile); + + return readSelectedFileFull(outputFile) + readSelectedFilePartial(outputFile); +} diff --git a/tests/sio_io/CMakeLists.txt b/tests/sio_io/CMakeLists.txt index 7f53917fe..3a2432ab7 100644 --- a/tests/sio_io/CMakeLists.txt +++ b/tests/sio_io/CMakeLists.txt @@ -5,6 +5,7 @@ set(sio_dependent_tests read_python_frame_sio.cpp write_interface_sio.cpp read_interface_sio.cpp + selected_colls_roundtrip_sio.cpp ) set(sio_libs podio::podioSioIO podio::podioIO) foreach( sourcefile ${sio_dependent_tests} ) @@ -14,6 +15,7 @@ endforeach() set_tests_properties( read_frame_sio read_and_write_frame_sio + selected_colls_roundtrip_sio PROPERTIES DEPENDS diff --git a/tests/sio_io/selected_colls_roundtrip_sio.cpp b/tests/sio_io/selected_colls_roundtrip_sio.cpp new file mode 100644 index 000000000..4ecbffda4 --- /dev/null +++ b/tests/sio_io/selected_colls_roundtrip_sio.cpp @@ -0,0 +1,8 @@ +#include "selected_colls_roundtrip.h" + +#include "podio/SIOReader.h" +#include "podio/SIOWriter.h" + +int main() { + return do_roundtrip("example_frame.sio", "selected_example_frame.sio"); +} From 2656841eba21d7ef3905e2cf34651ad9a330e44e Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Thu, 16 Jan 2025 20:52:51 +0100 Subject: [PATCH 17/26] Add content checks for roundtripped files with dropped collections --- tests/selected_colls_roundtrip.h | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/tests/selected_colls_roundtrip.h b/tests/selected_colls_roundtrip.h index 210349997..8a71e74c3 100644 --- a/tests/selected_colls_roundtrip.h +++ b/tests/selected_colls_roundtrip.h @@ -1,3 +1,5 @@ +#include "read_test.h" + #include #include @@ -24,10 +26,13 @@ /// be called by backend specific tests /// The collection names that will be written into the new file -const std::vector collectionSelection = {"mcparticles", "links", "userInts", "hits"}; +const std::vector collectionSelection = {"mcparticles", "links", "userInts", "hits", "clusters"}; /// The collections to select in the second round -const std::vector roundTripSelection = {"hits", "userInts"}; +const std::vector roundTripSelection = {"hits", "userInts", "mcparticles"}; + +/// Collections that were available originally and shouldn't be any longer +const std::vector droppedCollections = {"moreMCs", "userDoubles", "info", "refs", "hitRefs"}; /// Write a new file containing only a few selected collections and only one /// event @@ -71,6 +76,21 @@ int readSelectedFileFull(const std::string& filename) { return 1; } + for (const auto& name : droppedCollections) { + if (event.get(name)) { + std::cerr << "The frame contained a dropped collection: " << name << std::endl; + return 1; + } + } + + checkMCParticleCollection(event, reader.currentFileVersion()); + checkHitCollection(event, 0); + const auto& hits = event.get("hits"); + checkClusterCollection(event, hits); + const auto& clusters = event.get("clusters"); + checkIntUserDataCollection(event, 0); + checkLinkCollection(event, hits, clusters); + return 0; } @@ -90,6 +110,10 @@ int readSelectedFilePartial(const std::string& filename) { return 1; } + checkMCParticleCollection(event, reader.currentFileVersion()); + checkHitCollection(event, 0); + checkIntUserDataCollection(event, 0); + return 0; } From 8f20608ca24c8c8fe64075a81ccdf199f54c2cce Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Mon, 20 Jan 2025 12:03:21 +0100 Subject: [PATCH 18/26] Add include guards --- tests/selected_colls_roundtrip.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/selected_colls_roundtrip.h b/tests/selected_colls_roundtrip.h index 8a71e74c3..6a99b7053 100644 --- a/tests/selected_colls_roundtrip.h +++ b/tests/selected_colls_roundtrip.h @@ -1,3 +1,6 @@ +#ifndef PODIO_TESTS_SELECTED_COLLS_ROUNDTRIP_H // NOLINT(llvm-header-guard): folder structure not suitable +#define PODIO_TESTS_SELECTED_COLLS_ROUNDTRIP_H // NOLINT(llvm-header-guard): folder structure not suitable + #include "read_test.h" #include @@ -123,3 +126,5 @@ int do_roundtrip(const std::string& originalFile, const std::string& outputFile) return readSelectedFileFull(outputFile) + readSelectedFilePartial(outputFile); } + +#endif // PODIO_TESTS_SELECTED_COLLS_ROUNDTRIP_H From fb4abf599e6d878418d6b9b73bc7298b8f8d2d21 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Mon, 20 Jan 2025 12:03:29 +0100 Subject: [PATCH 19/26] Improve formatting --- tests/read_frame.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/read_frame.h b/tests/read_frame.h index 40f4051e6..d1e9e031c 100644 --- a/tests/read_frame.h +++ b/tests/read_frame.h @@ -183,13 +183,13 @@ int read_frames(const std::string& filename, bool assertBuildVersion = true) { } if (reader.getEntries(podio::Category::Event) != 10) { - std::cerr << "Could not read back the number of events correctly. " << "(expected:" << 10 + std::cerr << "Could not read back the number of events correctly. (expected: " << 10 << ", actual: " << reader.getEntries(podio::Category::Event) << ")" << std::endl; return 1; } if (reader.getEntries(podio::Category::Event) != reader.getEntries("other_events")) { - std::cerr << "Could not read back the number of events correctly. " << "(expected:" << 10 + std::cerr << "Could not read back the number of events correctly. (expected: " << 10 << ", actual: " << reader.getEntries("other_events") << ")" << std::endl; return 1; } From 4ac4b12a0b728a69d35e7b4afb9d2f7ad1fa5c28 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Mon, 20 Jan 2025 12:16:41 +0100 Subject: [PATCH 20/26] Don't run tests that miss inputs for sanitizers --- tests/CTestCustom.cmake | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/CTestCustom.cmake b/tests/CTestCustom.cmake index bce6fc5fd..dc10192ab 100644 --- a/tests/CTestCustom.cmake +++ b/tests/CTestCustom.cmake @@ -23,6 +23,7 @@ if ((NOT "@FORCE_RUN_ALL_TESTS@" STREQUAL "ON") AND (NOT "@USE_SANITIZER@" STREQ write_frame_root read_frame_root + selected_colls_roundtrip_root write_interface_root read_interface_root @@ -96,6 +97,7 @@ if ((NOT "@FORCE_RUN_ALL_TESTS@" STREQUAL "ON") AND (NOT "@USE_SANITIZER@" STREQ read_rntuple read_interface_rntuple + selected_colls_roundtrip_rntuple ) endif() @@ -107,6 +109,7 @@ if ((NOT "@FORCE_RUN_ALL_TESTS@" STREQUAL "ON") AND (NOT "@USE_SANITIZER@" STREQ read_rntuple write_interface_rntuple read_interface_rntuple + selected_colls_roundtrip_rntuple ) endif() From 6b048f999f300fbf913db558b4abea63b5fff2f7 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Thu, 30 Jan 2025 14:09:38 +0100 Subject: [PATCH 21/26] Remove double return --- tests/root_io/read_interface_rntuple.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/root_io/read_interface_rntuple.cpp b/tests/root_io/read_interface_rntuple.cpp index b5188203d..ebe47547c 100644 --- a/tests/root_io/read_interface_rntuple.cpp +++ b/tests/root_io/read_interface_rntuple.cpp @@ -4,6 +4,4 @@ int main(int, char**) { auto reader = podio::makeReader("example_from_rntuple_interface.root"); return read_frames(reader) + test_read_frame_limited(reader); - - return 0; } From 933fd8f75efe3f6b4e08b7dd2915765efebb5dde Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Thu, 30 Jan 2025 14:10:51 +0100 Subject: [PATCH 22/26] Bump minimum ROOT version for RNTuple support --- CMakeLists.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7876e4067..5c480736f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -82,13 +82,15 @@ option(ENABLE_JULIA "Enable Julia support. When enabled, Julia datamodels w list(APPEND CMAKE_PREFIX_PATH $ENV{ROOTSYS}) set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake) set(root_components_needed RIO Tree) +set(root_min_version 6.28.04) if(ENABLE_RNTUPLE) list(APPEND root_components_needed ROOTNTuple) + set(root_min_version 6.32) endif() if(ENABLE_DATASOURCE) list(APPEND root_components_needed ROOTDataFrame) endif() -find_package(ROOT 6.28.04 REQUIRED COMPONENTS ${root_components_needed}) +find_package(ROOT ${root_min_version} REQUIRED COMPONENTS ${root_components_needed}) # ROOT_CXX_STANDARD was introduced in https://github.com/root-project/root/pull/6466 # before that it's an empty variable so we check if it's any number > 0 From 8ce3a7aa2849cfb6ec3a8cc125d762bd1e5cf1f2 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Thu, 30 Jan 2025 14:26:51 +0100 Subject: [PATCH 23/26] Remove no longer necessary special casing --- include/podio/RNTupleReader.h | 4 +--- include/podio/RNTupleWriter.h | 4 +--- src/RNTupleReader.cc | 20 -------------------- src/RNTupleWriter.cc | 29 ----------------------------- 4 files changed, 2 insertions(+), 55 deletions(-) diff --git a/include/podio/RNTupleReader.h b/include/podio/RNTupleReader.h index 672df4b58..4a056f4f6 100644 --- a/include/podio/RNTupleReader.h +++ b/include/podio/RNTupleReader.h @@ -12,10 +12,8 @@ #include #include +#include #include -#if ROOT_VERSION_CODE >= ROOT_VERSION(6, 31, 0) - #include -#endif namespace podio { diff --git a/include/podio/RNTupleWriter.h b/include/podio/RNTupleWriter.h index b57eb09b3..49a1a128e 100644 --- a/include/podio/RNTupleWriter.h +++ b/include/podio/RNTupleWriter.h @@ -9,9 +9,7 @@ #include "TFile.h" #include #include -#if ROOT_VERSION_CODE >= ROOT_VERSION(6, 31, 0) - #include -#endif +#include #include #include diff --git a/src/RNTupleReader.cc b/src/RNTupleReader.cc index 1d75fa0f9..5e935b053 100644 --- a/src/RNTupleReader.cc +++ b/src/RNTupleReader.cc @@ -178,15 +178,11 @@ std::unique_ptr RNTupleReader::readEntry(const std::string& categ auto readerIndex = upper - 1 - m_readerEntries[category].begin(); ROOTFrameData::BufferMap buffers; -#if ROOT_VERSION_CODE >= ROOT_VERSION(6, 31, 0) // We need to create a non-bare entry here, because the entries for the // parameters are not explicitly (re)set and we need them default initialized. // In principle we would only need a bare entry for the collection data, since // we set all the fields there in any case. auto dentry = m_readers[category][readerIndex]->GetModel().CreateEntry(); -#else - auto dentry = m_readers[category][readerIndex]->GetModel()->GetDefaultEntry(); -#endif for (size_t i = 0; i < collInfo.id.size(); ++i) { if (!collsToRead.empty() && std::ranges::find(collsToRead, collInfo.name[i]) == collsToRead.end()) { @@ -207,40 +203,24 @@ std::unique_ptr RNTupleReader::readEntry(const std::string& categ if (collInfo.isSubsetCollection[i]) { auto brName = root_utils::subsetBranch(collInfo.name[i]); auto vec = new std::vector; -#if ROOT_VERSION_CODE >= ROOT_VERSION(6, 31, 0) dentry->BindRawPtr(brName, vec); -#else - dentry->CaptureValueUnsafe(brName, vec); -#endif collBuffers.references->at(0) = std::unique_ptr>(vec); } else { -#if ROOT_VERSION_CODE >= ROOT_VERSION(6, 31, 0) dentry->BindRawPtr(collInfo.name[i], collBuffers.data); -#else - dentry->CaptureValueUnsafe(collInfo.name[i], collBuffers.data); -#endif const auto relVecNames = podio::DatamodelRegistry::instance().getRelationNames(collType); for (size_t j = 0; j < relVecNames.relations.size(); ++j) { const auto relName = relVecNames.relations[j]; auto vec = new std::vector; const auto brName = root_utils::refBranch(collInfo.name[i], relName); -#if ROOT_VERSION_CODE >= ROOT_VERSION(6, 31, 0) dentry->BindRawPtr(brName, vec); -#else - dentry->CaptureValueUnsafe(brName, vec); -#endif collBuffers.references->at(j) = std::unique_ptr>(vec); } for (size_t j = 0; j < relVecNames.vectorMembers.size(); ++j) { const auto vecName = relVecNames.vectorMembers[j]; const auto brName = root_utils::vecBranch(collInfo.name[i], vecName); -#if ROOT_VERSION_CODE >= ROOT_VERSION(6, 31, 0) dentry->BindRawPtr(brName, collBuffers.vectorMembers->at(j).second); -#else - dentry->CaptureValueUnsafe(brName, collBuffers.vectorMembers->at(j).second); -#endif } } diff --git a/src/RNTupleWriter.cc b/src/RNTupleWriter.cc index 73c5db22d..4027079c2 100644 --- a/src/RNTupleWriter.cc +++ b/src/RNTupleWriter.cc @@ -41,13 +41,8 @@ void RNTupleWriter::fillParams(const GenericParameters& params, CategoryInfo& ca ROOT::Experimental::REntry* entry) { auto& paramStorage = getParamStorage(catInfo); paramStorage = params.getKeysAndValues(); -#if ROOT_VERSION_CODE >= ROOT_VERSION(6, 31, 0) entry->BindRawPtr(root_utils::getGPKeyName(), ¶mStorage.keys); entry->BindRawPtr(root_utils::getGPValueName(), ¶mStorage.values); -#else - entry->CaptureValueUnsafe(root_utils::getGPKeyName(), ¶mStorage.keys); - entry->CaptureValueUnsafe(root_utils::getGPValueName(), ¶mStorage.values); -#endif } void RNTupleWriter::writeFrame(const podio::Frame& frame, const std::string& category) { @@ -99,11 +94,7 @@ void RNTupleWriter::writeFrame(const podio::Frame& frame, const std::string& cat } } -#if ROOT_VERSION_CODE >= ROOT_VERSION(6, 31, 0) auto entry = m_categories[category].writer->GetModel().CreateBareEntry(); -#else - auto entry = m_categories[category].writer->GetModel()->CreateBareEntry(); -#endif ROOT::Experimental::RNTupleWriteOptions options; options.SetCompression(ROOT::RCompressionSetting::EDefaults::kUseGeneralPurpose); @@ -111,21 +102,13 @@ void RNTupleWriter::writeFrame(const podio::Frame& frame, const std::string& cat for (const auto& [name, coll] : collections) { auto collBuffers = coll->getBuffers(); if (collBuffers.vecPtr) { -#if ROOT_VERSION_CODE >= ROOT_VERSION(6, 31, 0) entry->BindRawPtr(name, static_cast(collBuffers.vecPtr)); -#else - entry->CaptureValueUnsafe(name, static_cast(collBuffers.vecPtr)); -#endif } if (coll->isSubsetCollection()) { auto& refColl = (*collBuffers.references)[0]; const auto brName = root_utils::subsetBranch(name); -#if ROOT_VERSION_CODE >= ROOT_VERSION(6, 31, 0) entry->BindRawPtr(brName, refColl.get()); -#else - entry->CaptureValueUnsafe(brName, refColl.get()); -#endif } else { @@ -134,11 +117,7 @@ void RNTupleWriter::writeFrame(const podio::Frame& frame, const std::string& cat int i = 0; for (auto& c : (*refColls)) { const auto brName = root_utils::refBranch(name, relVecNames.relations[i]); -#if ROOT_VERSION_CODE >= ROOT_VERSION(6, 31, 0) entry->BindRawPtr(brName, c.get()); -#else - entry->CaptureValueUnsafe(brName, c.get()); -#endif ++i; } } @@ -149,11 +128,7 @@ void RNTupleWriter::writeFrame(const podio::Frame& frame, const std::string& cat const auto typeName = "vector<" + type + ">"; const auto brName = root_utils::vecBranch(name, relVecNames.vectorMembers[i]); auto ptr = *static_cast**>(vec); -#if ROOT_VERSION_CODE >= ROOT_VERSION(6, 31, 0) entry->BindRawPtr(brName, ptr); -#else - entry->CaptureValueUnsafe(brName, ptr); -#endif ++i; } } @@ -177,11 +152,7 @@ std::unique_ptr RNTupleWriter::createModels(const std::vector& collections) { auto model = ROOT::Experimental::RNTupleModel::CreateBare(); -#if ROOT_VERSION_CODE >= ROOT_VERSION(6, 31, 0) using ROOT::Experimental::RFieldBase; -#else - using ROOT::Experimental::Detail::RFieldBase; -#endif for (auto& [name, coll] : collections) { // For the first entry in each category we also record the datamodel From ea4b3cdb31a66c570dd7e27210b7c2f1aa127648 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Fri, 31 Jan 2025 12:01:06 +0100 Subject: [PATCH 24/26] Add possibility to select colections to python API --- include/podio/ROOTLegacyReader.h | 11 +++++++---- include/podio/SIOLegacyReader.h | 11 +++++++---- python/podio/base_reader.py | 8 ++++++-- python/podio/frame_iterator.py | 18 +++++++++++++----- python/podio/test_Reader.py | 18 ++++++++++++++++++ src/ROOTLegacyReader.cc | 6 ++++-- src/SIOLegacyReader.cc | 5 +++-- 7 files changed, 58 insertions(+), 19 deletions(-) diff --git a/include/podio/ROOTLegacyReader.h b/include/podio/ROOTLegacyReader.h index 9383ca51b..9693bfdb0 100644 --- a/include/podio/ROOTLegacyReader.h +++ b/include/podio/ROOTLegacyReader.h @@ -75,20 +75,23 @@ class ROOTLegacyReader { /// Read the next data entry from which a Frame can be constructed. /// /// @note the category name has to be "events" in this case, as only that - /// category is available for legacy files. + /// category is available for legacy files. Also the collections to read + /// argument will be ignored. /// /// @returns FrameData from which a podio::Frame can be constructed if there /// are still entries left to read. Otherwise a nullptr - std::unique_ptr readNextEntry(const std::string&); + std::unique_ptr readNextEntry(const std::string&, const std::vector& = {}); /// Read the desired data entry from which a Frame can be constructed. /// /// @note the category name has to be "events" in this case, as only that - /// category is available for legacy files. + /// category is available for legacy files. Also the collections to read + /// argument will be ignored. /// /// @returns FrameData from which a podio::Frame can be constructed if the /// desired entry exists. Otherwise a nullptr - std::unique_ptr readEntry(const std::string&, const unsigned entry); + std::unique_ptr readEntry(const std::string&, const unsigned entry, + const std::vector& = {}); /// Get the number of entries for the given name /// diff --git a/include/podio/SIOLegacyReader.h b/include/podio/SIOLegacyReader.h index d490fcd5e..8ea86627e 100644 --- a/include/podio/SIOLegacyReader.h +++ b/include/podio/SIOLegacyReader.h @@ -40,20 +40,23 @@ class SIOLegacyReader { /// there are no more entries left, this returns a nullptr. /// /// @note the category name has to be "events" in this case, as only that - /// category is available for legacy files. + /// category is available for legacy files. Also the collections to read + /// argument will be ignored. /// /// @returns FrameData from which a podio::Frame can be constructed if there /// are still entries left to read. Otherwise a nullptr - std::unique_ptr readNextEntry(const std::string&); + std::unique_ptr readNextEntry(const std::string&, const std::vector& = {}); /// Read the desired data entry from which a Frame can be constructed. /// /// @note the category name has to be "events" in this case, as only that - /// category is available for legacy files. + /// category is available for legacy files. Also the collections to read + /// argument will be ignored. /// /// @returns FrameData from which a podio::Frame can be constructed if the /// desired entry exists. Otherwise a nullptr - std::unique_ptr readEntry(const std::string&, const unsigned entry); + std::unique_ptr readEntry(const std::string&, const unsigned entry, + const std::vector& = {}); /// Get the number of entries for the given name /// diff --git a/python/podio/base_reader.py b/python/podio/base_reader.py index e078aa4ad..d834c1922 100644 --- a/python/podio/base_reader.py +++ b/python/podio/base_reader.py @@ -35,17 +35,21 @@ def categories(self): """ return self._categories - def get(self, category): + def get(self, category, coll_names=None): """Get an iterator with access functionality for a given category. Args: category (str): The name of the desired category + coll_names (list[str]): The list of collections to read (optional, + all available collections will by default) Returns: FrameCategoryIterator: The iterator granting access to all Frames of the desired category """ - return FrameCategoryIterator(self._reader, category) + if self.is_legacy and coll_names: + raise ValueError("Legacy readers do not support selective reading") + return FrameCategoryIterator(self._reader, category, coll_names) @property def is_legacy(self): diff --git a/python/podio/frame_iterator.py b/python/podio/frame_iterator.py index 1fa97d828..2ba11960b 100644 --- a/python/podio/frame_iterator.py +++ b/python/podio/frame_iterator.py @@ -11,15 +11,18 @@ class FrameCategoryIterator: reader as well as accessing specific entries """ - def __init__(self, reader, category): + def __init__(self, reader, category, coll_names=None): """Construct the iterator from the reader and the category. Args: reader (Reader): Any podio reader offering access to Frames category (str): The category name of the Frames to be iterated over + coll_names (list[str]): The list of collections to read (optional, + all available collections will by default) """ self._reader = reader self._category = category + self._coll_names = coll_names or [] def __iter__(self): """The trivial implementation for the iterator protocol.""" @@ -27,9 +30,12 @@ def __iter__(self): def __next__(self): """Get the next available Frame or stop.""" - frame_data = self._reader.readNextEntry(self._category) - if frame_data: - return Frame(std.move(frame_data)) + try: + frame_data = self._reader.readNextEntry(self._category, self._coll_names) + if frame_data: + return Frame(std.move(frame_data)) + except std.invalid_argument as e: + raise ValueError(e.what()) from e raise StopIteration @@ -52,7 +58,7 @@ def __getitem__(self, entry): raise IndexError try: - frame_data = self._reader.readEntry(self._category, entry) + frame_data = self._reader.readEntry(self._category, entry, self._coll_names) except std.bad_function_call: print( "Error: Unable to read an entry of the input file. This can " @@ -62,6 +68,8 @@ def __getitem__(self, entry): "library folder with your data model\n" ) raise + except std.invalid_argument as e: + raise ValueError(e.what()) from e if frame_data: return Frame(std.move(frame_data)) diff --git a/python/podio/test_Reader.py b/python/podio/test_Reader.py index d6fc90640..24fb39a06 100644 --- a/python/podio/test_Reader.py +++ b/python/podio/test_Reader.py @@ -89,6 +89,24 @@ def test_invalid_datamodel_version(self): with self.assertRaises(KeyError): self.reader.current_file_version("non-existant-model") + def test_limited_collections(self): + """Make sure only reading a subset of collections works""" + # We only do bare checks here as more extensive tests are already done + # on the c++ side + event = self.reader.get("events", ["hits", "info", "links"])[0] + self.assertEqual(set(event.getAvailableCollections()), {"hits", "info", "links"}) + + def test_invalid_limited_collections(self): + """Ensure that requesting non existant collections raises a value error""" + with self.assertRaises(ValueError): + events = self.reader.get("events", ["non-existent-collection"]) + _ = events[0] + + with self.assertRaises(ValueError): + events = self.reader.get("events", ["non-existent-collection"]) + for _ in events: + pass + class LegacyReaderTestCaseMixin: """Common test cases for the legacy readers python bindings. diff --git a/src/ROOTLegacyReader.cc b/src/ROOTLegacyReader.cc index 87f7a4e1b..d94303aa8 100644 --- a/src/ROOTLegacyReader.cc +++ b/src/ROOTLegacyReader.cc @@ -17,14 +17,16 @@ namespace podio { -std::unique_ptr ROOTLegacyReader::readNextEntry(const std::string& name) { +std::unique_ptr ROOTLegacyReader::readNextEntry(const std::string& name, + const std::vector&) { if (name != m_categoryName) { return nullptr; } return readEntry(); } -std::unique_ptr ROOTLegacyReader::readEntry(const std::string& name, unsigned entry) { +std::unique_ptr ROOTLegacyReader::readEntry(const std::string& name, unsigned entry, + const std::vector&) { if (name != m_categoryName) { return nullptr; } diff --git a/src/SIOLegacyReader.cc b/src/SIOLegacyReader.cc index dcf3d9d18..dae37b964 100644 --- a/src/SIOLegacyReader.cc +++ b/src/SIOLegacyReader.cc @@ -23,7 +23,7 @@ void SIOLegacyReader::openFile(const std::string& filename) { readCollectionIDTable(); } -std::unique_ptr SIOLegacyReader::readNextEntry(const std::string& name) { +std::unique_ptr SIOLegacyReader::readNextEntry(const std::string& name, const std::vector&) { if (name != m_categoryName) { return nullptr; } @@ -47,7 +47,8 @@ std::unique_ptr SIOLegacyReader::readNextEntry(const std::string& m_tableUncLength); } -std::unique_ptr SIOLegacyReader::readEntry(const std::string& name, const unsigned entry) { +std::unique_ptr SIOLegacyReader::readEntry(const std::string& name, const unsigned entry, + const std::vector&) { if (name != m_categoryName) { return nullptr; } From 51ab49d786d242cfe37687bca926b663ee3efb0d Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Fri, 31 Jan 2025 17:56:16 +0100 Subject: [PATCH 25/26] Update readme to reflect new ROOT requirements --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 3bf5ab347..3e385216a 100644 --- a/README.md +++ b/README.md @@ -18,12 +18,14 @@ use a recent LCG or Key4hep stack release. On Mac OS or Ubuntu, you need to install the following software. -### ROOT 6.28.04 +### ROOT 6.28.04 (6.32 for RNTuple support) Install ROOT 6.28.04 (or later) built with c++20 support and set up your ROOT environment: source /bin/thisroot.sh +If you want to build with RNTuple support, you will need 6.32 at least. + ### Catch2 v3 (optional) Podio uses [Catch2 v3](https://github.com/catchorg/Catch2/tree/devel) for some From 8c4152f040239a46356319a0ea71a1864d4d7267 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Mon, 3 Feb 2025 21:07:34 +0100 Subject: [PATCH 26/26] Add possibility to limit collections to RDataSource --- include/podio/DataSource.h | 30 ++++++++++++++--- src/DataSource.cc | 23 +++++++------ tests/root_io/read_with_rdatasource_root.cpp | 34 ++++++++++++++++++++ 3 files changed, 72 insertions(+), 15 deletions(-) diff --git a/include/podio/DataSource.h b/include/podio/DataSource.h index ba00fe4e4..d13f78487 100644 --- a/include/podio/DataSource.h +++ b/include/podio/DataSource.h @@ -23,12 +23,25 @@ class DataSource : public ROOT::RDF::RDataSource { /// /// @brief Construct the podio::DataSource from the provided file. /// - explicit DataSource(const std::string& filePath, int nEvents = -1); + /// @param filePath Path to the file that should be read + /// @param nEvents Number of events to process (optional, defaults to -1 for + /// all events) + /// @param collsToRead The collections that should be made available (optional, + /// defaults to empty vector for all collections) + /// + explicit DataSource(const std::string& filePath, int nEvents = -1, const std::vector& collsToRead = {}); /// /// @brief Construct the podio::DataSource from the provided file list. /// - explicit DataSource(const std::vector& filePathList, int nEvents = -1); + /// @param filePathList Paths to the files that should be read + /// @param nEvents Number of events to process (optional, defaults to -1 for + /// all events) + /// @param collsToRead The collections that should be made available (optional, + /// defaults to empty vector for all collections) + /// + explicit DataSource(const std::vector& filePathList, int nEvents = -1, + const std::vector& collsToRead = {}); /// /// @brief Inform the podio::DataSource of the desired level of parallelism. @@ -139,7 +152,7 @@ class DataSource : public ROOT::RDF::RDataSource { /// /// @param[in] nEvents Number of events. /// - void SetupInput(int nEvents); + void SetupInput(int nEvents, const std::vector& collsToRead); }; /// @@ -147,17 +160,24 @@ class DataSource : public ROOT::RDF::RDataSource { /// /// @param[in] filePathList List of file paths from which the RDataFrame /// will be created. +/// @param[in] collsToRead List of collection names that should be made +/// available +/// /// @return RDataFrame created from input file list. /// -ROOT::RDataFrame CreateDataFrame(const std::vector& filePathList); +ROOT::RDataFrame CreateDataFrame(const std::vector& filePathList, + const std::vector& collsToRead = {}); /// /// @brief Create RDataFrame from a Podio file. /// /// @param[in] filePath File path from which the RDataFrame will be created. +/// @param[in] collsToRead List of collection names that should be made +/// available +/// /// @return RDataFrame created from input file list. /// -ROOT::RDataFrame CreateDataFrame(const std::string& filePath); +ROOT::RDataFrame CreateDataFrame(const std::string& filePath, const std::vector& collsToRead = {}); } // namespace podio #endif /* PODIO_DATASOURCE_H */ diff --git a/src/DataSource.cc b/src/DataSource.cc index 4b8fac0af..05334a2bd 100644 --- a/src/DataSource.cc +++ b/src/DataSource.cc @@ -13,17 +13,19 @@ #include namespace podio { -DataSource::DataSource(const std::string& filePath, int nEvents) : m_nSlots{1} { +DataSource::DataSource(const std::string& filePath, int nEvents, const std::vector& collsToRead) : + m_nSlots{1} { m_filePathList.emplace_back(filePath); - SetupInput(nEvents); + SetupInput(nEvents, collsToRead); } -DataSource::DataSource(const std::vector& filePathList, int nEvents) : +DataSource::DataSource(const std::vector& filePathList, int nEvents, + const std::vector& collNames) : m_nSlots{1}, m_filePathList{filePathList} { - SetupInput(nEvents); + SetupInput(nEvents, collNames); } -void DataSource::SetupInput(int nEvents) { +void DataSource::SetupInput(int nEvents, const std::vector& collsToRead) { if (m_filePathList.empty()) { throw std::runtime_error("podio::DataSource: No input files provided!"); } @@ -36,7 +38,7 @@ void DataSource::SetupInput(int nEvents) { unsigned int nEventsInFiles = 0; auto podioReader = podio::makeReader(m_filePathList); nEventsInFiles = podioReader.getEntries(podio::Category::Event); - frame = podioReader.readFrame(podio::Category::Event, 0); + frame = podioReader.readFrame(podio::Category::Event, 0, collsToRead); // Determine over how many events to run if (nEventsInFiles <= 0) { @@ -173,14 +175,15 @@ std::string DataSource::GetTypeName(std::string_view columnName) const { return m_columnTypes.at(typeIndex); } -ROOT::RDataFrame CreateDataFrame(const std::vector& filePathList) { - ROOT::RDataFrame rdf(std::make_unique(filePathList)); +ROOT::RDataFrame CreateDataFrame(const std::vector& filePathList, + const std::vector& collsToRead) { + ROOT::RDataFrame rdf(std::make_unique(filePathList, -1, collsToRead)); return rdf; } -ROOT::RDataFrame CreateDataFrame(const std::string& filePath) { - ROOT::RDataFrame rdf(std::make_unique(filePath)); +ROOT::RDataFrame CreateDataFrame(const std::string& filePath, const std::vector& collsToRead) { + ROOT::RDataFrame rdf(std::make_unique(filePath, -1, collsToRead)); return rdf; } diff --git a/tests/root_io/read_with_rdatasource_root.cpp b/tests/root_io/read_with_rdatasource_root.cpp index 29bad4319..7ef70a4ed 100644 --- a/tests/root_io/read_with_rdatasource_root.cpp +++ b/tests/root_io/read_with_rdatasource_root.cpp @@ -1,6 +1,8 @@ #include "datamodel/ExampleClusterCollection.h" #include "podio/DataSource.h" +#include "podio/Reader.h" +#include #include #include @@ -28,8 +30,40 @@ int main(int argc, const char* argv[]) { dframe.Describe().Print(); std::cout << std::endl; + const auto expectedCollNames = [&inputFile]() { + auto reader = podio::makeReader(inputFile); + auto cols = reader.readNextEvent().getAvailableCollections(); + std::ranges::sort(cols); + return cols; + }(); + const auto allColNames = [&dframe]() { + auto cols = dframe.GetColumnNames(); + std::ranges::sort(cols); + return cols; + }(); + + if (!std::ranges::equal(expectedCollNames, allColNames)) { + std::cerr << "Column names are note as expected\n expected: ["; + for (const auto& name : expectedCollNames) { + std::cerr << name << " "; + } + std::cerr << "]\n actual: ["; + for (const auto& name : allColNames) { + std::cerr << name << " "; + } + std::cerr << "]" << std::endl; + + return EXIT_FAILURE; + } + auto cluterEnergy = dframe.Define("cluster_energy", getEnergy, {"clusters"}).Histo1D("cluster_energy"); cluterEnergy->Print(); + dframe = podio::CreateDataFrame(inputFile, {"hits"}); + if (dframe.GetColumnNames()[0] != "hits") { + std::cerr << "Limiting to only one collection didn't work as expected" << std::endl; + return EXIT_FAILURE; + } + return EXIT_SUCCESS; }