diff --git a/include/podio/SIOFrameData.h b/include/podio/SIOFrameData.h index dd2dddb20..25ac7120c 100644 --- a/include/podio/SIOFrameData.h +++ b/include/podio/SIOFrameData.h @@ -35,11 +35,13 @@ class SIOFrameData { /// tableBuffer containing the necessary information for unpacking the /// collections. The two size parameters denote the uncompressed size of the /// respective buffers. - SIOFrameData(sio::buffer&& collBuffers, std::size_t dataSize, sio::buffer&& tableBuffer, std::size_t tableSize) : + SIOFrameData(sio::buffer&& collBuffers, std::size_t dataSize, sio::buffer&& tableBuffer, std::size_t tableSize, + std::vector limitColls = {}) : m_recBuffer(std::move(collBuffers)), m_tableBuffer(std::move(tableBuffer)), m_dataSize(dataSize), - m_tableSize(tableSize) { + m_tableSize(tableSize), + m_limitColls(std::move(limitColls)) { } std::optional getCollectionBuffers(const std::string& name); @@ -79,6 +81,10 @@ class SIOFrameData { std::vector m_subsetCollectionBits{}; podio::GenericParameters m_parameters{}; + + /// The collections that should be made available for a Frame constructed from + /// this (if non-empty) + std::vector m_limitColls{}; }; } // namespace podio diff --git a/include/podio/SIOReader.h b/include/podio/SIOReader.h index f9f5ba02a..62585e1df 100644 --- a/include/podio/SIOReader.h +++ b/include/podio/SIOReader.h @@ -37,21 +37,39 @@ class SIOReader { /// Read the next data entry for a given category. /// + /// @note Given how the SIO files are currently laid out it is in fact not + /// possible to only read a subset of a Frame. Rather the subset of + /// collections to read will be an artificial limit on the returned + /// SIOFrameData. Limiting the collections to read will not improve I/O + /// performance. + /// /// @param name The category name for which to read the next entry + /// @param collsToRead (optional) the collection names that should be read. 
If + /// not provided (or empty) all collections will be read /// /// @returns FrameData from which a podio::Frame can be constructed if the /// category exists and if there are still entries left to read. /// Otherwise a nullptr - std::unique_ptr readNextEntry(const std::string& name); + std::unique_ptr readNextEntry(const std::string& name, + const std::vector& collsToRead = {}); /// Read the desired data entry for a given category. /// + /// @note Given how the SIO files are currently laid out it is in fact not + /// possible to only read a subset of a Frame. Rather the subset of + /// collections to read will be an artificial limit on the returned + /// SIOFrameData. Limiting the collections to read will not improve I/O + /// performance. + /// /// @param name The category name for which to read the next entry /// @param entry The entry number to read + /// @param collsToRead (optional) the collection names that should be read. If + /// not provided (or empty) all collections will be read /// /// @returns FrameData from which a podio::Frame can be constructed if the /// category and the desired entry exist. Otherwise a nullptr - std::unique_ptr readEntry(const std::string& name, const unsigned entry); + std::unique_ptr readEntry(const std::string& name, const unsigned entry, + const std::vector& collsToRead = {}); /// Get the number of entries for the given name /// diff --git a/src/SIOFrameData.cc b/src/SIOFrameData.cc index 5615463a0..28c78acef 100644 --- a/src/SIOFrameData.cc +++ b/src/SIOFrameData.cc @@ -18,6 +18,10 @@ std::optional SIOFrameData::getCollectionBuffers(c const auto nameIt = std::find(std::begin(names), std::end(names), name); // collection indices start at 1! const auto index = std::distance(std::begin(names), nameIt) + 1; + // This collection is not available (artificially!) 
+ if (m_availableBlocks[index] == 0) { + return std::nullopt; + } // Mark this block as consumed m_availableBlocks[index] = 0; @@ -38,11 +42,8 @@ std::vector SIOFrameData::getAvailableCollections() { std::vector collections; for (size_t i = 1; i < m_blocks.size(); ++i) { if (m_availableBlocks[i]) { - // We have to get the collID of this collection in the idTable as there is - // no guarantee that it coincides with the index in the blocks. - // Additionally, collection indices start at 1 - const auto collID = m_idTable.ids()[i - 1]; - collections.push_back(m_idTable.name(collID).value()); + const auto name = m_idTable.names()[i - 1]; + collections.push_back(name); } } @@ -67,6 +68,22 @@ void SIOFrameData::unpackBuffers() { sio::buffer uncBuffer{m_dataSize}; compressor.uncompress(m_recBuffer.span(), uncBuffer); sio::api::read_blocks(uncBuffer.span(), m_blocks); + + if (m_limitColls.empty()) { + return; + } + + // In order to save on memory and to not litter the rest of the implementation + // with similar checks, we immediately throw away all collections that should + // not become available + for (size_t i = 1; i < m_blocks.size(); ++i) { + const auto name = m_idTable.names()[i - 1]; + if (std::ranges::find(m_limitColls, name) == m_limitColls.end()) { + auto buffers = dynamic_cast(m_blocks[i].get())->getBuffers(); + buffers.deleteBuffers(buffers); + m_availableBlocks[i] = 0; + } + } } void SIOFrameData::createBlocks() { diff --git a/src/SIOReader.cc b/src/SIOReader.cc index a54e5670b..4d9147b6f 100644 --- a/src/SIOReader.cc +++ b/src/SIOReader.cc @@ -26,7 +26,8 @@ void SIOReader::openFile(const std::string& filename) { readEDMDefinitions(); // Potentially could do this lazily } -std::unique_ptr SIOReader::readNextEntry(const std::string& name) { +std::unique_ptr SIOReader::readNextEntry(const std::string& name, + const std::vector& collsToRead) { // Skip to where the next record of this name starts in the file, based on // how many times we have already read this 
name // @@ -44,14 +45,15 @@ std::unique_ptr SIOReader::readNextEntry(const std::string& name) m_nameCtr[name]++; return std::make_unique(std::move(dataBuffer), dataInfo._uncompressed_length, std::move(tableBuffer), - tableInfo._uncompressed_length); + tableInfo._uncompressed_length, collsToRead); } -std::unique_ptr SIOReader::readEntry(const std::string& name, const unsigned entry) { +std::unique_ptr SIOReader::readEntry(const std::string& name, const unsigned entry, + const std::vector& collsToRead) { // NOTE: Will create or overwrite the entry counter // All checks are done in the following function m_nameCtr[name] = entry; - return readNextEntry(name); + return readNextEntry(name, collsToRead); } std::vector SIOReader::getAvailableCategories() const { diff --git a/tests/sio_io/read_frame_sio.cpp b/tests/sio_io/read_frame_sio.cpp index 8a27743ac..34bad94d8 100644 --- a/tests/sio_io/read_frame_sio.cpp +++ b/tests/sio_io/read_frame_sio.cpp @@ -12,5 +12,5 @@ int main(int argc, char* argv[]) { } return read_frames(inputFile, assertBuildVersion) + - test_frame_aux_info(inputFile); + test_frame_aux_info(inputFile) + test_read_frame_limited(inputFile); }