Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added RNTuple read/write prototype #47402

Open
wants to merge 1 commit into
base: CMSSW_15_1_RNTUPLE_X
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions FWIO/RNTuple/bin/BuildFile.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
<bin name="edmRntupleStorage" file="edmRntupleStorage.cc">
<use name="rootntuple"/>
<use name="boost_program_options"/>
</bin>
152 changes: 152 additions & 0 deletions FWIO/RNTuple/bin/edmRntupleStorage.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
#include "TFile.h"
#include "ROOT/RNTupleReader.hxx"
#include "ROOT/RNTuple.hxx"
#include <iostream>
#include <sstream>
#include <boost/program_options.hpp>

namespace {

class InfoDump {
public:
InfoDump(std::string iOut) : dump_(std::move(iOut)) {}

std::optional<std::string_view> nextLine();
std::optional<std::pair<std::string_view, unsigned long long>> nextFieldInfo();
void moveToStartOfFields();
void moveToLineWith(std::string_view);

private:
std::string dump_;
std::string::size_type start_ = 0;
};

std::optional<std::string_view> InfoDump::nextLine() {
auto lastStart = start_;
start_ = dump_.find('\n', start_);
if (start_ == std::string::npos) {
return std::optional<std::string_view>();
}
return std::string_view(dump_.data() + lastStart, start_++ - lastStart);
}

void InfoDump::moveToStartOfFields() {
auto line = nextLine();
while (line) {
if (*line == "COLUMN DETAILS") {
nextLine();
return;
}
line = nextLine();
}
return;
}

std::optional<std::pair<std::string_view, unsigned long long>> InfoDump::nextFieldInfo() {
auto line = nextLine();
if (not line) {
return {};
}
auto name = line->substr(2, line->find_first_of(" .", 2) - 2);

nextLine();
nextLine();
nextLine();
nextLine();
line = nextLine();
//std::cout <<line->substr(line->find_first_not_of(" ",line->find_first_of(":")+1))<<std::endl;
auto size = std::atoll(line->substr(line->find_first_not_of(" ", line->find_first_of(":") + 1)).data());
moveToLineWith("............................................................");
return std::make_pair(name, size);
}

void InfoDump::moveToLineWith(std::string_view iCheck) {
auto line = nextLine();
while (line) {
if (*line == iCheck) {
return;
}
line = nextLine();
}
return;
}
} // namespace

int main(int iArgc, char const* iArgv[]) {
// Add options here

boost::program_options::options_description desc("Allowed options");
desc.add_options()("help,h", "print help message")(
"file,f", boost::program_options::value<std::string>(), "data file")("print,P", "Print list of data products")(
"verbose,v", "Verbose printout")("printProductDetails,p", "Call PrintInfo() for selected rntuple")(
"rntuple,r", boost::program_options::value<std::string>(), "Select rntuple used with -P and -p options")(
"sizeSummary,s", "Print size on disk for each data product")(
"events,e",
"Print list of all Events, Runs, and LuminosityBlocks in the file sorted by run number, luminosity block number, "
"and event number. Also prints the entry numbers and whether it is possible to use fast copy with the file.")(
"eventsInLumis", "Print how many Events are in each LuminosityBlock.");

// What rntuples do we require for this to be a valid collection?
std::vector<std::string> expectedRNTuples;
expectedRNTuples.push_back("MetaData");
expectedRNTuples.push_back("Events");

boost::program_options::positional_options_description p;
p.add("file", -1);

boost::program_options::variables_map vm;

try {
boost::program_options::store(
boost::program_options::command_line_parser(iArgc, iArgv).options(desc).positional(p).run(), vm);
} catch (boost::program_options::error const& x) {
std::cerr << "Option parsing failure:\n" << x.what() << "\n\n";
std::cerr << desc << "\n";
return 1;
}

boost::program_options::notify(vm);

if (vm.count("help")) {
std::cout << desc << "\n";
return 1;
}

using namespace ROOT::Experimental;

auto file = TFile::Open(vm["file"].as<std::string>().c_str(), "r");

auto ntuple = RNTupleReader::Open(*file->Get<ROOT::RNTuple>("Events"));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note here and everywhere else: The caller is responsible for deleteing the pointer returned by Get<ROOT::RNTuple> (because it's not inheriting from TObject and thus no automatic memory-management)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So could this be handled inside ROOT? One can have TFile::Get have different return types for different classes via

#include <memory>
#include <concepts>
struct RNTuple;

struct TFile {
    template<typename T>
    requires requires {requires !std::same_as<T, RNTuple>;}
    T* Get();

    template<typename T>
    requires requires {requires std::same_as<T, RNTuple>;}
    std::unique_ptr<T> Get();

};

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We will discuss. Note however that TFile::Get always hands out memory ownership for classes that don't inherit from TObject and the caller is responsible for deleteing it to avoid memory leaks.

Also just to point out, you don't need to keep the ROOT::RNTuple anchor alive, a local std::unique_ptr<ROOT::RNTuple> anchor(file->Get<ROOT::RNTuple>("Events")); is sufficient.


if (vm.count("printProductDetails")) {
ntuple->PrintInfo(ENTupleInfo::kStorageDetails, std::cout);
return 0;
}
std::stringstream s;
ntuple->PrintInfo(ENTupleInfo::kStorageDetails, s);

InfoDump info{s.str()};

info.moveToStartOfFields();

std::string presentField;
unsigned long long size = 0;
auto field = info.nextFieldInfo();
while (field) {
if (field->first == presentField) {
size += field->second;
} else {
if (not presentField.empty()) {
std::cout << presentField << " " << size << std::endl;
}
presentField = field->first;
size = 0;
}
field = info.nextFieldInfo();
}
if (not presentField.empty()) {
std::cout << presentField << " " << size << std::endl;
}

return 0;
}
8 changes: 8 additions & 0 deletions FWIO/RNTuple/plugins/BuildFile.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
<use name="DataFormats/Provenance"/>
<use name="FWCore/Framework"/>
<use name="FWCore/MessageLogger"/>
<use name="FWCore/ParameterSet"/>
<use name="FWCore/Utilities"/>
<use name="DataFormats/Common"/>
<use name="rootntuple"/>
<flags EDM_PLUGIN="1"/>
86 changes: 86 additions & 0 deletions FWIO/RNTuple/plugins/DataProductsRNTuple.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
#include "DataProductsRNTuple.h"
#include "ROOT/RNTuple.hxx"
#include "IOPool/Common/interface/getWrapperBasePtr.h"
#include "FWCore/Utilities/interface/EDMException.h"

using namespace edm::input;
using namespace ROOT::Experimental;

namespace {
std::string fixName(std::string_view iName) {
if (not iName.empty() and iName.back() == '.') {
iName.remove_suffix(1);
}
return std::string(iName);
}

std::unique_ptr<ROOT::RNTuple> get_and_check_RNTuple(TFile* iFile, std::string const& iName) {
auto ret = std::unique_ptr<ROOT::RNTuple>(iFile->Get<ROOT::RNTuple>(iName.c_str()));
if (not ret) {
throw edm::Exception(edm::errors::FileReadError)
<< "The entry for '" << iName << "' does not exist or is not an RNTuple";
}
return ret;
}

} // namespace

DataProductsRNTuple::DataProductsRNTuple(TFile* iFile,
std::string const& iName,
std::string const& iAux,
ROOT::Experimental::RNTupleReadOptions const& iOps)
: reader_(RNTupleReader::Open(*get_and_check_RNTuple(iFile, iName), iOps)) {
auxDesc_ = reader_->GetDescriptor().FindFieldId(iAux);
}

bool DataProductsRNTuple::setupToReadProductIfAvailable(ProductDescription& iProduct) {
auto fixedName = fixName(iProduct.branchName());
auto desc = reader_->GetDescriptor().FindFieldId(fixedName);
if (desc == ROOT::Experimental::kInvalidDescriptorId) {
return false;
}
iProduct.initFromDictionary();
iProduct.setOnDemand(true);
infos_.emplace(iProduct.branchID().id(), ProductInfo(iProduct.wrappedName(), desc, std::move(fixedName)));
return true;
}

TClass const* DataProductsRNTuple::WrapperFactory::wrapperBase() {
static TClass const* const s_base = TClass::GetClass("edm::WrapperBase");
return s_base;
}

DataProductsRNTuple::WrapperFactory::WrapperFactory(std::string const& iTypeName)
: wrapperClass_(TClass::GetClass(iTypeName.c_str())) {
offsetToWrapperBase_ = wrapperClass_->GetBaseClassOffset(wrapperBase());
}

void DataProductsRNTuple::WrapperFactory::Deleter::operator()(void* iPtr) const { class_->Destructor(iPtr); }

std::unique_ptr<void, DataProductsRNTuple::WrapperFactory::Deleter> DataProductsRNTuple::WrapperFactory::newWrapper()
const {
return std::unique_ptr<void, Deleter>(wrapperClass_->New(), Deleter(wrapperClass_));
}

std::shared_ptr<edm::WrapperBase> DataProductsRNTuple::WrapperFactory::toWrapperBase(
std::unique_ptr<void, Deleter> iProduct) const {
return getWrapperBasePtr(iProduct.release(), offsetToWrapperBase_);
}

std::shared_ptr<edm::WrapperBase> DataProductsRNTuple::dataProduct(edm::BranchID const& iProduct, int iEntry) {
auto const& info = infos_.find(iProduct.id());
if (info == infos_.end()) {
throw cms::Exception("RNTupleError") << " unable to find branch id " << iProduct.id() << " for entry " << iEntry;
}

//std::cout <<"dataProduct "<<info->second.name_<<std::endl;
auto product = info->second.factory_.newWrapper();
if (not info->second.view_) {
info->second.view_ = reader_->GetView<void>(info->second.descriptor_, product.get());
} else {
info->second.view_->BindRawPtr(product.get());
}
(*info->second.view_)(iEntry);

return info->second.factory_.toWrapperBase(std::move(product));
}
78 changes: 78 additions & 0 deletions FWIO/RNTuple/plugins/DataProductsRNTuple.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
#if !defined(DataProductsRNTuple_h)
#define DataProductsRNTuple_h

#include "DataFormats/Provenance/interface/ProductDescription.h"
#include "DataFormats/Common/interface/WrapperBase.h"
#include "FWCore/Utilities/interface/InputType.h"
#include "ROOT/RNTupleReader.hxx"
#include "ROOT/RNTupleReadOptions.hxx"
#include "TFile.h"
#include "TClass.h"
#include <string>
#include <memory>
#include <optional>

namespace edm::input {
class DataProductsRNTuple {
public:
DataProductsRNTuple(TFile*,
std::string const& iName,
std::string const& iAux,
ROOT::Experimental::RNTupleReadOptions const& iOpts);

bool setupToReadProductIfAvailable(ProductDescription&);

std::shared_ptr<edm::WrapperBase> dataProduct(edm::BranchID const&, int iEntry);

template <typename T>
ROOT::Experimental::RNTupleView<T> auxView(std::shared_ptr<T> oStorage) {
return reader_->GetView(auxDesc_, std::move(oStorage));
}

ROOT::Experimental::NTupleSize_t numberOfEntries() const { return reader_->GetNEntries(); }

ROOT::Experimental::DescriptorId_t findDescriptorID(std::string const& iFieldName) {
return reader_->GetDescriptor().FindFieldId(iFieldName);
}

template <typename T>
ROOT::Experimental::RNTupleView<T> viewFor(ROOT::Experimental::DescriptorId_t iID, std::shared_ptr<T> oStorage) {
return reader_->GetView(iID, std::move(oStorage));
}

void printInfo(std::ostream& iStream) { reader_->PrintInfo(ROOT::Experimental::ENTupleInfo::kMetrics, iStream); }

private:
struct WrapperFactory {
struct Deleter {
Deleter(TClass* iClass) : class_(iClass) {}
void operator()(void*) const;
TClass* class_;
};

WrapperFactory(std::string const& iTypeName);

std::unique_ptr<void, Deleter> newWrapper() const;
std::shared_ptr<edm::WrapperBase> toWrapperBase(std::unique_ptr<void, Deleter>) const;
TClass* wrapperClass_;
Int_t offsetToWrapperBase_;

static TClass const* wrapperBase();
};

struct ProductInfo {
ProductInfo(std::string const& iTypeName, ROOT::Experimental::DescriptorId_t iDesc, std::string iName)
: factory_(iTypeName), descriptor_(iDesc), name_(std::move(iName)) {}
WrapperFactory factory_;
ROOT::Experimental::DescriptorId_t descriptor_;
std::string name_;
std::optional<ROOT::Experimental::RNTupleView<void>> view_;
};

std::unique_ptr<ROOT::Experimental::RNTupleReader> reader_;
std::unordered_map<edm::BranchID::value_type, ProductInfo> infos_;
ROOT::Experimental::DescriptorId_t auxDesc_;
};
} // namespace edm::input

#endif
58 changes: 58 additions & 0 deletions FWIO/RNTuple/plugins/RNTupleDelayedReader.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#include "RNTupleDelayedReader.h"
#include "DataProductsRNTuple.h"

#include "DataFormats/Common/interface/EDProductGetter.h"
#include "DataFormats/Common/interface/RefCoreStreamer.h"

#include "FWCore/Framework/interface/SharedResourcesAcquirer.h"
#include "FWCore/Framework/interface/SharedResourcesRegistry.h"

#include "IOPool/Common/interface/getWrapperBasePtr.h"

#include "FWCore/Utilities/interface/EDMException.h"

namespace edm::input {
RNTupleDelayedReader::RNTupleDelayedReader(DataProductsRNTuple* iRNTuple,
SharedResourcesAcquirer* iAcquirer,
std::recursive_mutex* iMutex)
: rntuple_(iRNTuple), resourceAcquirer_(iAcquirer), mutex_(iMutex) {}

std::pair<SharedResourcesAcquirer*, std::recursive_mutex*> RNTupleDelayedReader::sharedResources_() const {
return std::make_pair(resourceAcquirer_, mutex_);
}

std::shared_ptr<WrapperBase> RNTupleDelayedReader::getProduct_(BranchID const& k, EDProductGetter const* ep) {
if (lastException_) {
try {
std::rethrow_exception(lastException_);
} catch (edm::Exception const& e) {
//avoid growing the context each time the exception is rethrown.
auto copy = e;
copy.addContext("Rethrowing an exception that happened on a different read request.");
throw copy;
} catch (cms::Exception& e) {
//If we do anything here to 'copy', we would lose the actual type of the exception.
e.addContext("Rethrowing an exception that happened on a different read request.");
throw;
}
}

setRefCoreStreamer(ep);
//make code exception safe
std::shared_ptr<void> refCoreStreamerGuard(nullptr, [](void*) { setRefCoreStreamer(false); });

std::shared_ptr<WrapperBase> edp;
try {
edp = rntuple_->dataProduct(k, entry_);
} catch (...) {
lastException_ = std::current_exception();
std::rethrow_exception(lastException_);
}
//if (rntuple_->branchType() == InEvent) {
// CMS-THREADING For the primary input source calls to this function need to be serialized
//InputFile::reportReadBranch(inputType_, std::string(br->GetName()));
//}
Comment on lines +51 to +54
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this commented out code still useful?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We will need to do such reporting in the future so keeping it is probably a good idea.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added this as a future task


return edp;
}
} // namespace edm::input
Loading