-
Notifications
You must be signed in to change notification settings - Fork 4.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Added RNTuple read/write prototype #47402
base: CMSSW_15_1_RNTUPLE_X
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
<bin name="edmRntupleStorage" file="edmRntupleStorage.cc"> | ||
<use name="rootntuple"/> | ||
<use name="boost_program_options"/> | ||
</bin> |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,152 @@ | ||
#include "TFile.h" | ||
#include "ROOT/RNTupleReader.hxx" | ||
#include "ROOT/RNTuple.hxx" | ||
#include <iostream> | ||
#include <sstream> | ||
#include <boost/program_options.hpp> | ||
|
||
namespace { | ||
|
||
class InfoDump { | ||
public: | ||
InfoDump(std::string iOut) : dump_(std::move(iOut)) {} | ||
|
||
std::optional<std::string_view> nextLine(); | ||
std::optional<std::pair<std::string_view, unsigned long long>> nextFieldInfo(); | ||
void moveToStartOfFields(); | ||
void moveToLineWith(std::string_view); | ||
|
||
private: | ||
std::string dump_; | ||
std::string::size_type start_ = 0; | ||
}; | ||
|
||
std::optional<std::string_view> InfoDump::nextLine() { | ||
auto lastStart = start_; | ||
start_ = dump_.find('\n', start_); | ||
if (start_ == std::string::npos) { | ||
return std::optional<std::string_view>(); | ||
} | ||
return std::string_view(dump_.data() + lastStart, start_++ - lastStart); | ||
} | ||
|
||
void InfoDump::moveToStartOfFields() { | ||
auto line = nextLine(); | ||
while (line) { | ||
if (*line == "COLUMN DETAILS") { | ||
nextLine(); | ||
return; | ||
} | ||
line = nextLine(); | ||
} | ||
return; | ||
} | ||
|
||
std::optional<std::pair<std::string_view, unsigned long long>> InfoDump::nextFieldInfo() { | ||
auto line = nextLine(); | ||
if (not line) { | ||
return {}; | ||
} | ||
auto name = line->substr(2, line->find_first_of(" .", 2) - 2); | ||
|
||
nextLine(); | ||
nextLine(); | ||
nextLine(); | ||
nextLine(); | ||
line = nextLine(); | ||
//std::cout <<line->substr(line->find_first_not_of(" ",line->find_first_of(":")+1))<<std::endl; | ||
auto size = std::atoll(line->substr(line->find_first_not_of(" ", line->find_first_of(":") + 1)).data()); | ||
moveToLineWith("............................................................"); | ||
return std::make_pair(name, size); | ||
} | ||
|
||
void InfoDump::moveToLineWith(std::string_view iCheck) { | ||
auto line = nextLine(); | ||
while (line) { | ||
if (*line == iCheck) { | ||
return; | ||
} | ||
line = nextLine(); | ||
} | ||
return; | ||
} | ||
} // namespace | ||
|
||
int main(int iArgc, char const* iArgv[]) { | ||
// Add options here | ||
|
||
boost::program_options::options_description desc("Allowed options"); | ||
desc.add_options()("help,h", "print help message")( | ||
"file,f", boost::program_options::value<std::string>(), "data file")("print,P", "Print list of data products")( | ||
"verbose,v", "Verbose printout")("printProductDetails,p", "Call PrintInfo() for selected rntuple")( | ||
"rntuple,r", boost::program_options::value<std::string>(), "Select rntuple used with -P and -p options")( | ||
"sizeSummary,s", "Print size on disk for each data product")( | ||
"events,e", | ||
"Print list of all Events, Runs, and LuminosityBlocks in the file sorted by run number, luminosity block number, " | ||
"and event number. Also prints the entry numbers and whether it is possible to use fast copy with the file.")( | ||
"eventsInLumis", "Print how many Events are in each LuminosityBlock."); | ||
|
||
// What rntuples do we require for this to be a valid collection? | ||
std::vector<std::string> expectedRNTuples; | ||
expectedRNTuples.push_back("MetaData"); | ||
expectedRNTuples.push_back("Events"); | ||
|
||
boost::program_options::positional_options_description p; | ||
p.add("file", -1); | ||
|
||
boost::program_options::variables_map vm; | ||
|
||
try { | ||
boost::program_options::store( | ||
boost::program_options::command_line_parser(iArgc, iArgv).options(desc).positional(p).run(), vm); | ||
} catch (boost::program_options::error const& x) { | ||
std::cerr << "Option parsing failure:\n" << x.what() << "\n\n"; | ||
std::cerr << desc << "\n"; | ||
return 1; | ||
} | ||
|
||
boost::program_options::notify(vm); | ||
|
||
if (vm.count("help")) { | ||
std::cout << desc << "\n"; | ||
return 1; | ||
} | ||
|
||
using namespace ROOT::Experimental; | ||
|
||
auto file = TFile::Open(vm["file"].as<std::string>().c_str(), "r"); | ||
|
||
auto ntuple = RNTupleReader::Open(*file->Get<ROOT::RNTuple>("Events")); | ||
|
||
if (vm.count("printProductDetails")) { | ||
ntuple->PrintInfo(ENTupleInfo::kStorageDetails, std::cout); | ||
return 0; | ||
} | ||
std::stringstream s; | ||
ntuple->PrintInfo(ENTupleInfo::kStorageDetails, s); | ||
|
||
InfoDump info{s.str()}; | ||
|
||
info.moveToStartOfFields(); | ||
|
||
std::string presentField; | ||
unsigned long long size = 0; | ||
auto field = info.nextFieldInfo(); | ||
while (field) { | ||
if (field->first == presentField) { | ||
size += field->second; | ||
} else { | ||
if (not presentField.empty()) { | ||
std::cout << presentField << " " << size << std::endl; | ||
} | ||
presentField = field->first; | ||
size = 0; | ||
} | ||
field = info.nextFieldInfo(); | ||
} | ||
if (not presentField.empty()) { | ||
std::cout << presentField << " " << size << std::endl; | ||
} | ||
|
||
return 0; | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
<use name="DataFormats/Provenance"/> | ||
<use name="FWCore/Framework"/> | ||
<use name="FWCore/MessageLogger"/> | ||
<use name="FWCore/ParameterSet"/> | ||
<use name="FWCore/Utilities"/> | ||
<use name="DataFormats/Common"/> | ||
<use name="rootntuple"/> | ||
<flags EDM_PLUGIN="1"/> |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
#include "DataProductsRNTuple.h" | ||
#include "ROOT/RNTuple.hxx" | ||
#include "IOPool/Common/interface/getWrapperBasePtr.h" | ||
#include "FWCore/Utilities/interface/EDMException.h" | ||
|
||
using namespace edm::input; | ||
using namespace ROOT::Experimental; | ||
|
||
namespace { | ||
std::string fixName(std::string_view iName) { | ||
if (not iName.empty() and iName.back() == '.') { | ||
iName.remove_suffix(1); | ||
} | ||
return std::string(iName); | ||
} | ||
|
||
std::unique_ptr<ROOT::RNTuple> get_and_check_RNTuple(TFile* iFile, std::string const& iName) { | ||
auto ret = std::unique_ptr<ROOT::RNTuple>(iFile->Get<ROOT::RNTuple>(iName.c_str())); | ||
if (not ret) { | ||
throw edm::Exception(edm::errors::FileReadError) | ||
<< "The entry for '" << iName << "' does not exist or is not an RNTuple"; | ||
} | ||
return ret; | ||
} | ||
|
||
} // namespace | ||
|
||
DataProductsRNTuple::DataProductsRNTuple(TFile* iFile, | ||
std::string const& iName, | ||
std::string const& iAux, | ||
ROOT::Experimental::RNTupleReadOptions const& iOps) | ||
: reader_(RNTupleReader::Open(*get_and_check_RNTuple(iFile, iName), iOps)) { | ||
auxDesc_ = reader_->GetDescriptor().FindFieldId(iAux); | ||
} | ||
|
||
bool DataProductsRNTuple::setupToReadProductIfAvailable(ProductDescription& iProduct) { | ||
auto fixedName = fixName(iProduct.branchName()); | ||
auto desc = reader_->GetDescriptor().FindFieldId(fixedName); | ||
if (desc == ROOT::Experimental::kInvalidDescriptorId) { | ||
return false; | ||
} | ||
iProduct.initFromDictionary(); | ||
iProduct.setOnDemand(true); | ||
infos_.emplace(iProduct.branchID().id(), ProductInfo(iProduct.wrappedName(), desc, std::move(fixedName))); | ||
return true; | ||
} | ||
|
||
TClass const* DataProductsRNTuple::WrapperFactory::wrapperBase() { | ||
static TClass const* const s_base = TClass::GetClass("edm::WrapperBase"); | ||
return s_base; | ||
} | ||
|
||
DataProductsRNTuple::WrapperFactory::WrapperFactory(std::string const& iTypeName) | ||
: wrapperClass_(TClass::GetClass(iTypeName.c_str())) { | ||
offsetToWrapperBase_ = wrapperClass_->GetBaseClassOffset(wrapperBase()); | ||
} | ||
|
||
void DataProductsRNTuple::WrapperFactory::Deleter::operator()(void* iPtr) const { class_->Destructor(iPtr); } | ||
|
||
std::unique_ptr<void, DataProductsRNTuple::WrapperFactory::Deleter> DataProductsRNTuple::WrapperFactory::newWrapper() | ||
const { | ||
return std::unique_ptr<void, Deleter>(wrapperClass_->New(), Deleter(wrapperClass_)); | ||
} | ||
|
||
std::shared_ptr<edm::WrapperBase> DataProductsRNTuple::WrapperFactory::toWrapperBase( | ||
std::unique_ptr<void, Deleter> iProduct) const { | ||
return getWrapperBasePtr(iProduct.release(), offsetToWrapperBase_); | ||
} | ||
|
||
std::shared_ptr<edm::WrapperBase> DataProductsRNTuple::dataProduct(edm::BranchID const& iProduct, int iEntry) { | ||
auto const& info = infos_.find(iProduct.id()); | ||
if (info == infos_.end()) { | ||
throw cms::Exception("RNTupleError") << " unable to find branch id " << iProduct.id() << " for entry " << iEntry; | ||
} | ||
|
||
//std::cout <<"dataProduct "<<info->second.name_<<std::endl; | ||
auto product = info->second.factory_.newWrapper(); | ||
if (not info->second.view_) { | ||
info->second.view_ = reader_->GetView<void>(info->second.descriptor_, product.get()); | ||
} else { | ||
info->second.view_->BindRawPtr(product.get()); | ||
} | ||
(*info->second.view_)(iEntry); | ||
|
||
return info->second.factory_.toWrapperBase(std::move(product)); | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
#if !defined(DataProductsRNTuple_h) | ||
#define DataProductsRNTuple_h | ||
|
||
#include "DataFormats/Provenance/interface/ProductDescription.h" | ||
#include "DataFormats/Common/interface/WrapperBase.h" | ||
#include "FWCore/Utilities/interface/InputType.h" | ||
#include "ROOT/RNTupleReader.hxx" | ||
#include "ROOT/RNTupleReadOptions.hxx" | ||
#include "TFile.h" | ||
#include "TClass.h" | ||
#include <string> | ||
#include <memory> | ||
#include <optional> | ||
|
||
namespace edm::input { | ||
class DataProductsRNTuple { | ||
public: | ||
DataProductsRNTuple(TFile*, | ||
std::string const& iName, | ||
std::string const& iAux, | ||
ROOT::Experimental::RNTupleReadOptions const& iOpts); | ||
|
||
bool setupToReadProductIfAvailable(ProductDescription&); | ||
|
||
std::shared_ptr<edm::WrapperBase> dataProduct(edm::BranchID const&, int iEntry); | ||
|
||
template <typename T> | ||
ROOT::Experimental::RNTupleView<T> auxView(std::shared_ptr<T> oStorage) { | ||
return reader_->GetView(auxDesc_, std::move(oStorage)); | ||
} | ||
|
||
ROOT::Experimental::NTupleSize_t numberOfEntries() const { return reader_->GetNEntries(); } | ||
|
||
ROOT::Experimental::DescriptorId_t findDescriptorID(std::string const& iFieldName) { | ||
return reader_->GetDescriptor().FindFieldId(iFieldName); | ||
} | ||
|
||
template <typename T> | ||
ROOT::Experimental::RNTupleView<T> viewFor(ROOT::Experimental::DescriptorId_t iID, std::shared_ptr<T> oStorage) { | ||
return reader_->GetView(iID, std::move(oStorage)); | ||
} | ||
|
||
void printInfo(std::ostream& iStream) { reader_->PrintInfo(ROOT::Experimental::ENTupleInfo::kMetrics, iStream); } | ||
|
||
private: | ||
struct WrapperFactory { | ||
struct Deleter { | ||
Deleter(TClass* iClass) : class_(iClass) {} | ||
void operator()(void*) const; | ||
TClass* class_; | ||
}; | ||
|
||
WrapperFactory(std::string const& iTypeName); | ||
|
||
std::unique_ptr<void, Deleter> newWrapper() const; | ||
std::shared_ptr<edm::WrapperBase> toWrapperBase(std::unique_ptr<void, Deleter>) const; | ||
TClass* wrapperClass_; | ||
Int_t offsetToWrapperBase_; | ||
|
||
static TClass const* wrapperBase(); | ||
}; | ||
|
||
struct ProductInfo { | ||
ProductInfo(std::string const& iTypeName, ROOT::Experimental::DescriptorId_t iDesc, std::string iName) | ||
: factory_(iTypeName), descriptor_(iDesc), name_(std::move(iName)) {} | ||
WrapperFactory factory_; | ||
ROOT::Experimental::DescriptorId_t descriptor_; | ||
std::string name_; | ||
std::optional<ROOT::Experimental::RNTupleView<void>> view_; | ||
}; | ||
|
||
std::unique_ptr<ROOT::Experimental::RNTupleReader> reader_; | ||
std::unordered_map<edm::BranchID::value_type, ProductInfo> infos_; | ||
ROOT::Experimental::DescriptorId_t auxDesc_; | ||
}; | ||
} // namespace edm::input | ||
|
||
#endif |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
#include "RNTupleDelayedReader.h" | ||
#include "DataProductsRNTuple.h" | ||
|
||
#include "DataFormats/Common/interface/EDProductGetter.h" | ||
#include "DataFormats/Common/interface/RefCoreStreamer.h" | ||
|
||
#include "FWCore/Framework/interface/SharedResourcesAcquirer.h" | ||
#include "FWCore/Framework/interface/SharedResourcesRegistry.h" | ||
|
||
#include "IOPool/Common/interface/getWrapperBasePtr.h" | ||
|
||
#include "FWCore/Utilities/interface/EDMException.h" | ||
|
||
namespace edm::input { | ||
RNTupleDelayedReader::RNTupleDelayedReader(DataProductsRNTuple* iRNTuple, | ||
SharedResourcesAcquirer* iAcquirer, | ||
std::recursive_mutex* iMutex) | ||
: rntuple_(iRNTuple), resourceAcquirer_(iAcquirer), mutex_(iMutex) {} | ||
|
||
std::pair<SharedResourcesAcquirer*, std::recursive_mutex*> RNTupleDelayedReader::sharedResources_() const { | ||
return std::make_pair(resourceAcquirer_, mutex_); | ||
} | ||
|
||
std::shared_ptr<WrapperBase> RNTupleDelayedReader::getProduct_(BranchID const& k, EDProductGetter const* ep) { | ||
if (lastException_) { | ||
try { | ||
std::rethrow_exception(lastException_); | ||
} catch (edm::Exception const& e) { | ||
//avoid growing the context each time the exception is rethrown. | ||
auto copy = e; | ||
copy.addContext("Rethrowing an exception that happened on a different read request."); | ||
throw copy; | ||
} catch (cms::Exception& e) { | ||
//If we do anything here to 'copy', we would lose the actual type of the exception. | ||
e.addContext("Rethrowing an exception that happened on a different read request."); | ||
throw; | ||
} | ||
} | ||
|
||
setRefCoreStreamer(ep); | ||
//make code exception safe | ||
std::shared_ptr<void> refCoreStreamerGuard(nullptr, [](void*) { setRefCoreStreamer(false); }); | ||
|
||
std::shared_ptr<WrapperBase> edp; | ||
try { | ||
edp = rntuple_->dataProduct(k, entry_); | ||
} catch (...) { | ||
lastException_ = std::current_exception(); | ||
std::rethrow_exception(lastException_); | ||
} | ||
//if (rntuple_->branchType() == InEvent) { | ||
// CMS-THREADING For the primary input source calls to this function need to be serialized | ||
//InputFile::reportReadBranch(inputType_, std::string(br->GetName())); | ||
//} | ||
Comment on lines
+51
to
+54
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this commented out code still useful? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We will need to do such reporting in the future so keeping it is probably a good idea. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I added this as a future task |
||
|
||
return edp; | ||
} | ||
} // namespace edm::input |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Note here and everywhere else: The caller is responsible for
delete
ing the pointer returned byGet<ROOT::RNTuple>
(because it's not inheriting fromTObject
and thus no automatic memory-management)There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So could this be handled inside ROOT? One can have
TFile::Get
have different return types for different classes viaThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We will discuss. Note however that
TFile::Get
always hands out memory ownership for classes that don't inherit fromTObject
and the caller is responsible fordelete
ing it to avoid memory leaks.Also just to point out, you don't need to keep the
ROOT::RNTuple
anchor alive, a localstd::unique_ptr<ROOT::RNTuple> anchor(file->Get<ROOT::RNTuple>("Events"));
is sufficient.