From 903e928ba2da9f996afbd59185f4851d6db190ce Mon Sep 17 00:00:00 2001 From: "Brian C. Van Essen" Date: Tue, 26 Sep 2023 03:42:13 -0700 Subject: [PATCH] Added a check in the HDF5 data reader to check that the metadata for each field actually matches the dimensions of the data fields. Added a helper function for conduit to allow the calculation of a product of a data array's elements. --- include/lbann/utils/CMakeLists.txt | 1 + include/lbann/utils/conduit_extensions.hpp | 45 ++++++++++++++++++++++ src/data_readers/data_reader_HDF5.cpp | 19 ++++++++- 3 files changed, 64 insertions(+), 1 deletion(-) create mode 100644 include/lbann/utils/conduit_extensions.hpp diff --git a/include/lbann/utils/CMakeLists.txt b/include/lbann/utils/CMakeLists.txt index f8a17ce773c..e0da391f278 100644 --- a/include/lbann/utils/CMakeLists.txt +++ b/include/lbann/utils/CMakeLists.txt @@ -31,6 +31,7 @@ set_full_path(THIS_DIR_HEADERS cloneable.hpp commify.hpp compiler_control.hpp + conduit_extensions.hpp dataset.hpp describable.hpp description.hpp diff --git a/include/lbann/utils/conduit_extensions.hpp b/include/lbann/utils/conduit_extensions.hpp new file mode 100644 index 00000000000..4be35f07917 --- /dev/null +++ b/include/lbann/utils/conduit_extensions.hpp @@ -0,0 +1,45 @@ +//////////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2014-2023, Lawrence Livermore National Security, LLC. +// Produced at the Lawrence Livermore National Laboratory. +// Written by the LBANN Research Team (B. Van Essen, et al.) listed in +// the CONTRIBUTORS file. +// +// LLNL-CODE-697807. +// All rights reserved. +// +// This file is part of LBANN: Livermore Big Artificial Neural Network +// Toolkit. For details, see http://software.llnl.gov/LBANN or +// http://github.com/LBANN. +// +// Licensed under the Apache License, Version 2.0 (the "Licensee"); you +// may not use this file except in compliance with the License. 
You may +// obtain a copy of the License at: +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +// implied. See the License for the specific language governing +// permissions and limitations under the license. +//////////////////////////////////////////////////////////////////////////////// +#pragma once +#include "conduit/conduit_data_array.hpp" + +namespace conduit { +/// Returns the product of all elements of a, accumulated in T (1 if a is empty). +template <typename T> +T +data_array_prod(DataArray<T> a) +{ + T res = 1; + for(index_t i = 0; i < a.number_of_elements(); i++) + { + const T &val = a.element(i); + res *= val; + } + + return res; +} + +} // conduit diff --git a/src/data_readers/data_reader_HDF5.cpp b/src/data_readers/data_reader_HDF5.cpp index 14b16ce4c68..3710dfa1636 100644 --- a/src/data_readers/data_reader_HDF5.cpp +++ b/src/data_readers/data_reader_HDF5.cpp @@ -25,6 +25,7 @@ // ///////////////////////////////////////////////////////////////////////////////// #include "conduit/conduit_relay_mpi.hpp" +#include "lbann/utils/conduit_extensions.hpp" #include "lbann/data_readers/data_reader_HDF5.hpp" #include "lbann/data_readers/data_reader_sample_list_impl.hpp" @@ -357,7 +358,23 @@ void hdf5_data_reader::load_sample(conduit::Node& node, original_path, node[new_pathname]); } - + // Check that the dimensions of each node matches its metadata + if (metadata.has_child(HDF5_METADATA_KEY_DIMS)) { + int n_elts = node[pathname].dtype().number_of_elements(); + conduit::int64_array data_array_dims = metadata[HDF5_METADATA_KEY_DIMS].value(); + auto expected_n_elts = data_array_prod(data_array_dims); + + if (n_elts != expected_n_elts) { + LBANN_WARNING("Ingesting sample field ", + pathname, + " for sample ", + sample_name, + " where the dimensions in the metadata don't match the actual field: ", + expected_n_elts, + " != ", + n_elts); + } + } // check to see 
if there are integer types left in the sample and warn the // user auto dtype = node[new_pathname].dtype();