From e1344dcc3c8f2ad332550c7ed856d3f412686bf4 Mon Sep 17 00:00:00 2001 From: Aaron Lun Date: Mon, 15 Jan 2024 17:39:43 -0800 Subject: [PATCH] Purged global registries in favor of passing all functions in Options. (#35) This avoids issues where multiple applications are fiddling with the same global registries and causing problems for each other; now, every validate() call has its customized Options that cannot interfere with other calls. Unfortunately, this change requires us to make Options a non-const reference, in order to support applications that want to mutate state throughout the validate() call, e.g., to collect statistics, re-use connections, whatever. --- include/takane/_derived_from.hpp | 39 +++--- include/takane/_dimensions.hpp | 61 ++++----- include/takane/_height.hpp | 80 ++++++------ include/takane/_satisfies_interface.hpp | 55 ++++---- include/takane/_validate.hpp | 105 ++++++++------- include/takane/atomic_vector.hpp | 8 +- include/takane/atomic_vector_list.hpp | 8 +- include/takane/bam_file.hpp | 20 +-- include/takane/bcf_file.hpp | 20 +-- include/takane/bed_file.hpp | 21 +-- include/takane/bigbed_file.hpp | 20 +-- include/takane/bigwig_file.hpp | 20 +-- include/takane/bumpy_atomic_array.hpp | 12 +- include/takane/bumpy_data_frame_array.hpp | 12 +- include/takane/compressed_sparse_matrix.hpp | 12 +- include/takane/data_frame.hpp | 16 +-- include/takane/data_frame_factor.hpp | 32 ++--- include/takane/data_frame_list.hpp | 8 +- include/takane/delayed_array.hpp | 17 ++- include/takane/dense_array.hpp | 12 +- include/takane/fasta_file.hpp | 21 +-- include/takane/fastq_file.hpp | 21 +-- include/takane/genomic_ranges.hpp | 17 ++- include/takane/genomic_ranges_list.hpp | 8 +- include/takane/gff_file.hpp | 21 +-- include/takane/gmt_file.hpp | 20 +-- include/takane/multi_sample_dataset.hpp | 14 +- .../takane/ranged_summarized_experiment.hpp | 12 +- include/takane/sequence_information.hpp | 4 +- include/takane/sequence_string_set.hpp | 8 +- 
include/takane/simple_list.hpp | 10 +- include/takane/single_cell_experiment.hpp | 11 +- include/takane/spatial_experiment.hpp | 12 +- include/takane/string_factor.hpp | 8 +- include/takane/summarized_experiment.hpp | 24 ++-- include/takane/utils_bumpy_array.hpp | 18 +-- include/takane/utils_compressed_list.hpp | 16 +-- include/takane/utils_other.hpp | 14 +- include/takane/utils_public.hpp | 120 +++++++++++++++++- include/takane/vcf_experiment.hpp | 12 +- tests/src/bam_file.cpp | 18 +-- tests/src/bcf_file.cpp | 18 +-- tests/src/bed_file.cpp | 18 +-- tests/src/bigbed_file.cpp | 18 +-- tests/src/bigwig_file.cpp | 18 +-- tests/src/data_frame_factor.cpp | 7 +- tests/src/dispatch.cpp | 60 +++++---- tests/src/fasta_file.cpp | 18 +-- tests/src/fastq_file.cpp | 18 +-- tests/src/genomic_ranges.cpp | 7 +- tests/src/gff_file.cpp | 18 +-- tests/src/gmt_file.cpp | 18 +-- tests/src/utils.cpp | 12 ++ tests/src/utils.h | 5 + tests/src/utils_bumpy_array.cpp | 40 +++--- tests/src/utils_compressed_list.cpp | 16 ++- tests/src/utils_other.cpp | 12 +- 57 files changed, 650 insertions(+), 640 deletions(-) diff --git a/include/takane/_derived_from.hpp b/include/takane/_derived_from.hpp index 4c81008..b26c21b 100644 --- a/include/takane/_derived_from.hpp +++ b/include/takane/_derived_from.hpp @@ -5,9 +5,11 @@ #include #include +#include "utils_public.hpp" + /** * @file _derived_from.hpp - * @brief Registry of derived object types. + * @brief Check for derived object relationships. */ namespace takane { @@ -46,42 +48,41 @@ inline auto default_registry() { return registry; } +inline bool check(const std::string& type, const std::string& base, const std::unordered_map >& registry) { + auto it = registry.find(base); + if (it != registry.end()) { + const auto& listing = it->second; + return (listing.find(type) != listing.end()); + } + return false; +} + } /** * @endcond */ /** - * Registry of derived object types and their base types. 
- * Each key is the base object type and each value is the set of all of its derived types. + * Check whether a particular object type is derived from a base object type. * Derived types satisfy the same file requirements of the base type, but usually add more files to represent additional functionality. + * This can be used by specifications to check whether arbitrary objects satisfy the file structure expectations for a particular base type. * - * Applications can extend the **takane** framework by adding custom derived types to each set. + * Applications can add their own derived types for a given base class in `Options::custom_derived_from`. + * This extends the default relationships whereby `derived_from()` will take the union of all derived object types in the default and custom sets. * Note that derived types must be manually included in every base type's set, * e.g., if B is derived from A and C is derived from B, C must be added to the sets for both A and B. - */ -inline std::unordered_map > derived_from_registry = internal_derived_from::default_registry(); - -/** - * Check whether a particular object type is derived from a base objct type. - * This can be used by specifications to check that child components satisfy certain expectations. * * @param type Object type. * @param base Base object type. + * @param options Validation options, containing custom derived/base relationships. * @returns Whether `type` is derived from `base` or is equal to `base`. 
*/ -inline bool derived_from(const std::string& type, const std::string& base) { +inline bool derived_from(const std::string& type, const std::string& base, const Options& options) { if (type == base) { return true; } - - auto it = derived_from_registry.find(base); - if (it == derived_from_registry.end()) { - return false; - } - - const auto& listing = it->second; - return (listing.find(type) != listing.end()); + static const auto derived_from_registry = internal_derived_from::default_registry(); + return internal_derived_from::check(type, base, derived_from_registry) || internal_derived_from::check(type, base, options.custom_derived_from); } } diff --git a/include/takane/_dimensions.hpp b/include/takane/_dimensions.hpp index 0139b56..3a9bee9 100644 --- a/include/takane/_dimensions.hpp +++ b/include/takane/_dimensions.hpp @@ -22,33 +22,29 @@ namespace takane { -/** - * Class to map object types to `dimensions()` functions. - */ -typedef std::unordered_map(const std::filesystem::path&, const ObjectMetadata&, const Options&)> > DimensionsRegistry; - /** * @cond */ namespace internal_dimensions { -inline DimensionsRegistry default_registry() { - DimensionsRegistry registry; +inline auto default_registry() { + std::unordered_map(const std::filesystem::path&, const ObjectMetadata&, Options& os)> > registry; + typedef std::vector Dims; - registry["data_frame"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) -> std::vector { return data_frame::dimensions(p, m, o); }; - registry["dense_array"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) -> std::vector { return dense_array::dimensions(p, m, o); }; - registry["compressed_sparse_matrix"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) -> std::vector { return compressed_sparse_matrix::dimensions(p, m, o); }; + registry["data_frame"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) -> Dims { return 
data_frame::dimensions(p, m, o); }; + registry["dense_array"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) -> Dims { return dense_array::dimensions(p, m, o); }; + registry["compressed_sparse_matrix"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) -> Dims { return compressed_sparse_matrix::dimensions(p, m, o); }; // Subclasses of SE, so we just re-use the SE methods here. - registry["summarized_experiment"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) -> std::vector { return summarized_experiment::dimensions(p, m, o); }; - registry["ranged_summarized_experiment"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) -> std::vector { return summarized_experiment::dimensions(p, m, o); }; - registry["single_cell_experiment"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) -> std::vector { return summarized_experiment::dimensions(p, m, o); }; - registry["spatial_experiment"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) -> std::vector { return summarized_experiment::dimensions(p, m, o); }; + registry["summarized_experiment"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) -> Dims { return summarized_experiment::dimensions(p, m, o); }; + registry["ranged_summarized_experiment"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) -> Dims { return summarized_experiment::dimensions(p, m, o); }; + registry["single_cell_experiment"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) -> Dims { return summarized_experiment::dimensions(p, m, o); }; + registry["spatial_experiment"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) -> Dims { return summarized_experiment::dimensions(p, m, o); }; - registry["bumpy_atomic_array"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) -> 
std::vector { return bumpy_atomic_array::dimensions(p, m, o); }; - registry["bumpy_data_frame_array"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) -> std::vector { return bumpy_data_frame_array::dimensions(p, m, o); }; - registry["vcf_experiment"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) -> std::vector { return vcf_experiment::dimensions(p, m, o); }; - registry["delayed_array"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) -> std::vector { return delayed_array::dimensions(p, m, o); }; + registry["bumpy_atomic_array"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) -> Dims { return bumpy_atomic_array::dimensions(p, m, o); }; + registry["bumpy_data_frame_array"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) -> Dims { return bumpy_data_frame_array::dimensions(p, m, o); }; + registry["vcf_experiment"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) -> Dims { return vcf_experiment::dimensions(p, m, o); }; + registry["delayed_array"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) -> Dims { return delayed_array::dimensions(p, m, o); }; return registry; } @@ -58,27 +54,25 @@ inline DimensionsRegistry default_registry() { * @endcond */ -/** - * Registry of functions to be used by `dimensions()`. - * Applications can extend **takane** by adding new dimension functions for custom object types. - */ -inline DimensionsRegistry dimensions_registry = internal_dimensions::default_registry(); - /** * Get the dimensions of a multi-dimensional object in a subdirectory, based on the supplied object type. - * This searches the `dimensions_registry` to find a dimension function for the given type. + * + * Applications can supply custom dimension functions for a given type via `Options::custom_dimensions`. 
+ * If available, the supplied custom function will be used instead of the default. * * @param path Path to a directory representing an object. * @param metadata Metadata for the object, typically determined from its `OBJECT` file. - * @param options Validation options, mostly for input performance. + * @param options Validation options. * * @return Vector containing the object's dimensions. */ -inline std::vector dimensions(const std::filesystem::path& path, const ObjectMetadata& metadata, const Options& options) { - if (!std::filesystem::exists(path) || std::filesystem::status(path).type() != std::filesystem::file_type::directory) { - throw std::runtime_error("expected '" + path.string() + "' to be a directory"); +inline std::vector dimensions(const std::filesystem::path& path, const ObjectMetadata& metadata, Options& options) { + auto cIt = options.custom_dimensions.find(metadata.type); + if (cIt != options.custom_dimensions.end()) { + return (cIt->second)(path, metadata, options); } + static const auto dimensions_registry = internal_dimensions::default_registry(); auto vrIt = dimensions_registry.find(metadata.type); if (vrIt == dimensions_registry.end()) { throw std::runtime_error("no registered 'dimensions' function for object type '" + metadata.type + "' at '" + path.string() + "'"); @@ -91,21 +85,22 @@ inline std::vector dimensions(const std::filesystem::path& path, const O * Get the dimensions of an object in a subdirectory, using its `OBJECT` file to automatically determine the type. * * @param path Path to a directory containing an object. - * @param options Validation options, mostly for input performance. + * @param options Validation options. * @return The object's dimensions. 
*/ -inline std::vector dimensions(const std::filesystem::path& path, const Options& options) { +inline std::vector dimensions(const std::filesystem::path& path, Options& options) { return dimensions(path, read_object_metadata(path), options); } /** - * Overload of `dimensions()` with default options. + * Overload of `dimensions()` with default settings. * * @param path Path to a directory containing an object. * @return The object's dimensions. */ inline std::vector dimensions(const std::filesystem::path& path) { - return dimensions(path, Options()); + Options options; + return dimensions(path, options); } } diff --git a/include/takane/_height.hpp b/include/takane/_height.hpp index eebcf48..4e18626 100644 --- a/include/takane/_height.hpp +++ b/include/takane/_height.hpp @@ -31,41 +31,36 @@ namespace takane { -/** - * Class to map object types to `height()` functions. - */ -typedef std::unordered_map > HeightRegistry; - /** * @cond */ namespace internal_height { -inline HeightRegistry default_registry() { - HeightRegistry registry; - registry["atomic_vector"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) -> size_t { return atomic_vector::height(p, m, o); }; - registry["string_factor"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) -> size_t { return string_factor::height(p, m, o); }; - registry["simple_list"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) -> size_t { return simple_list::height(p, m, o); }; - registry["data_frame"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) -> size_t { return data_frame::height(p, m, o); }; - registry["data_frame_factor"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) -> size_t { return data_frame_factor::height(p, m, o); }; - registry["genomic_ranges"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) -> size_t { return 
genomic_ranges::height(p, m, o); }; - registry["atomic_vector_list"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) -> size_t { return atomic_vector_list::height(p, m, o); }; - registry["data_frame_list"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) -> size_t { return data_frame_list::height(p, m, o); }; - registry["genomic_ranges_list"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) -> size_t { return genomic_ranges_list::height(p, m, o); }; - registry["dense_array"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) -> size_t { return dense_array::height(p, m, o); }; - registry["compressed_sparse_matrix"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) -> size_t { return compressed_sparse_matrix::height(p, m, o); }; +inline auto default_registry() { + std::unordered_map > registry; + registry["atomic_vector"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) -> size_t { return atomic_vector::height(p, m, o); }; + registry["string_factor"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) -> size_t { return string_factor::height(p, m, o); }; + registry["simple_list"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) -> size_t { return simple_list::height(p, m, o); }; + registry["data_frame"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) -> size_t { return data_frame::height(p, m, o); }; + registry["data_frame_factor"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) -> size_t { return data_frame_factor::height(p, m, o); }; + registry["genomic_ranges"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) -> size_t { return genomic_ranges::height(p, m, o); }; + registry["atomic_vector_list"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) 
-> size_t { return atomic_vector_list::height(p, m, o); }; + registry["data_frame_list"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) -> size_t { return data_frame_list::height(p, m, o); }; + registry["genomic_ranges_list"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) -> size_t { return genomic_ranges_list::height(p, m, o); }; + registry["dense_array"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) -> size_t { return dense_array::height(p, m, o); }; + registry["compressed_sparse_matrix"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) -> size_t { return compressed_sparse_matrix::height(p, m, o); }; // Subclasses of the SE, so we just re-use its methods here. - registry["summarized_experiment"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) -> size_t { return summarized_experiment::height(p, m, o); }; - registry["ranged_summarized_experiment"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) -> size_t { return summarized_experiment::height(p, m, o); }; - registry["single_cell_experiment"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) -> size_t { return summarized_experiment::height(p, m, o); }; - registry["spatial_experiment"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) -> size_t { return summarized_experiment::height(p, m, o); }; - - registry["sequence_string_set"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) -> size_t { return sequence_string_set::height(p, m, o); }; - registry["bumpy_atomic_array"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) -> size_t { return bumpy_atomic_array::height(p, m, o); }; - registry["bumpy_data_frame_array"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) -> size_t { return 
bumpy_data_frame_array::height(p, m, o); }; - registry["vcf_experiment"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) -> size_t { return vcf_experiment::height(p, m, o); }; - registry["delayed_array"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) -> size_t { return delayed_array::height(p, m, o); }; + registry["summarized_experiment"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) -> size_t { return summarized_experiment::height(p, m, o); }; + registry["ranged_summarized_experiment"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) -> size_t { return summarized_experiment::height(p, m, o); }; + registry["single_cell_experiment"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) -> size_t { return summarized_experiment::height(p, m, o); }; + registry["spatial_experiment"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) -> size_t { return summarized_experiment::height(p, m, o); }; + + registry["sequence_string_set"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) -> size_t { return sequence_string_set::height(p, m, o); }; + registry["bumpy_atomic_array"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) -> size_t { return bumpy_atomic_array::height(p, m, o); }; + registry["bumpy_data_frame_array"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) -> size_t { return bumpy_data_frame_array::height(p, m, o); }; + registry["vcf_experiment"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) -> size_t { return vcf_experiment::height(p, m, o); }; + registry["delayed_array"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) -> size_t { return delayed_array::height(p, m, o); }; return registry; } @@ -74,31 +69,29 @@ inline HeightRegistry default_registry() { * @endcond */ -/** - * 
Registry of functions to be used by `height()`. - * Applications can extend **takane** by adding new height functions for custom object types. - */ -inline HeightRegistry height_registry = internal_height::default_registry(); - /** * Get the height of an object in a subdirectory, based on the supplied object type. - * This searches the `height_registry` to find a height function for the given type. * * `height()` is used to check the shape of objects stored in vertical containers, e.g., columns of a `data_frame`. * For vectors or other 1-dimensional objects, the height is usually just the length of the object (for some object-specific definition of "length"). * For higher-dimensional objects, the height is usually the extent of the first dimension. * + * Applications can supply custom height functions for a given type via `Options::custom_height`. + * If available, the supplied custom function will be used instead of the default. + * * @param path Path to a directory representing an object. * @param metadata Metadata for the object, typically determined from its `OBJECT` file. - * @param options Validation options, mostly for input performance. + * @param options Validation options. * * @return The object's height. 
*/ -inline size_t height(const std::filesystem::path& path, const ObjectMetadata& metadata, const Options& options) { - if (!std::filesystem::exists(path) || std::filesystem::status(path).type() != std::filesystem::file_type::directory) { - throw std::runtime_error("expected '" + path.string() + "' to be a directory"); +inline size_t height(const std::filesystem::path& path, const ObjectMetadata& metadata, Options& options) { + auto cIt = options.custom_height.find(metadata.type); + if (cIt != options.custom_height.end()) { + return (cIt->second)(path, metadata, options); } + static const auto height_registry = internal_height::default_registry(); auto vrIt = height_registry.find(metadata.type); if (vrIt == height_registry.end()) { throw std::runtime_error("no registered 'height' function for object type '" + metadata.type + "' at '" + path.string() + "'"); @@ -111,21 +104,22 @@ inline size_t height(const std::filesystem::path& path, const ObjectMetadata& me * Get the height of an object in a subdirectory, using its `OBJECT` file to automatically determine the type. * * @param path Path to a directory containing an object. - * @param options Validation options, mostly for input performance. + * @param options Validation options. * @return The object's height. */ -inline size_t height(const std::filesystem::path& path, const Options& options) { +inline size_t height(const std::filesystem::path& path, Options& options) { return height(path, read_object_metadata(path), options); } /** - * Overload of `height()` with default options. + * Overload of `height()` with default settings. * * @param path Path to a directory containing an object. * @return The object's height. 
*/ inline size_t height(const std::filesystem::path& path) { - return height(path, Options()); + Options options; + return height(path, options); } } diff --git a/include/takane/_satisfies_interface.hpp b/include/takane/_satisfies_interface.hpp index c8064eb..f0bc768 100644 --- a/include/takane/_satisfies_interface.hpp +++ b/include/takane/_satisfies_interface.hpp @@ -8,7 +8,7 @@ /** * @file _satisfies_interface.hpp - * @brief Registry of object interfaces. + * @brief Check if an object interface is satisfied. */ namespace takane { @@ -26,31 +26,9 @@ inline auto default_registry() { return registry; } -} -/** - * @endcond - */ - -/** - * Registry of object types that satisfy a particular object interface. - * Each key is the interface and each value is the set of all types that satisfy it. - * - * Applications can extend the **takane** framework by adding custom types to each set. - * Note that, if a type is included in a particular set, it is not necessary to add its derived types, as `satisfies_interface()` will automatically call `derived_from()`. - */ -inline std::unordered_map > satisfies_interface_registry = internal_satisfies_interface::default_registry(); - -/** - * Check whether a particular object type satisfies a particular object interface. - * This can be used by specifications to check that child components satisfy certain expectations. - * - * @param type Object type. - * @param interface Interface type. - * @returns Whether `type` satisfies `interface`. 
- */ -inline bool satisfies_interface(const std::string& type, const std::string& interface) { - auto it = satisfies_interface_registry.find(interface); - if (it == satisfies_interface_registry.end()) { +inline bool check(const std::string& type, const std::string& interface, const std::unordered_map >& registry, const Options& options) { + auto it = registry.find(interface); + if (it == registry.end()) { return false; } @@ -60,7 +38,7 @@ inline bool satisfies_interface(const std::string& type, const std::string& inte } for (const auto& d : listing) { - if (derived_from(type, d)) { + if (derived_from(type, d, options)) { return true; } } @@ -68,6 +46,29 @@ inline bool satisfies_interface(const std::string& type, const std::string& inte return false; } +} +/** + * @endcond + */ + +/** + * Check whether a particular object type satisfies a particular object interface. + * This can be used by specifications to check that child components satisfy certain user-level expectations for an abstract object (e.g., data frames, lists). + * + * Applications can extend the **takane** framework by adding custom types to `Options::custom_satisfies_interface`. + * This extends the default relationships whereby `satisfies_interface()` will take the union of all object types in the default and custom sets. + * Note that, if a type is included in a particular set, it is not necessary to add its derived types, as `satisfies_interface()` will automatically call `derived_from()`. + * + * @param type Object type. + * @param interface Interface type. + * @param options Validation options, containing custom object interface relationships. + * @returns Whether `type` satisfies `interface`. 
+ */ +inline bool satisfies_interface(const std::string& type, const std::string& interface, const Options& options) { + static const auto satisfies_interface_registry = internal_satisfies_interface::default_registry(); + return internal_satisfies_interface::check(type, interface, satisfies_interface_registry, options) || internal_satisfies_interface::check(type, interface, options.custom_satisfies_interface, options); +} + } #endif diff --git a/include/takane/_validate.hpp b/include/takane/_validate.hpp index 575786d..4ca35c4 100644 --- a/include/takane/_validate.hpp +++ b/include/takane/_validate.hpp @@ -46,49 +46,44 @@ namespace takane { -/** - * Class to map object types to `validate()` functions. - */ -typedef std::unordered_map > ValidateRegistry; - /** * @cond */ namespace internal_validate { -inline ValidateRegistry default_registry() { - ValidateRegistry registry; - registry["atomic_vector"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) { atomic_vector::validate(p, m, o); }; - registry["string_factor"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) { string_factor::validate(p, m, o); }; - registry["simple_list"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) { simple_list::validate(p, m, o); }; - registry["data_frame"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) { data_frame::validate(p, m, o); }; - registry["data_frame_factor"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) { data_frame_factor::validate(p, m, o); }; - registry["sequence_information"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) { sequence_information::validate(p, m, o); }; - registry["genomic_ranges"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) { genomic_ranges::validate(p, m, o); }; - registry["atomic_vector_list"] = [](const std::filesystem::path& p, 
const ObjectMetadata& m, const Options& o) { atomic_vector_list::validate(p, m, o); }; - registry["data_frame_list"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) { data_frame_list::validate(p, m, o); }; - registry["genomic_ranges_list"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) { genomic_ranges_list::validate(p, m, o); }; - registry["dense_array"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) { dense_array::validate(p, m, o); }; - registry["compressed_sparse_matrix"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) { compressed_sparse_matrix::validate(p, m, o); }; - registry["summarized_experiment"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) { summarized_experiment::validate(p, m, o); }; - registry["ranged_summarized_experiment"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) { ranged_summarized_experiment::validate(p, m, o); }; - registry["single_cell_experiment"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) { single_cell_experiment::validate(p, m, o); }; - registry["spatial_experiment"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) { spatial_experiment::validate(p, m, o); }; - registry["multi_sample_dataset"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) { multi_sample_dataset::validate(p, m, o); }; - registry["sequence_string_set"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) { sequence_string_set::validate(p, m, o); }; - registry["bam_file"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) { bam_file::validate(p, m, o); }; - registry["bcf_file"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) { bcf_file::validate(p, m, o); }; - registry["bigwig_file"] = 
[](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) { bigwig_file::validate(p, m, o); }; - registry["bigbed_file"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) { bigbed_file::validate(p, m, o); }; - registry["fasta_file"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) { fasta_file::validate(p, m, o); }; - registry["fastq_file"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) { fastq_file::validate(p, m, o); }; - registry["bed_file"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) { bed_file::validate(p, m, o); }; - registry["gmt_file"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) { gmt_file::validate(p, m, o); }; - registry["gff_file"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) { gff_file::validate(p, m, o); }; - registry["bumpy_atomic_array"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) { bumpy_atomic_array::validate(p, m, o); }; - registry["bumpy_data_frame_array"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) { bumpy_data_frame_array::validate(p, m, o); }; - registry["vcf_experiment"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) { vcf_experiment::validate(p, m, o); }; - registry["delayed_array"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) { delayed_array::validate(p, m, o); }; +inline auto default_registry() { + std::unordered_map > registry; + registry["atomic_vector"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) { atomic_vector::validate(p, m, o); }; + registry["string_factor"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) { string_factor::validate(p, m, o); }; + registry["simple_list"] = [](const std::filesystem::path& p, const 
ObjectMetadata& m, Options& o) { simple_list::validate(p, m, o); }; + registry["data_frame"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) { data_frame::validate(p, m, o); }; + registry["data_frame_factor"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) { data_frame_factor::validate(p, m, o); }; + registry["sequence_information"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) { sequence_information::validate(p, m, o); }; + registry["genomic_ranges"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) { genomic_ranges::validate(p, m, o); }; + registry["atomic_vector_list"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) { atomic_vector_list::validate(p, m, o); }; + registry["data_frame_list"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) { data_frame_list::validate(p, m, o); }; + registry["genomic_ranges_list"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) { genomic_ranges_list::validate(p, m, o); }; + registry["dense_array"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) { dense_array::validate(p, m, o); }; + registry["compressed_sparse_matrix"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) { compressed_sparse_matrix::validate(p, m, o); }; + registry["summarized_experiment"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) { summarized_experiment::validate(p, m, o); }; + registry["ranged_summarized_experiment"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) { ranged_summarized_experiment::validate(p, m, o); }; + registry["single_cell_experiment"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) { single_cell_experiment::validate(p, m, o); }; + registry["spatial_experiment"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) { 
spatial_experiment::validate(p, m, o); }; + registry["multi_sample_dataset"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) { multi_sample_dataset::validate(p, m, o); }; + registry["sequence_string_set"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) { sequence_string_set::validate(p, m, o); }; + registry["bam_file"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) { bam_file::validate(p, m, o); }; + registry["bcf_file"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) { bcf_file::validate(p, m, o); }; + registry["bigwig_file"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) { bigwig_file::validate(p, m, o); }; + registry["bigbed_file"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) { bigbed_file::validate(p, m, o); }; + registry["fasta_file"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) { fasta_file::validate(p, m, o); }; + registry["fastq_file"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) { fastq_file::validate(p, m, o); }; + registry["bed_file"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) { bed_file::validate(p, m, o); }; + registry["gmt_file"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) { gmt_file::validate(p, m, o); }; + registry["gff_file"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) { gff_file::validate(p, m, o); }; + registry["bumpy_atomic_array"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) { bumpy_atomic_array::validate(p, m, o); }; + registry["bumpy_data_frame_array"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) { bumpy_data_frame_array::validate(p, m, o); }; + registry["vcf_experiment"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) { vcf_experiment::validate(p, m, o); 
}; + registry["delayed_array"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) { delayed_array::validate(p, m, o); }; return registry; } @@ -97,25 +92,28 @@ inline ValidateRegistry default_registry() { * @endcond */ -/** - * Registry of functions to be used by `validate()`. - * Applications can extend **takane** by adding new validation functions for custom object types. - */ -inline ValidateRegistry validate_registry = internal_validate::default_registry(); - /** * Validate an object in a subdirectory, based on the supplied object type. - * This searches the `validate_registry` to find a validation function for the given type. + * + * Applications can supply custom validation functions for a given type via `Options::custom_validate`. + * If available, the supplied custom function will be used instead of the default. * * @param path Path to a directory representing an object. * @param metadata Metadata for the object, typically determined from its `OBJECT` file. - * @param options Validation options, mostly for input performance. + * @param options Validation options. 
*/ -inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, const Options& options) { - if (!std::filesystem::exists(path) || std::filesystem::status(path).type() != std::filesystem::file_type::directory) { - throw std::runtime_error("expected '" + path.string() + "' to be a directory"); +inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, Options& options) { + auto cIt = options.custom_validate.find(metadata.type); + if (cIt != options.custom_validate.end()) { + try { + (cIt->second)(path, metadata, options); + } catch (std::exception& e) { + throw std::runtime_error("failed to validate '" + metadata.type + "' object at '" + path.string() + "'; " + std::string(e.what())); + } + return; } + static const auto validate_registry = internal_validate::default_registry(); auto vrIt = validate_registry.find(metadata.type); if (vrIt == validate_registry.end()) { throw std::runtime_error("no registered 'validate' function for object type '" + metadata.type + "' at '" + path.string() + "'"); @@ -132,19 +130,20 @@ inline void validate(const std::filesystem::path& path, const ObjectMetadata& me * Validate an object in a subdirectory, using its `OBJECT` file to automatically determine the type. * * @param path Path to a directory containing an object. - * @param options Validation options, mostly for input performance. + * @param options Validation options. */ -inline void validate(const std::filesystem::path& path, const Options& options) { +inline void validate(const std::filesystem::path& path, Options& options) { validate(path, read_object_metadata(path), options); } /** - * Overload of `validate()` with default options. + * Overload of `validate()` with default settings. * * @param path Path to a directory containing an object. 
*/ inline void validate(const std::filesystem::path& path) { - validate(path, Options()); + Options options; + validate(path, options); } } diff --git a/include/takane/atomic_vector.hpp b/include/takane/atomic_vector.hpp index 2910b51..7fa7b3d 100644 --- a/include/takane/atomic_vector.hpp +++ b/include/takane/atomic_vector.hpp @@ -27,9 +27,9 @@ namespace atomic_vector { /** * @param path Path to the directory containing the atomic vector. * @param metadata Metadata for the object, typically read from its `OBJECT` file. - * @param options Validation options, typically for reading performance. + * @param options Validation options. */ -inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, const Options& options) { +inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, Options& options) { const auto& vstring = internal_json::extract_version_for_type(metadata.other, "atomic_vector"); auto version = ritsuko::parse_version_string(vstring.c_str(), vstring.size(), /* skip_patch = */ true); if (version.major != 1) { @@ -81,10 +81,10 @@ inline void validate(const std::filesystem::path& path, const ObjectMetadata& me /** * @param path Path to the directory containing the atomic vector. * @param metadata Metadata for the object, typically read from its `OBJECT` file. - * @param options Validation options, typically for reading performance. + * @param options Validation options. * @return Length of the vector. 
*/ -inline size_t height(const std::filesystem::path& path, [[maybe_unused]] const ObjectMetadata& metadata, [[maybe_unused]] const Options& options) { +inline size_t height(const std::filesystem::path& path, [[maybe_unused]] const ObjectMetadata& metadata, [[maybe_unused]] Options& options) { auto handle = ritsuko::hdf5::open_file(path / "contents.h5"); auto ghandle = handle.openGroup("atomic_vector"); auto dhandle = ghandle.openDataSet("values"); diff --git a/include/takane/atomic_vector_list.hpp b/include/takane/atomic_vector_list.hpp index fcad5e0..5a3a0e9 100644 --- a/include/takane/atomic_vector_list.hpp +++ b/include/takane/atomic_vector_list.hpp @@ -22,19 +22,19 @@ namespace atomic_vector_list { /** * @param path Path to the directory containing the atomic vector list. * @param metadata Metadata for the object, typically read from its `OBJECT` file. - * @param options Validation options, typically for reading performance. + * @param options Validation options. */ -inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, const Options& options) { +inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, Options& options) { internal_compressed_list::validate_directory(path, "atomic_vector_list", "atomic_vector", metadata, options); } /** * @param path Path to a directory containing an atomic vector list. * @param metadata Metadata for the object, typically read from its `OBJECT` file. - * @param options Validation options, mostly for input performance. + * @param options Validation options. * @return The length of the list. 
*/ -inline size_t height(const std::filesystem::path& path, const ObjectMetadata& metadata, const Options& options) { +inline size_t height(const std::filesystem::path& path, const ObjectMetadata& metadata, Options& options) { return internal_compressed_list::height(path, "atomic_vector_list", metadata, options); } diff --git a/include/takane/bam_file.hpp b/include/takane/bam_file.hpp index d4dae2b..012f339 100644 --- a/include/takane/bam_file.hpp +++ b/include/takane/bam_file.hpp @@ -23,22 +23,14 @@ namespace takane { namespace bam_file { /** - * Application-specific function to check the validity of a BAM file and its indices. + * If `Options::bam_file_strict_check` is provided, it is used to perform stricter checking of the BAM file contents and indices. + * By default, we don't look past the magic number to verify the files as this requires a dependency on heavy-duty libraries like, e.g., HTSlib. * - * This should accept a path to the directory containing the BAM file and indices, the object metadata, and additional reading options. - * It should throw an error if the BAM file is not valid, e.g., corrupted file, mismatched indices. - * - * If provided, this enables stricter checking of the BAM file contents and indices. - * Currently, we don't look past the magic number to verify the files as this requires a dependency on heavy-duty libraries like, e.g., HTSlib. - */ -inline std::function strict_check; - -/** * @param path Path to the directory containing the BAM file. * @param metadata Metadata for the object, typically read from its `OBJECT` file. - * @param options Validation options, typically for reading performance. + * @param options Validation options. 
*/ -inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, [[maybe_unused]] const Options& options) { +inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, Options& options) { const std::string& vstring = internal_json::extract_version_for_type(metadata.other, "bam_file"); auto version = ritsuko::parse_version_string(vstring.c_str(), vstring.size(), /* skip_patch = */ true); if (version.major != 1) { @@ -64,8 +56,8 @@ inline void validate(const std::filesystem::path& path, const ObjectMetadata& me internal_files::check_signature(ixpath, "CSI\1", 4, "CSI index"); } - if (strict_check) { - strict_check(path, metadata, options); + if (options.bam_file_strict_check) { + options.bam_file_strict_check(path, metadata, options); } } diff --git a/include/takane/bcf_file.hpp b/include/takane/bcf_file.hpp index aa8410e..1607312 100644 --- a/include/takane/bcf_file.hpp +++ b/include/takane/bcf_file.hpp @@ -23,22 +23,14 @@ namespace takane { namespace bcf_file { /** - * Application-specific function to check the validity of a BCF file and its indices. + * If `Options::bcf_file_strict_check` is provided, it is used to perform stricter checking of the BCF file contents and indices. + * By default, we don't look past the magic number to verify the files as this requires a dependency on heavy-duty libraries like, e.g., HTSlib. * - * This should accept a path to the directory containing the BCF file and indices, the object metadata, and additional reading options. - * It should throw an error if the BCF file is not valid, e.g., corrupted file, mismatched indices. - * - * If provided, this enables stricter checking of the BCF file contents and indices. - * Currently, we don't look past the magic number to verify the files as this requires a dependency on heavy-duty libraries like, e.g., HTSlib. - */ -inline std::function strict_check; - -/** * @param path Path to the directory containing the BCF file. 
* @param metadata Metadata for the object, typically read from its `OBJECT` file. - * @param options Validation options, typically for reading performance. + * @param options Validation options. */ -inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, [[maybe_unused]] const Options& options) { +inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, Options& options) { const std::string& vstring = internal_json::extract_version_for_type(metadata.other, "bcf_file"); auto version = ritsuko::parse_version_string(vstring.c_str(), vstring.size(), /* skip_patch = */ true); if (version.major != 1) { @@ -67,8 +59,8 @@ inline void validate(const std::filesystem::path& path, const ObjectMetadata& me internal_files::check_signature(ixpath, "CSI\1", 4, "CSI index"); } - if (strict_check) { - strict_check(path, metadata, options); + if (options.bcf_file_strict_check) { + options.bcf_file_strict_check(path, metadata, options); } } diff --git a/include/takane/bed_file.hpp b/include/takane/bed_file.hpp index 36bcbfa..23d8c41 100644 --- a/include/takane/bed_file.hpp +++ b/include/takane/bed_file.hpp @@ -23,23 +23,14 @@ namespace takane { namespace bed_file { /** - * Application-specific function to check the validity of a BED file and its indices. - * - * This should accept a path to the directory containing the BED file, the object metadata, additional reading options, - * and a boolean indicating whether indices are expected to be present in the directory. - * It should throw an error if the BED file is not valid, e.g., corrupted file, mismatched indices. - * - * If provided, this enables stricter checking of the BED file contents and indices. + * If `Options::bed_file_strict_check` is provided, it is used to perform stricter checking of the BED file contents and indices. * Currently, we don't look past the magic number to verify the files as this requires a dependency on heavy-duty libraries like, e.g., HTSlib. 
- */ -inline std::function strict_check; - -/** + * * @param path Path to the directory containing the BED file. * @param metadata Metadata for the object, typically read from its `OBJECT` file. - * @param options Validation options, typically for reading performance. + * @param options Validation options. */ -inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, [[maybe_unused]] const Options& options) { +inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, [[maybe_unused]] Options& options) { const auto& bedmap = internal_json::extract_typed_object_from_metadata(metadata.other, "bed_file"); const std::string& vstring = internal_json::extract_string_from_typed_object(bedmap, "version", "bed_file"); @@ -66,8 +57,8 @@ inline void validate(const std::filesystem::path& path, const ObjectMetadata& me internal_files::check_signature(ixpath, "TBI\1", 4, "tabix"); } - if (strict_check) { - strict_check(path, metadata, options, indexed); + if (options.bed_file_strict_check) { + options.bed_file_strict_check(path, metadata, options, indexed); } } diff --git a/include/takane/bigbed_file.hpp b/include/takane/bigbed_file.hpp index 949dca8..a0ebd41 100644 --- a/include/takane/bigbed_file.hpp +++ b/include/takane/bigbed_file.hpp @@ -23,22 +23,14 @@ namespace takane { namespace bigbed_file { /** - * Application-specific function to check the validity of a bigBed file. + * If `Options::bigbed_file_strict_check` is provided, it is used to perform stricter checking of the bigBed file contents. + * By default, we don't look past the magic number to verify the files as this requires a dependency on heavy-duty libraries like, e.g., HTSlib. * - * This should accept a path to the directory containing the bigBed file, the object metadata, and additional reading options. - * It should throw an error if the bigBed file is not valid, e.g., corrupted file, mismatched indices. 
- * - * If provided, this enables stricter checking of the bigBed file contents and indices. - * Currently, we don't look past the magic number to verify the files as this requires a dependency on heavy-duty libraries like, e.g., HTSlib. - */ -inline std::function strict_check; - -/** * @param path Path to the directory containing the bigBed file. * @param metadata Metadata for the object, typically read from its `OBJECT` file. - * @param options Validation options, typically for reading performance. + * @param options Validation options. */ -inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, [[maybe_unused]] const Options& options) { +inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, Options& options) { const std::string& vstring = internal_json::extract_version_for_type(metadata.other, "bigbed_file"); auto version = ritsuko::parse_version_string(vstring.c_str(), vstring.size(), /* skip_patch = */ true); if (version.major != 1) { @@ -57,8 +49,8 @@ inline void validate(const std::filesystem::path& path, const ObjectMetadata& me throw std::runtime_error("incorrect bigBed file signature for '" + ipath.string() + "'"); } - if (strict_check) { - strict_check(path, metadata, options); + if (options.bigbed_file_strict_check) { + options.bigbed_file_strict_check(path, metadata, options); } } diff --git a/include/takane/bigwig_file.hpp b/include/takane/bigwig_file.hpp index 31296c6..da1afa0 100644 --- a/include/takane/bigwig_file.hpp +++ b/include/takane/bigwig_file.hpp @@ -23,22 +23,14 @@ namespace takane { namespace bigwig_file { /** - * Application-specific function to check the validity of a bigWig file. + * If `Options::bigwig_file_strict_check` is provided, it is used to perform stricter checking of the bigWig file contents. + * By default, we don't look past the magic number to verify the files as this requires a dependency on heavy-duty libraries like, e.g., HTSlib. 
* - * This should accept a path to the directory containing the bigWig file, the object metadata, and additional reading options. - * It should throw an error if the bigWig file is not valid, e.g., corrupted file, mismatched indices. - * - * If provided, this enables stricter checking of the bigWig file contents and indices. - * Currently, we don't look past the magic number to verify the files as this requires a dependency on heavy-duty libraries like, e.g., HTSlib. - */ -inline std::function strict_check; - -/** * @param path Path to the directory containing the bigWig file. * @param metadata Metadata for the object, typically read from its `OBJECT` file. - * @param options Validation options, typically for reading performance. + * @param options Validation options. */ -inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, [[maybe_unused]] const Options& options) { +inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, Options& options) { const std::string& vstring = internal_json::extract_version_for_type(metadata.other, "bigwig_file"); auto version = ritsuko::parse_version_string(vstring.c_str(), vstring.size(), /* skip_patch = */ true); if (version.major != 1) { @@ -57,8 +49,8 @@ inline void validate(const std::filesystem::path& path, const ObjectMetadata& me throw std::runtime_error("incorrect bigWig file signature for '" + ipath.string() + "'"); } - if (strict_check) { - strict_check(path, metadata, options); + if (options.bigwig_file_strict_check) { + options.bigwig_file_strict_check(path, metadata, options); } } diff --git a/include/takane/bumpy_atomic_array.hpp b/include/takane/bumpy_atomic_array.hpp index db281fb..cb59d6e 100644 --- a/include/takane/bumpy_atomic_array.hpp +++ b/include/takane/bumpy_atomic_array.hpp @@ -22,29 +22,29 @@ namespace bumpy_atomic_array { /** * @param path Path to the directory containing the bumpy atomic array. 
* @param metadata Metadata for the object, typically read from its `OBJECT` file. - * @param options Validation options, typically for reading performance. + * @param options Validation options. */ -inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, const Options& options) { +inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, Options& options) { internal_bumpy_array::validate_directory(path, "bumpy_atomic_array", "atomic_vector", metadata, options); } /** * @param path Path to a directory containing an bumpy atomic array. * @param metadata Metadata for the object, typically read from its `OBJECT` file. - * @param options Validation options, mostly for input performance. + * @param options Validation options. * @return The height (i.e., first dimension extent) of the array. */ -inline size_t height(const std::filesystem::path& path, const ObjectMetadata& metadata, const Options& options) { +inline size_t height(const std::filesystem::path& path, const ObjectMetadata& metadata, Options& options) { return internal_bumpy_array::height(path, "bumpy_atomic_array", metadata, options); } /** * @param path Path to a directory containing an bumpy atomic array. * @param metadata Metadata for the object, typically read from its `OBJECT` file. - * @param options Validation options, mostly for input performance. + * @param options Validation options. * @return Vector containing the dimensions of the array. 
*/ -inline std::vector dimensions(const std::filesystem::path& path, const ObjectMetadata& metadata, const Options& options) { +inline std::vector dimensions(const std::filesystem::path& path, const ObjectMetadata& metadata, Options& options) { return internal_bumpy_array::dimensions(path, "bumpy_atomic_array", metadata, options); } diff --git a/include/takane/bumpy_data_frame_array.hpp b/include/takane/bumpy_data_frame_array.hpp index cddefcd..3084edd 100644 --- a/include/takane/bumpy_data_frame_array.hpp +++ b/include/takane/bumpy_data_frame_array.hpp @@ -22,29 +22,29 @@ namespace bumpy_data_frame_array { /** * @param path Path to the directory containing the bumpy data frame array. * @param metadata Metadata for the object, typically read from its `OBJECT` file. - * @param options Validation options, typically for reading performance. + * @param options Validation options. */ -inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, const Options& options) { +inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, Options& options) { internal_bumpy_array::validate_directory(path, "bumpy_data_frame_array", "DATA_FRAME", metadata, options); } /** * @param path Path to a directory containing an bumpy data frame array. * @param metadata Metadata for the object, typically read from its `OBJECT` file. - * @param options Validation options, mostly for input performance. + * @param options Validation options. * @return The height (i.e., first dimension extent) of the array. */ -inline size_t height(const std::filesystem::path& path, const ObjectMetadata& metadata, const Options& options) { +inline size_t height(const std::filesystem::path& path, const ObjectMetadata& metadata, Options& options) { return internal_bumpy_array::height(path, "bumpy_data_frame_array", metadata, options); } /** * @param path Path to a directory containing an bumpy data frame array. 
* @param metadata Metadata for the object, typically read from its `OBJECT` file. - * @param options Validation options, mostly for input performance. + * @param options Validation options. * @return Vector containing the dimensions of the array. */ -inline std::vector dimensions(const std::filesystem::path& path, const ObjectMetadata& metadata, const Options& options) { +inline std::vector dimensions(const std::filesystem::path& path, const ObjectMetadata& metadata, Options& options) { return internal_bumpy_array::dimensions(path, "bumpy_data_frame_array", metadata, options); } diff --git a/include/takane/compressed_sparse_matrix.hpp b/include/takane/compressed_sparse_matrix.hpp index 669a11f..df86260 100644 --- a/include/takane/compressed_sparse_matrix.hpp +++ b/include/takane/compressed_sparse_matrix.hpp @@ -163,9 +163,9 @@ inline void validate_indices(const H5::Group& handle, const std::vector dimensions(const std::filesystem::path& path, [[maybe_unused]] const ObjectMetadata& metadata, [[maybe_unused]] const Options& options) { +inline std::vector dimensions(const std::filesystem::path& path, [[maybe_unused]] const ObjectMetadata& metadata, [[maybe_unused]] Options& options) { auto handle = ritsuko::hdf5::open_file(path / "matrix.h5"); auto ghandle = ritsuko::hdf5::open_group(handle, "compressed_sparse_matrix"); auto shandle = ritsuko::hdf5::open_dataset(ghandle, "shape"); diff --git a/include/takane/data_frame.hpp b/include/takane/data_frame.hpp index c5bb7b4..f0aad2f 100644 --- a/include/takane/data_frame.hpp +++ b/include/takane/data_frame.hpp @@ -28,8 +28,8 @@ namespace takane { /** * @cond */ -void validate(const std::filesystem::path&, const ObjectMetadata&, const Options&); -size_t height(const std::filesystem::path&, const ObjectMetadata&, const Options&); +void validate(const std::filesystem::path&, const ObjectMetadata&, Options& options); +size_t height(const std::filesystem::path&, const ObjectMetadata&, Options& options); /** * @endcond */ @@ 
-155,9 +155,9 @@ inline void validate_column(const H5::Group& dhandle, const std::string& dset_na /** * @param path Path to the directory containing the data frame. * @param metadata Metadata for the object, typically read from its `OBJECT` file. - * @param options Validation options, typically for reading performance. + * @param options Validation options. */ -inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, const Options& options) { +inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, Options& options) { const auto& vstring = internal_json::extract_version_for_type(metadata.other, "data_frame"); auto version = ritsuko::parse_version_string(vstring.c_str(), vstring.size(), /* skip_patch = */ true); if (version.major != 1) { @@ -225,10 +225,10 @@ inline void validate(const std::filesystem::path& path, const ObjectMetadata& me /** * @param path Path to a directory containing a data frame. * @param metadata Metadata for the object, typically read from its `OBJECT` file. - * @param options Validation options, mostly for input performance. + * @param options Validation options. * @return The number of rows. */ -inline size_t height(const std::filesystem::path& path, [[maybe_unused]] const ObjectMetadata& metadata, [[maybe_unused]] const Options& options) { +inline size_t height(const std::filesystem::path& path, [[maybe_unused]] const ObjectMetadata& metadata, [[maybe_unused]] Options& options) { // Assume it's all valid already. auto handle = ritsuko::hdf5::open_file(path / "basic_columns.h5"); auto ghandle = handle.openGroup("data_frame"); @@ -238,10 +238,10 @@ inline size_t height(const std::filesystem::path& path, [[maybe_unused]] const O /** * @param path Path to a directory containing a data frame. * @param metadata Metadata for the object, typically read from its `OBJECT` file. - * @param options Validation options, mostly for input performance. + * @param options Validation options. 
* @return A vector of length 2 containing the number of rows and columns in the data frame. */ -inline std::vector dimensions(const std::filesystem::path& path, [[maybe_unused]] const ObjectMetadata& metadata, [[maybe_unused]] const Options& options) { +inline std::vector dimensions(const std::filesystem::path& path, [[maybe_unused]] const ObjectMetadata& metadata, [[maybe_unused]] Options& options) { // Assume it's all valid already. auto handle = ritsuko::hdf5::open_file(path / "basic_columns.h5"); auto ghandle = handle.openGroup("data_frame"); diff --git a/include/takane/data_frame_factor.hpp b/include/takane/data_frame_factor.hpp index f041a64..636a4f7 100644 --- a/include/takane/data_frame_factor.hpp +++ b/include/takane/data_frame_factor.hpp @@ -23,9 +23,9 @@ namespace takane { /** * @cond */ -void validate(const std::filesystem::path&, const ObjectMetadata&, const Options&); -size_t height(const std::filesystem::path&, const ObjectMetadata&, const Options&); -bool satisfies_interface(const std::string&, const std::string&); +void validate(const std::filesystem::path&, const ObjectMetadata&, Options&); +size_t height(const std::filesystem::path&, const ObjectMetadata&, Options&); +bool satisfies_interface(const std::string&, const std::string&, const Options&); /** * @endcond */ @@ -37,22 +37,14 @@ bool satisfies_interface(const std::string&, const std::string&); namespace data_frame_factor { /** - * Application-specific function to determine whether there are duplicated rows in the data frame containing the levels of a `data_frame_factor`. - * - * This should accept a path to the directory containing the data frame, the object metadata, and additional reading options. - * It should return a boolean indicating whether any duplicate rows were found. - * - * If provided, this enables stricter checking of the uniqueness of the data frame levels. 
+ * If `Options::data_frame_factor_any_duplicated` is provided, this enables stricter checking of the uniqueness of the data frame levels. * Currently, we don't provide a default method for `data_frame` objects, as it's kind of tedious and we haven't gotten around to it yet. - */ -inline std::function any_duplicated; - -/** + * * @param path Path to the directory containing the data frame factor. * @param metadata Metadata for the object, typically read from its `OBJECT` file. - * @param options Validation options, typically for reading performance. + * @param options Validation options. */ -inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, const Options& options) { +inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, Options& options) { const auto& vstring = internal_json::extract_version_for_type(metadata.other, "data_frame_factor"); auto version = ritsuko::parse_version_string(vstring.c_str(), vstring.size(), /* skip_patch = */ true); if (version.major != 1) { @@ -62,7 +54,7 @@ inline void validate(const std::filesystem::path& path, const ObjectMetadata& me // Validating the levels. 
auto lpath = path / "levels"; auto lmeta = read_object_metadata(lpath); - if (!satisfies_interface(lmeta.type, "DATA_FRAME")) { + if (!satisfies_interface(lmeta.type, "DATA_FRAME", options)) { throw std::runtime_error("expected 'levels' to be an object that satifies the 'DATA_FRAME' interface"); } @@ -73,8 +65,8 @@ inline void validate(const std::filesystem::path& path, const ObjectMetadata& me } size_t num_levels = ::takane::height(lpath, lmeta, options); - if (any_duplicated) { - if (any_duplicated(lpath, lmeta, options)) { + if (options.data_frame_factor_any_duplicated) { + if (options.data_frame_factor_any_duplicated(lpath, lmeta, options)) { throw std::runtime_error("'levels' should not contain duplicated rows"); } } @@ -92,10 +84,10 @@ inline void validate(const std::filesystem::path& path, const ObjectMetadata& me /** * @param path Path to the directory containing the data frame factor. * @param metadata Metadata for the object, typically read from its `OBJECT` file. - * @param options Validation options, typically for reading performance. + * @param options Validation options. * @return Length of the factor. */ -inline size_t height(const std::filesystem::path& path, [[maybe_unused]] const ObjectMetadata& metadata, [[maybe_unused]] const Options& options) { +inline size_t height(const std::filesystem::path& path, [[maybe_unused]] const ObjectMetadata& metadata, [[maybe_unused]] Options& options) { auto handle = ritsuko::hdf5::open_file(path / "contents.h5"); auto ghandle = handle.openGroup("data_frame_factor"); auto dhandle = ghandle.openDataSet("codes"); diff --git a/include/takane/data_frame_list.hpp b/include/takane/data_frame_list.hpp index 9b9fa79..0ab04f2 100644 --- a/include/takane/data_frame_list.hpp +++ b/include/takane/data_frame_list.hpp @@ -22,19 +22,19 @@ namespace data_frame_list { /** * @param path Path to the directory containing the data frame list. * @param metadata Metadata for the object, typically read from its `OBJECT` file. 
- * @param options Validation options, typically for reading performance. + * @param options Validation options. */ -inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, const Options& options) { +inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, Options& options) { internal_compressed_list::validate_directory(path, "data_frame_list", "DATA_FRAME", metadata, options); } /** * @param path Path to a directory containing an data frame list. * @param metadata Metadata for the object, typically read from its `OBJECT` file. - * @param options Validation options, mostly for input performance. + * @param options Validation options. * @return The length of the list. */ -inline size_t height(const std::filesystem::path& path, const ObjectMetadata& metadata, const Options& options) { +inline size_t height(const std::filesystem::path& path, const ObjectMetadata& metadata, Options& options) { return internal_compressed_list::height(path, "data_frame_list", metadata, options); } diff --git a/include/takane/delayed_array.hpp b/include/takane/delayed_array.hpp index abb4994..ca1a8d7 100644 --- a/include/takane/delayed_array.hpp +++ b/include/takane/delayed_array.hpp @@ -24,9 +24,8 @@ namespace takane { /** * @cond */ -void validate(const std::filesystem::path&, const ObjectMetadata&, const Options&); -bool derived_from(const std::string&, const std::string&); -std::vector dimensions(const std::filesystem::path&, const ObjectMetadata&, const Options&); +void validate(const std::filesystem::path&, const ObjectMetadata&, Options&); +std::vector dimensions(const std::filesystem::path&, const ObjectMetadata&, Options&); /** * @endcond */ @@ -40,9 +39,9 @@ namespace delayed_array { /** * @param path Path to the directory containing a delayed array. * @param metadata Metadata for the object, typically read from its `OBJECT` file. - * @param options Validation options, mostly related to reading performance. 
+ * @param options Validation options. */ -inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, const Options& options) { +inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, Options& options) { auto vstring = internal_json::extract_version_for_type(metadata.other, "delayed_array"); auto version = ritsuko::parse_version_string(vstring.c_str(), vstring.size(), /* skip_patch = */ true); if (version.major != 1) { @@ -104,10 +103,10 @@ inline void validate(const std::filesystem::path& path, const ObjectMetadata& me /** * @param path Path to the directory containing a delayed array. * @param metadata Metadata for the object, typically read from its `OBJECT` file. - * @param options Validation options, mostly related to reading performance. + * @param options Validation options. * @return Extent of the first dimension. */ -inline size_t height(const std::filesystem::path& path, [[maybe_unused]] const ObjectMetadata& metadata, [[maybe_unused]] const Options& options) { +inline size_t height(const std::filesystem::path& path, [[maybe_unused]] const ObjectMetadata& metadata, [[maybe_unused]] Options& options) { auto apath = path / "array.h5"; auto output = chihaya::validate(apath, "delayed_array"); return output.dimensions[0]; @@ -116,10 +115,10 @@ inline size_t height(const std::filesystem::path& path, [[maybe_unused]] const O /** * @param path Path to the directory containing a delayed array. * @param metadata Metadata for the object, typically read from its `OBJECT` file. - * @param options Validation options, mostly related to reading performance. + * @param options Validation options. * @return Dimensions of the array. 
*/ -inline std::vector dimensions(const std::filesystem::path& path, [[maybe_unused]] const ObjectMetadata& metadata, [[maybe_unused]] const Options& options) { +inline std::vector dimensions(const std::filesystem::path& path, [[maybe_unused]] const ObjectMetadata& metadata, [[maybe_unused]] Options& options) { auto apath = path / "array.h5"; auto output = chihaya::validate(apath, "delayed_array"); return std::vector(output.dimensions.begin(), output.dimensions.end()); diff --git a/include/takane/dense_array.hpp b/include/takane/dense_array.hpp index 0cfd3de..92a21ec 100644 --- a/include/takane/dense_array.hpp +++ b/include/takane/dense_array.hpp @@ -140,9 +140,9 @@ inline void validate_string_contents(const H5::DataSet& dhandle, const std::vect /** * @param path Path to the directory containing a dense array. * @param metadata Metadata for the object, typically read from its `OBJECT` file. - * @param options Validation options, mostly related to reading performance. + * @param options Validation options. */ -inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, const Options& options) { +inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, Options& options) { auto vstring = internal_json::extract_version_for_type(metadata.other, "dense_array"); auto version = ritsuko::parse_version_string(vstring.c_str(), vstring.size(), /* skip_patch = */ true); if (version.major != 1) { @@ -197,10 +197,10 @@ inline void validate(const std::filesystem::path& path, const ObjectMetadata& me /** * @param path Path to the directory containing a dense array. * @param metadata Metadata for the object, typically read from its `OBJECT` file. - * @param options Validation options, mostly related to reading performance. + * @param options Validation options. * @return Extent of the first dimension. 
*/ -inline size_t height(const std::filesystem::path& path, [[maybe_unused]] const ObjectMetadata& metadata, [[maybe_unused]] const Options& options) { +inline size_t height(const std::filesystem::path& path, [[maybe_unused]] const ObjectMetadata& metadata, [[maybe_unused]] Options& options) { auto handle = ritsuko::hdf5::open_file(path / "array.h5"); auto ghandle = ritsuko::hdf5::open_group(handle, "dense_array"); @@ -220,10 +220,10 @@ inline size_t height(const std::filesystem::path& path, [[maybe_unused]] const O /** * @param path Path to the directory containing a dense array. * @param metadata Metadata for the object, typically read from its `OBJECT` file. - * @param options Validation options, mostly related to reading performance. + * @param options Validation options. * @return Dimensions of the array. */ -inline std::vector dimensions(const std::filesystem::path& path, [[maybe_unused]] const ObjectMetadata& metadata, [[maybe_unused]] const Options& options) { +inline std::vector dimensions(const std::filesystem::path& path, [[maybe_unused]] const ObjectMetadata& metadata, [[maybe_unused]] Options& options) { auto handle = ritsuko::hdf5::open_file(path / "array.h5"); auto ghandle = ritsuko::hdf5::open_group(handle, "dense_array"); diff --git a/include/takane/fasta_file.hpp b/include/takane/fasta_file.hpp index 41706af..3a32156 100644 --- a/include/takane/fasta_file.hpp +++ b/include/takane/fasta_file.hpp @@ -22,23 +22,14 @@ namespace takane { namespace fasta_file { /** - * Application-specific function to check the validity of a FASTA file and its indices. + * If `Options::fasta_file_strict_check` is provided, this enables stricter checking of the FASTA file contents and indices. + * By default, we just look at the first few bytes to verify the files. 
* - * This should accept a path to the directory containing the FASTA file, the object metadata, additional reading options, - * and a boolean indicating whether indices are expected to be present in the directory. - * It should throw an error if the FASTA file is not valid, e.g., corrupted file, mismatched indices. - * - * If provided, this enables stricter checking of the FASTA file contents and indices. - * Currently, we don't look past the magic number to verify the files as this requires a dependency on heavy-duty libraries like, e.g., HTSlib. - */ -inline std::function strict_check; - -/** * @param path Path to the directory containing the FASTA file. * @param metadata Metadata for the object, typically read from its `OBJECT` file. - * @param options Validation options, typically for reading performance. + * @param options Validation options. */ -inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, [[maybe_unused]] const Options& options) { +inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, Options& options) { const auto& famap = internal_json::extract_typed_object_from_metadata(metadata.other, "fasta_file"); const std::string& vstring = internal_json::extract_string_from_typed_object(famap, "version", "fasta_file"); @@ -78,8 +69,8 @@ inline void validate(const std::filesystem::path& path, const ObjectMetadata& me } } - if (strict_check) { - strict_check(path, metadata, options, indexed); + if (options.fasta_file_strict_check) { + options.fasta_file_strict_check(path, metadata, options, indexed); } } diff --git a/include/takane/fastq_file.hpp b/include/takane/fastq_file.hpp index 1de82d2..02e0533 100644 --- a/include/takane/fastq_file.hpp +++ b/include/takane/fastq_file.hpp @@ -22,23 +22,14 @@ namespace takane { namespace fastq_file { /** - * Application-specific function to check the validity of a FASTQ file and its indices. 
+ * If `Options::fastq_file_strict_check` is provided, this enables stricter checking of the FASTQ file contents and indices. + * By default, we just look at the first few bytes to verify the files. * - * This should accept a path to the directory containing the FASTQ file, the object metadata, additional reading options, - * and a boolean indicating whether or not indices are expected to be present in the directory. - * It should throw an error if the FASTQ file is not valid, e.g., corrupted file, mismatched indices. - * - * If provided, this enables stricter checking of the FASTQ file contents and indices. - * Currently, we don't look past the magic number to verify the files as this requires a dependency on heavy-duty libraries like, e.g., HTSlib. - */ -inline std::function strict_check; - -/** * @param path Path to the directory containing the FASTQ file. * @param metadata Metadata for the object, typically read from its `OBJECT` file. - * @param options Validation options, typically for reading performance. + * @param options Validation options. 
*/ -inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, [[maybe_unused]] const Options& options) { +inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, [[maybe_unused]] Options& options) { const auto& fqmap = internal_json::extract_typed_object_from_metadata(metadata.other, "fastq_file"); const std::string& vstring = internal_json::extract_string_from_typed_object(fqmap, "version", "fastq_file"); @@ -104,8 +95,8 @@ inline void validate(const std::filesystem::path& path, const ObjectMetadata& me } } - if (strict_check) { - strict_check(path, metadata, options, indexed); + if (options.fastq_file_strict_check) { + options.fastq_file_strict_check(path, metadata, options, indexed); } } diff --git a/include/takane/genomic_ranges.hpp b/include/takane/genomic_ranges.hpp index 2325f8d..def2fe6 100644 --- a/include/takane/genomic_ranges.hpp +++ b/include/takane/genomic_ranges.hpp @@ -27,8 +27,8 @@ namespace takane { /** * @cond */ -void validate(const std::filesystem::path&, const ObjectMetadata&, const Options& options); -bool derived_from(const std::string&, const std::string&); +void validate(const std::filesystem::path&, const ObjectMetadata&, Options& options); +bool derived_from(const std::string&, const std::string&, const Options& options); /** * @endcond */ @@ -50,9 +50,9 @@ struct SequenceLimits { std::vector seqlen; }; -inline SequenceLimits find_sequence_limits(const std::filesystem::path& path, const Options& options) { +inline SequenceLimits find_sequence_limits(const std::filesystem::path& path, Options& options) { auto smeta = read_object_metadata(path); - if (!derived_from(smeta.type, "sequence_information")) { + if (!derived_from(smeta.type, "sequence_information", options)) { throw std::runtime_error("'sequence_information' directory should contain a 'sequence_information' object"); } ::takane::validate(path, smeta, options); @@ -90,9 +90,9 @@ inline SequenceLimits 
find_sequence_limits(const std::filesystem::path& path, co /** * @param path Path to the directory containing the genomic ranges. * @param metadata Metadata for the object, typically read from its `OBJECT` file. - * @param options Validation options, typically for reading performance. + * @param options Validation options. */ -inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, const Options& options) { +inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, Options& options) { const auto& vstring = internal_json::extract_version_for_type(metadata.other, "genomic_ranges"); auto version = ritsuko::parse_version_string(vstring.c_str(), vstring.size(), /* skip_patch = */ true); if (version.major != 1) { @@ -204,11 +204,10 @@ inline void validate(const std::filesystem::path& path, const ObjectMetadata& me /** * @param path Path to a directory containing genomic ranges. * @param metadata Metadata for the object, typically read from its `OBJECT` file. - * @param options Validation options, mostly for input performance. + * @param options Validation options. * @return The number of ranges. */ -inline size_t height(const std::filesystem::path& path, [[maybe_unused]] const ObjectMetadata& metadata, [[maybe_unused]] const Options& options) { - // Assume it's all valid already. +inline size_t height(const std::filesystem::path& path, [[maybe_unused]] const ObjectMetadata& metadata, [[maybe_unused]] Options& options) { auto handle = ritsuko::hdf5::open_file(path / "ranges.h5"); auto ghandle = handle.openGroup("genomic_ranges"); auto dhandle = ghandle.openDataSet("sequence"); diff --git a/include/takane/genomic_ranges_list.hpp b/include/takane/genomic_ranges_list.hpp index 0d611ab..3ad4c90 100644 --- a/include/takane/genomic_ranges_list.hpp +++ b/include/takane/genomic_ranges_list.hpp @@ -22,19 +22,19 @@ namespace genomic_ranges_list { /** * @param path Path to the directory containing the genomic ranges list. 
* @param metadata Metadata for the object, typically read from its `OBJECT` file. - * @param options Validation options, typically for reading performance. + * @param options Validation options. */ -inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, const Options& options) { +inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, Options& options) { internal_compressed_list::validate_directory(path, "genomic_ranges_list", "genomic_ranges", metadata, options); } /** * @param path Path to a directory containing an genomic ranges list. * @param metadata Metadata for the object, typically read from its `OBJECT` file. - * @param options Validation options, mostly for input performance. + * @param options Validation options. * @return The length of the list. */ -inline size_t height(const std::filesystem::path& path, const ObjectMetadata& metadata, const Options& options) { +inline size_t height(const std::filesystem::path& path, const ObjectMetadata& metadata, Options& options) { return internal_compressed_list::height(path, "genomic_ranges_list", metadata, options); } diff --git a/include/takane/gff_file.hpp b/include/takane/gff_file.hpp index 211d33f..b690c24 100644 --- a/include/takane/gff_file.hpp +++ b/include/takane/gff_file.hpp @@ -22,23 +22,14 @@ namespace takane { namespace gff_file { /** - * Application-specific function to check the validity of a GFF file. + * If `Options::gff_file_strict_check` is provided, this enables stricter checking of the GFF file contents. + * By default, we just look at the first few bytes to verify the files. * - * This should accept a path to the directory containing the GFF file, the object metadata, additional reading options. - * and a boolean indicating whether indices are expected to be present in the directory. - * It should throw an error if the GFF file is not valid, e.g., corrupted file, mismatched indices. 
- * - * If provided, this enables stricter checking of the GFF file contents. - * Currently, we don't look past the magic number to verify the files. - */ -inline std::function strict_check; - -/** * @param path Path to the directory containing the GFF file. * @param metadata Metadata for the object, typically read from its `OBJECT` file. - * @param options Validation options, typically for reading performance. + * @param options Validation options. */ -inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, [[maybe_unused]] const Options& options) { +inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, Options& options) { const auto& gffmap = internal_json::extract_typed_object_from_metadata(metadata.other, "gff_file"); const std::string& vstring = internal_json::extract_string_from_typed_object(gffmap, "version", "gff_file"); @@ -96,8 +87,8 @@ inline void validate(const std::filesystem::path& path, const ObjectMetadata& me internal_files::check_signature(ixpath, "TBI\1", 4, "tabix"); } - if (strict_check) { - strict_check(path, metadata, options, indexed); + if (options.gff_file_strict_check) { + options.gff_file_strict_check(path, metadata, options, indexed); } } diff --git a/include/takane/gmt_file.hpp b/include/takane/gmt_file.hpp index 4309c3a..2b5f75f 100644 --- a/include/takane/gmt_file.hpp +++ b/include/takane/gmt_file.hpp @@ -22,22 +22,14 @@ namespace takane { namespace gmt_file { /** - * Application-specific function to check the validity of a GMT file. + * If `Options::gmt_file_strict_check` is provided, this enables stricter checking of the GMT file contents. + * By default, we just look at the first few bytes to verify the files. * - * This should accept a path to the directory containing the GMT file, the object metadata and additional reading options. - * It should throw an error if the GMT file is not valid, e.g., corrupted file, mismatched indices. 
- * - * If provided, this enables stricter checking of the GMT file contents. - * Currently, we don't look past the magic number to verify the files. - */ -inline std::function strict_check; - -/** * @param path Path to the directory containing the GMT file. * @param metadata Metadata for the object, typically read from its `OBJECT` file. - * @param options Validation options, typically for reading performance. + * @param options Validation options. */ -inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, [[maybe_unused]] const Options& options) { +inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, Options& options) { const std::string& vstring = internal_json::extract_version_for_type(metadata.other, "gmt_file"); auto version = ritsuko::parse_version_string(vstring.c_str(), vstring.size(), /* skip_patch = */ true); if (version.major != 1) { @@ -47,8 +39,8 @@ inline void validate(const std::filesystem::path& path, const ObjectMetadata& me auto fpath = path / "file.gmt.gz"; internal_files::check_gzip_signature(fpath); - if (strict_check) { - strict_check(path, metadata, options); + if (options.gmt_file_strict_check) { + options.gmt_file_strict_check(path, metadata, options); } } diff --git a/include/takane/multi_sample_dataset.hpp b/include/takane/multi_sample_dataset.hpp index 029d577..32ab532 100644 --- a/include/takane/multi_sample_dataset.hpp +++ b/include/takane/multi_sample_dataset.hpp @@ -20,9 +20,9 @@ namespace takane { /** * @cond */ -void validate(const std::filesystem::path&, const ObjectMetadata&, const Options& options); -size_t height(const std::filesystem::path&, const ObjectMetadata&, const Options& options); -bool satisfies_interface(const std::string&, const std::string&); +void validate(const std::filesystem::path&, const ObjectMetadata&, Options&); +size_t height(const std::filesystem::path&, const ObjectMetadata&, Options&); +bool satisfies_interface(const std::string&, 
const std::string&, const Options&); /** * @endcond */ @@ -36,9 +36,9 @@ namespace multi_sample_dataset { /** * @param path Path to the directory containing the multi-sample dataset. * @param metadata Metadata for the object, typically read from its `OBJECT` file. - * @param options Validation options, typically for reading performance. + * @param options Validation options. */ -inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, const Options& options) { +inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, Options& options) { const auto& dmap = internal_json::extract_typed_object_from_metadata(metadata.other, "multi_sample_dataset"); const std::string& vstring = internal_json::extract_string_from_typed_object(dmap, "version", "multi_sample_dataset"); @@ -50,7 +50,7 @@ inline void validate(const std::filesystem::path& path, const ObjectMetadata& me // Sample data should exist. auto sd_path = path / "sample_data"; auto sdmeta = read_object_metadata(sd_path); - if (!satisfies_interface(sdmeta.type, "DATA_FRAME")) { + if (!satisfies_interface(sdmeta.type, "DATA_FRAME", options)) { throw std::runtime_error("object in 'sample_data' should satisfy the 'DATA_FRAME' interface"); } try { @@ -72,7 +72,7 @@ inline void validate(const std::filesystem::path& path, const ObjectMetadata& me auto epath = edir / ename; auto emeta = read_object_metadata(epath); - if (!satisfies_interface(emeta.type, "SUMMARIZED_EXPERIMENT")) { + if (!satisfies_interface(emeta.type, "SUMMARIZED_EXPERIMENT", options)) { throw std::runtime_error("object in 'experiments/" + ename + "' should satisfy the 'SUMMARIZED_EXPERIMENT' interface"); } diff --git a/include/takane/ranged_summarized_experiment.hpp b/include/takane/ranged_summarized_experiment.hpp index 60df764..afb85d3 100644 --- a/include/takane/ranged_summarized_experiment.hpp +++ b/include/takane/ranged_summarized_experiment.hpp @@ -8,7 +8,6 @@ #include #include -#include 
#include /** @@ -21,8 +20,9 @@ namespace takane { /** * @cond */ -void validate(const std::filesystem::path&, const ObjectMetadata&, const Options& options); -size_t height(const std::filesystem::path&, const ObjectMetadata&, const Options& options); +void validate(const std::filesystem::path&, const ObjectMetadata&, Options& options); +size_t height(const std::filesystem::path&, const ObjectMetadata&, Options& options); +bool derived_from(const std::string&, const std::string&, const Options&); /** * @endcond */ @@ -36,9 +36,9 @@ namespace ranged_summarized_experiment { /** * @param path Path to the directory containing the ranged summarized experiment. * @param metadata Metadata for the object, typically read from its `OBJECT` file. - * @param options Validation options, typically for reading performance. + * @param options Validation options. */ -inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, const Options& options) { +inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, Options& options) { ::takane::summarized_experiment::validate(path, metadata, options); const auto& rsemap = internal_json::extract_typed_object_from_metadata(metadata.other, "ranged_summarized_experiment"); @@ -52,7 +52,7 @@ inline void validate(const std::filesystem::path& path, const ObjectMetadata& me auto rangedir = path / "row_ranges"; if (std::filesystem::exists(rangedir)) { auto rangemeta = read_object_metadata(rangedir); - if (!derived_from(rangemeta.type, "genomic_ranges") && !derived_from(rangemeta.type, "genomic_ranges_list")) { + if (!derived_from(rangemeta.type, "genomic_ranges", options) && !derived_from(rangemeta.type, "genomic_ranges_list", options)) { throw std::runtime_error("object in 'row_ranges' must be a 'genomic_ranges' or 'genomic_ranges_list'"); } diff --git a/include/takane/sequence_information.hpp b/include/takane/sequence_information.hpp index f3b61a4..30bb2fc 100644 --- 
a/include/takane/sequence_information.hpp +++ b/include/takane/sequence_information.hpp @@ -28,9 +28,9 @@ namespace sequence_information { /** * @param path Path to the directory containing the data frame. * @param metadata Metadata for the object, typically read from its `OBJECT` file. - * @param options Validation options, typically for reading performance. + * @param options Validation options. */ -inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, const Options& options) { +inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, Options& options) { auto vstring = internal_json::extract_version_for_type(metadata.other, "sequence_information"); auto version = ritsuko::parse_version_string(vstring.c_str(), vstring.size(), /* skip_patch = */ true); if (version.major != 1) { diff --git a/include/takane/sequence_string_set.hpp b/include/takane/sequence_string_set.hpp index b1b35fd..8bdabd2 100644 --- a/include/takane/sequence_string_set.hpp +++ b/include/takane/sequence_string_set.hpp @@ -209,9 +209,9 @@ size_t parse_names(const std::filesystem::path& path) { /** * @param path Path to a directory containing a sequence string set. * @param metadata Metadata for the object, typically read from its `OBJECT` file. - * @param options Validation options, mostly for input performance. + * @param options Validation options. 
*/ -inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, const Options& options) { +inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, Options& options) { const auto& obj = internal_json::extract_typed_object_from_metadata(metadata.other, "sequence_string_set"); const auto& vstring = internal_json::extract_string_from_typed_object(obj, "version", "sequence_string_set"); auto version = ritsuko::parse_version_string(vstring.c_str(), vstring.size(), /* skip_patch = */ true); @@ -355,10 +355,10 @@ inline void validate(const std::filesystem::path& path, const ObjectMetadata& me /** * @param path Path to a directory containing a sequence string set. * @param metadata Metadata for the object, typically read from its `OBJECT` file. - * @param options Validation options, mostly for input performance. + * @param options Validation options. * @return The number of sequences. */ -inline size_t height([[maybe_unused]] const std::filesystem::path& path, const ObjectMetadata& metadata, [[maybe_unused]] const Options& options) { +inline size_t height([[maybe_unused]] const std::filesystem::path& path, const ObjectMetadata& metadata, [[maybe_unused]] Options& options) { const auto& obj = internal_json::extract_typed_object_from_metadata(metadata.other, "sequence_string_set"); auto lIt = obj.find("length"); const auto& val = lIt->second; diff --git a/include/takane/simple_list.hpp b/include/takane/simple_list.hpp index 5b6475e..74e92de 100644 --- a/include/takane/simple_list.hpp +++ b/include/takane/simple_list.hpp @@ -21,7 +21,7 @@ namespace takane { /** * @cond */ -void validate(const std::filesystem::path&, const Options&); +void validate(const std::filesystem::path&, Options&); /** * @endcond */ @@ -57,9 +57,9 @@ inline std::string extract_format(const internal_json::JsonObjectMap& map) { /** * @param path Path to the directory containing the simple list. 
* @param metadata Metadata for the object, typically read from its `OBJECT` file. - * @param options Validation options, typically for reading performance. + * @param options Validation options. */ -inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, const Options& options) { +inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, Options& options) { const auto& metamap = internal_json::extract_typed_object_from_metadata(metadata.other, "simple_list"); const std::string& vstring = internal_json::extract_string_from_typed_object(metamap, "version", "simple_list"); @@ -110,10 +110,10 @@ inline void validate(const std::filesystem::path& path, const ObjectMetadata& me /** * @param path Path to the directory containing the simple list. * @param metadata Metadata for the object, typically read from its `OBJECT` file. - * @param options Validation options, typically for reading performance. + * @param options Validation options. * @return The number of list elements. 
*/ -inline size_t height(const std::filesystem::path& path, const ObjectMetadata& metadata, const Options& options) { +inline size_t height(const std::filesystem::path& path, const ObjectMetadata& metadata, Options& options) { const auto& metamap = internal_json::extract_typed_object_from_metadata(metadata.other, "simple_list"); std::string format = internal::extract_format(metamap); diff --git a/include/takane/single_cell_experiment.hpp b/include/takane/single_cell_experiment.hpp index 18b4457..8c950be 100644 --- a/include/takane/single_cell_experiment.hpp +++ b/include/takane/single_cell_experiment.hpp @@ -22,8 +22,9 @@ namespace takane { /** * @cond */ -void validate(const std::filesystem::path&, const ObjectMetadata&, const Options& options); -std::vector dimensions(const std::filesystem::path&, const ObjectMetadata&, const Options& options); +void validate(const std::filesystem::path&, const ObjectMetadata&, Options& options); +std::vector dimensions(const std::filesystem::path&, const ObjectMetadata&, Options& options); +bool satisfies_interface(const std::string&, const std::string&, const Options&); /** * @endcond */ @@ -37,9 +38,9 @@ namespace single_cell_experiment { /** * @param path Path to the directory containing the single cell experiment. * @param metadata Metadata for the object, typically read from its `OBJECT` file. - * @param options Validation options, typically for reading performance. + * @param options Validation options. 
*/ -inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, const Options& options) { +inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, Options& options) { ::takane::ranged_summarized_experiment::validate(path, metadata, options); auto sedims = ::takane::summarized_experiment::dimensions(path, metadata, options); @@ -90,7 +91,7 @@ inline void validate(const std::filesystem::path& path, const ObjectMetadata& me auto aename = std::to_string(i); auto aepath = aedir / aename; auto aemeta = read_object_metadata(aepath); - if (!satisfies_interface(aemeta.type, "SUMMARIZED_EXPERIMENT")) { + if (!satisfies_interface(aemeta.type, "SUMMARIZED_EXPERIMENT", options)) { throw std::runtime_error("object in 'alternative_experiments/" + aename + "' should satisfy the 'SUMMARIZED_EXPERIMENT' interface"); } diff --git a/include/takane/spatial_experiment.hpp b/include/takane/spatial_experiment.hpp index 9743592..c5752c6 100644 --- a/include/takane/spatial_experiment.hpp +++ b/include/takane/spatial_experiment.hpp @@ -26,7 +26,7 @@ namespace takane { /** * @cond */ -bool derived_from(const std::string&, const std::string&); +bool derived_from(const std::string&, const std::string&, const Options& options); /** * @endcond */ @@ -42,10 +42,10 @@ namespace spatial_experiment { */ namespace internal { -inline void validate_coordinates(const std::filesystem::path& path, size_t ncols, const Options& options) { +inline void validate_coordinates(const std::filesystem::path& path, size_t ncols, Options& options) { auto coord_path = path / "coordinates"; auto coord_meta = read_object_metadata(coord_path); - if (!derived_from(coord_meta.type, "dense_array")) { + if (!derived_from(coord_meta.type, "dense_array", options)) { throw std::runtime_error("'coordinates' should be a dense array"); } @@ -101,7 +101,7 @@ inline void validate_image(const std::filesystem::path& path, size_t i, const st } } -inline void 
validate_images(const std::filesystem::path& path, size_t ncols, const Options& options) { +inline void validate_images(const std::filesystem::path& path, size_t ncols, Options& options) { auto image_dir = path / "images"; auto mappath = image_dir / "mapping.h5"; auto ihandle = ritsuko::hdf5::open_file(mappath); @@ -215,9 +215,9 @@ inline void validate_images(const std::filesystem::path& path, size_t ncols, con /** * @param path Path to the directory containing the spatial experiment. * @param metadata Metadata for the object, typically read from its `OBJECT` file. - * @param options Validation options, typically for reading performance. + * @param options Validation options. */ -inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, const Options& options) { +inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, Options& options) { ::takane::single_cell_experiment::validate(path, metadata, options); const std::string& vstring = internal_json::extract_version_for_type(metadata.other, "spatial_experiment"); diff --git a/include/takane/string_factor.hpp b/include/takane/string_factor.hpp index 6fa6e8d..616da69 100644 --- a/include/takane/string_factor.hpp +++ b/include/takane/string_factor.hpp @@ -27,9 +27,9 @@ namespace string_factor { /** * @param path Path to the directory containing the string factor. * @param metadata Metadata for the object, typically read from its `OBJECT` file. - * @param options Validation options, typically for reading performance. + * @param options Validation options. 
*/ -inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, const Options& options) { +inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, Options& options) { auto vstring = internal_json::extract_version_for_type(metadata.other, "string_factor"); auto version = ritsuko::parse_version_string(vstring.c_str(), vstring.size(), /* skip_patch = */ true); if (version.major != 1) { @@ -49,10 +49,10 @@ inline void validate(const std::filesystem::path& path, const ObjectMetadata& me /** * @param path Path to the directory containing the string factor. * @param metadata Metadata for the object, typically read from its `OBJECT` file. - * @param options Validation options, typically for reading performance. + * @param options Validation options. * @return Length of the factor. */ -inline size_t height(const std::filesystem::path& path, [[maybe_unused]] const ObjectMetadata& metadata, [[maybe_unused]] const Options& options) { +inline size_t height(const std::filesystem::path& path, [[maybe_unused]] const ObjectMetadata& metadata, [[maybe_unused]] Options& options) { auto handle = ritsuko::hdf5::open_file(path / "contents.h5"); auto ghandle = handle.openGroup("string_factor"); auto dhandle = ghandle.openDataSet("codes"); diff --git a/include/takane/summarized_experiment.hpp b/include/takane/summarized_experiment.hpp index 4c90cb2..4392aba 100644 --- a/include/takane/summarized_experiment.hpp +++ b/include/takane/summarized_experiment.hpp @@ -10,7 +10,6 @@ #include #include -#include #include /** @@ -23,9 +22,10 @@ namespace takane { /** * @cond */ -void validate(const std::filesystem::path&, const ObjectMetadata&, const Options& options); -size_t height(const std::filesystem::path&, const ObjectMetadata&, const Options& options); -std::vector dimensions(const std::filesystem::path&, const ObjectMetadata&, const Options& options); +void validate(const std::filesystem::path&, const ObjectMetadata&, Options& 
options); +size_t height(const std::filesystem::path&, const ObjectMetadata&, Options& options); +std::vector dimensions(const std::filesystem::path&, const ObjectMetadata&, Options& options); +bool satisfies_interface(const std::string&, const std::string&, const Options&); /** * @endcond */ @@ -39,9 +39,9 @@ namespace summarized_experiment { /** * @param path Path to the directory containing the summarized experiment. * @param metadata Metadata for the object, typically read from its `OBJECT` file. - * @param options Validation options, typically for reading performance. + * @param options Validation options. */ -inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, const Options& options) { +inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, Options& options) { const auto& semap = internal_json::extract_typed_object_from_metadata(metadata.other, "summarized_experiment"); const std::string& vstring = internal_json::extract_string_from_typed_object(semap, "version", "summarized_experiment"); @@ -87,7 +87,7 @@ inline void validate(const std::filesystem::path& path, const ObjectMetadata& me auto rd_path = path / "row_data"; if (std::filesystem::exists(rd_path)) { auto rdmeta = read_object_metadata(rd_path); - if (!satisfies_interface(rdmeta.type, "DATA_FRAME")) { + if (!satisfies_interface(rdmeta.type, "DATA_FRAME", options)) { throw std::runtime_error("object in 'row_data' should satisfy the 'DATA_FRAME' interface"); } ::takane::validate(rd_path, rdmeta, options); @@ -99,7 +99,7 @@ inline void validate(const std::filesystem::path& path, const ObjectMetadata& me auto cd_path = path / "column_data"; if (std::filesystem::exists(cd_path)) { auto cdmeta = read_object_metadata(cd_path); - if (!satisfies_interface(cdmeta.type, "DATA_FRAME")) { + if (!satisfies_interface(cdmeta.type, "DATA_FRAME", options)) { throw std::runtime_error("object in 'column_data' should satisfy the 'DATA_FRAME' interface"); 
} ::takane::validate(cd_path, cdmeta, options); @@ -114,10 +114,10 @@ inline void validate(const std::filesystem::path& path, const ObjectMetadata& me /** * @param path Path to a directory containing a summarized experiment. * @param metadata Metadata for the object, typically read from its `OBJECT` file. - * @param options Validation options, mostly for input performance. + * @param options Validation options. * @return Number of rows in the summarized experiment. */ -inline size_t height([[maybe_unused]] const std::filesystem::path& path, const ObjectMetadata& metadata, [[maybe_unused]] const Options& options) { +inline size_t height([[maybe_unused]] const std::filesystem::path& path, const ObjectMetadata& metadata, [[maybe_unused]] Options& options) { // Assume it's all valid, so we go straight for the kill. const auto& semap = internal_json::extract_object(metadata.other, "summarized_experiment"); auto dims = internal_summarized_experiment::extract_dimensions_json(semap, "summarized_experiment"); @@ -127,10 +127,10 @@ inline size_t height([[maybe_unused]] const std::filesystem::path& path, const O /** * @param path Path to a directory containing a summarized experiment. * @param metadata Metadata for the object, typically read from its `OBJECT` file. - * @param options Validation options, mostly for input performance. + * @param options Validation options. * @return A vector of length 2 containing the dimensions of the summarized experiment. */ -inline std::vector dimensions([[maybe_unused]] const std::filesystem::path& path, const ObjectMetadata& metadata, [[maybe_unused]] const Options& options) { +inline std::vector dimensions([[maybe_unused]] const std::filesystem::path& path, const ObjectMetadata& metadata, [[maybe_unused]] Options& options) { // Assume it's all valid, so we go straight for the kill. 
const auto& semap = internal_json::extract_object(metadata.other, "summarized_experiment"); auto dims = internal_summarized_experiment::extract_dimensions_json(semap, "summarized_experiment"); diff --git a/include/takane/utils_bumpy_array.hpp b/include/takane/utils_bumpy_array.hpp index 232cde2..4b43a8a 100644 --- a/include/takane/utils_bumpy_array.hpp +++ b/include/takane/utils_bumpy_array.hpp @@ -19,10 +19,10 @@ namespace takane { -void validate(const std::filesystem::path&, const ObjectMetadata&, const Options&); -size_t height(const std::filesystem::path&, const ObjectMetadata&, const Options&); -bool satisfies_interface(const std::string&, const std::string&); -bool derived_from(const std::string&, const std::string&); +void validate(const std::filesystem::path&, const ObjectMetadata&, Options&); +size_t height(const std::filesystem::path&, const ObjectMetadata&, Options&); +bool satisfies_interface(const std::string&, const std::string&, const Options&); +bool derived_from(const std::string&, const std::string&, const Options&); namespace internal_bumpy_array { @@ -126,7 +126,7 @@ inline void validate_sparse_indices(const H5::Group& handle, const std::vector -void validate_directory(const std::filesystem::path& path, const std::string& object_type, const std::string& concatenated_type, const ObjectMetadata& metadata, const Options& options) try { +void validate_directory(const std::filesystem::path& path, const std::string& object_type, const std::string& concatenated_type, const ObjectMetadata& metadata, Options& options) try { auto vstring = internal_json::extract_version_for_type(metadata.other, object_type); auto version = ritsuko::parse_version_string(vstring.c_str(), vstring.size(), /* skip_patch = */ true); if (version.major != 1) { @@ -136,11 +136,11 @@ void validate_directory(const std::filesystem::path& path, const std::string& ob auto catdir = path / "concatenated"; auto catmeta = read_object_metadata(catdir); if constexpr(satisfies_interface_) { - 
if (!satisfies_interface(catmeta.type, concatenated_type)) { + if (!satisfies_interface(catmeta.type, concatenated_type, options)) { throw std::runtime_error("'concatenated' should satisfy the '" + concatenated_type + "' interface"); } } else { - if (!derived_from(catmeta.type, concatenated_type)) { + if (!derived_from(catmeta.type, concatenated_type, options)) { throw std::runtime_error("'concatenated' should contain an '" + concatenated_type + "' object"); } } @@ -180,7 +180,7 @@ void validate_directory(const std::filesystem::path& path, const std::string& ob throw std::runtime_error("failed to validate a '" + object_type + "' object at '" + path.string() + "'; " + std::string(e.what())); } -inline size_t height(const std::filesystem::path& path, const std::string& name, [[maybe_unused]] const ObjectMetadata& metadata, [[maybe_unused]] const Options& options) { +inline size_t height(const std::filesystem::path& path, const std::string& name, [[maybe_unused]] const ObjectMetadata& metadata, [[maybe_unused]] Options& options) { auto handle = ritsuko::hdf5::open_file(path / "partitions.h5"); auto ghandle = handle.openGroup(name); auto dhandle = ghandle.openDataSet("dimensions"); @@ -189,7 +189,7 @@ inline size_t height(const std::filesystem::path& path, const std::string& name, return dims[0]; } -inline std::vector dimensions(const std::filesystem::path& path, const std::string& name, [[maybe_unused]] const ObjectMetadata& metadata, [[maybe_unused]] const Options& options) { +inline std::vector dimensions(const std::filesystem::path& path, const std::string& name, [[maybe_unused]] const ObjectMetadata& metadata, [[maybe_unused]] Options& options) { auto handle = ritsuko::hdf5::open_file(path / "partitions.h5"); auto ghandle = handle.openGroup(name); auto dhandle = ghandle.openDataSet("dimensions"); diff --git a/include/takane/utils_compressed_list.hpp b/include/takane/utils_compressed_list.hpp index f8c88ca..63cbb3b 100644 --- 
a/include/takane/utils_compressed_list.hpp +++ b/include/takane/utils_compressed_list.hpp @@ -18,10 +18,10 @@ namespace takane { -void validate(const std::filesystem::path&, const ObjectMetadata&, const Options&); -size_t height(const std::filesystem::path&, const ObjectMetadata&, const Options&); -bool satisfies_interface(const std::string&, const std::string&); -bool derived_from(const std::string&, const std::string&); +void validate(const std::filesystem::path&, const ObjectMetadata&, Options&); +size_t height(const std::filesystem::path&, const ObjectMetadata&, Options&); +bool satisfies_interface(const std::string&, const std::string&, const Options&); +bool derived_from(const std::string&, const std::string&, const Options&); namespace internal_compressed_list { @@ -45,7 +45,7 @@ inline hsize_t validate_group(const H5::Group& handle, size_t concatenated_lengt } template -void validate_directory(const std::filesystem::path& path, const std::string& object_type, const std::string& concatenated_type, const ObjectMetadata& metadata, const Options& options) try { +void validate_directory(const std::filesystem::path& path, const std::string& object_type, const std::string& concatenated_type, const ObjectMetadata& metadata, Options& options) try { auto vstring = internal_json::extract_version_for_type(metadata.other, object_type); auto version = ritsuko::parse_version_string(vstring.c_str(), vstring.size(), /* skip_patch = */ true); if (version.major != 1) { @@ -55,11 +55,11 @@ void validate_directory(const std::filesystem::path& path, const std::string& ob auto catdir = path / "concatenated"; auto catmeta = read_object_metadata(catdir); if constexpr(satisfies_interface_) { - if (!satisfies_interface(catmeta.type, concatenated_type)) { + if (!satisfies_interface(catmeta.type, concatenated_type, options)) { throw std::runtime_error("'concatenated' should satisfy the '" + concatenated_type + "' interface"); } } else { - if (!derived_from(catmeta.type, 
concatenated_type)) { + if (!derived_from(catmeta.type, concatenated_type, options)) { throw std::runtime_error("'concatenated' should contain an '" + concatenated_type + "' object"); } } @@ -83,7 +83,7 @@ void validate_directory(const std::filesystem::path& path, const std::string& ob throw std::runtime_error("failed to validate an '" + object_type + "' object at '" + path.string() + "'; " + std::string(e.what())); } -inline size_t height(const std::filesystem::path& path, const std::string& name, [[maybe_unused]] const ObjectMetadata& metadata, [[maybe_unused]] const Options& options) { +inline size_t height(const std::filesystem::path& path, const std::string& name, [[maybe_unused]] const ObjectMetadata& metadata, [[maybe_unused]] Options& options) { auto handle = ritsuko::hdf5::open_file(path / "partitions.h5"); auto ghandle = handle.openGroup(name); auto dhandle = ghandle.openDataSet("lengths"); diff --git a/include/takane/utils_other.hpp b/include/takane/utils_other.hpp index 9984020..2925ede 100644 --- a/include/takane/utils_other.hpp +++ b/include/takane/utils_other.hpp @@ -11,9 +11,9 @@ namespace takane { /** * @cond */ -void validate(const std::filesystem::path&, const ObjectMetadata&, const Options&); -size_t height(const std::filesystem::path&, const ObjectMetadata&, const Options&); -bool satisfies_interface(const std::string&, const std::string&); +void validate(const std::filesystem::path&, const ObjectMetadata&, Options&); +size_t height(const std::filesystem::path&, const ObjectMetadata&, Options&); +bool satisfies_interface(const std::string&, const std::string&, const Options&); /** * @endcond */ @@ -29,14 +29,14 @@ Reader open_reader(const Path_& path, Args_&& ... 
args) { } } -inline void validate_mcols(const std::filesystem::path& parent, const std::string& name, size_t expected, const Options& options) try { +inline void validate_mcols(const std::filesystem::path& parent, const std::string& name, size_t expected, Options& options) try { auto path = parent / name; if (!std::filesystem::exists(path)) { return; } auto xmeta = read_object_metadata(path); - if (!satisfies_interface(xmeta.type, "DATA_FRAME")) { + if (!satisfies_interface(xmeta.type, "DATA_FRAME", options)) { throw std::runtime_error("expected an object that satisfies the 'DATA_FRAME' interface"); } ::takane::validate(path, xmeta, options); @@ -48,14 +48,14 @@ inline void validate_mcols(const std::filesystem::path& parent, const std::strin throw std::runtime_error("failed to validate '" + name + "'; " + std::string(e.what())); } -inline void validate_metadata(const std::filesystem::path& parent, const std::string& name, const Options& options) try { +inline void validate_metadata(const std::filesystem::path& parent, const std::string& name, Options& options) try { auto path = parent / name; if (!std::filesystem::exists(path)) { return; } auto xmeta = read_object_metadata(path); - if (!satisfies_interface(xmeta.type, "SIMPLE_LIST")) { + if (!satisfies_interface(xmeta.type, "SIMPLE_LIST", options)) { throw std::runtime_error("expected an object that satisfies the 'SIMPLE_LIST' interface'"); } ::takane::validate(path, xmeta, options); diff --git a/include/takane/utils_public.hpp b/include/takane/utils_public.hpp index 65353c3..5eb44c9 100644 --- a/include/takane/utils_public.hpp +++ b/include/takane/utils_public.hpp @@ -3,6 +3,8 @@ #include #include +#include +#include #include "H5Cpp.h" @@ -76,7 +78,14 @@ inline ObjectMetadata read_object_metadata(const std::filesystem::path& path) tr } /** - * @brief Validation options, mostly related to input performance. + * @brief Validation options. 
+ * + * Collection of optional parameters to fine-tune the behavior of various **takane** functions. + * This can be configured by applications to, e.g., add more validation functions for custom types or to increase the strictness of some checks. + * + * Most **takane** functions will accept a non-`const` reference to an `Options` object. + * The lack of `const`-ness is intended to support custom functions that mutate some external variable, e.g., to collect statistics for certain object types. + * While unusual, it is permissible for a **takane** function to modify the supplied `Options`, as long as that modification is reversed upon exiting the function. */ struct Options { /** @@ -88,6 +97,115 @@ struct Options { * Buffer size to use when reading data from a HDF5 file. */ hsize_t hdf5_buffer_size = 10000; + +public: + /** + * Custom registry of functions to be used by `validate()`. + * If a type is specified here, the custom function replaces the default. + */ + std::unordered_map > custom_validate; + + /** + * Custom registry of functions to be used by `dimensions()`. + * If a type is specified here, the custom function replaces the default. + */ + std::unordered_map(const std::filesystem::path&, const ObjectMetadata&, Options&)> > custom_dimensions; + + /** + * Custom registry of functions to be used by `height()`. + * If a type is specified here, the custom function replaces the default. + */ + std::unordered_map > custom_height; + +public: + /** + * Custom registry of derived object types and their base types, to be used by `derived_from()`. + * Each key is the base object type and each value is the set of its derived types. + * If a type is specified here, the set of derived types is added to the default set. + */ + std::unordered_map > custom_derived_from; + + /** + * Custom registry of object types that satisfy a particular object interface. + * Each key is the interface and each value is the set of all types that satisfy it.
+ * If a type is specified here, its set of types is added to the default set. + */ + std::unordered_map > custom_satisfies_interface; + +public: + /** + * Application-specific function to check the validity of a BAM file and its indices in `bam_file::validate()`. + * This should accept a path to the directory containing the BAM file and indices, the object metadata, and additional reading options. + * It should throw an error if the BAM file is not valid, e.g., corrupted file, mismatched indices. + */ + std::function bam_file_strict_check; + + /** + * Application-specific function to check the validity of a BCF file and its indices in `bcf_file::validate()`. + * This should accept a path to the directory containing the BCF file and indices, the object metadata, and additional reading options. + * It should throw an error if the BCF file is not valid, e.g., corrupted file, mismatched indices. + */ + std::function bcf_file_strict_check; + + /** + * Application-specific function to check the validity of a BED file and its indices in `bed_file::validate()`. + * This should accept a path to the directory containing the BED file, the object metadata, additional reading options, + * and a boolean indicating whether indices are expected to be present in the directory. + * It should throw an error if the BED file is not valid, e.g., corrupted file, mismatched indices. + */ + std::function bed_file_strict_check; + + /** + * Application-specific function to check the validity of a bigBed file in `bigbed_file::validate()`. + * This should accept a path to the directory containing the bigBed file, the object metadata, and additional reading options. + * It should throw an error if the bigBed file is not valid, e.g., corrupted file, mismatched indices. + */ + std::function bigbed_file_strict_check; + + /** + * Application-specific function to check the validity of a bigWig file in `bigwig_file::validate()`.
+ * This should accept a path to the directory containing the bigWig file, the object metadata, and additional reading options. + * It should throw an error if the bigWig file is not valid, e.g., corrupted file, mismatched indices. + */ + std::function bigwig_file_strict_check; + + /** + * Application-specific function to determine whether there are duplicated rows in the data frame containing the levels of a data frame factor, to be used in `data_frame_factor::validate()`. + * This should accept a path to the directory containing the data frame, the object metadata, and additional reading options. + * It should return a boolean indicating whether any duplicate rows were found. + */ + std::function data_frame_factor_any_duplicated; + + /** + * Application-specific function to check the validity of a FASTA file and its indices in `fasta_file::validate()`. + * This should accept a path to the directory containing the FASTA file, the object metadata, additional reading options, + * and a boolean indicating whether indices are expected to be present in the directory. + * It should throw an error if the FASTA file is not valid, e.g., corrupted file, mismatched indices. + */ + std::function fasta_file_strict_check; + + /** + * Application-specific function to check the validity of a FASTQ file and its indices in `fastq_file::validate()`. + * This should accept a path to the directory containing the FASTQ file, the object metadata, additional reading options, + * and a boolean indicating whether or not indices are expected to be present in the directory. + * It should throw an error if the FASTQ file is not valid, e.g., corrupted file, mismatched indices. + */ + std::function fastq_file_strict_check; + + /** + * Application-specific function to check the validity of a GFF file and its indices in `gff_file::validate()`. + * This should accept a path to the directory containing the GFF file, the object metadata, additional reading options,
+ * and a boolean indicating whether indices are expected to be present in the directory. + * It should throw an error if the GFF file is not valid, e.g., corrupted file, mismatched indices. + */ + std::function gff_file_strict_check; + + /** + * Application-specific function to check the validity of a GMT file and its indices in `gmt_file::validate()`. + * This should accept a path to the directory containing the GMT file, the object metadata and additional reading options. + * It should throw an error if the GMT file is not valid, e.g., corrupted file, mismatched indices. + */ + std::function gmt_file_strict_check; }; } diff --git a/include/takane/vcf_experiment.hpp b/include/takane/vcf_experiment.hpp index 7e656b7..91496fc 100644 --- a/include/takane/vcf_experiment.hpp +++ b/include/takane/vcf_experiment.hpp @@ -182,9 +182,9 @@ std::pair scan_vcf_dimensions(const std::filesystem::path& path, /** * @param path Path to the directory containing the VCF experiment. * @param metadata Metadata for the object, typically read from its `OBJECT` file. - * @param options Validation options, typically for reading performance. + * @param options Validation options. */ -inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, const Options& options) { +inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, Options& options) { const auto& vcfmap = internal_json::extract_typed_object_from_metadata(metadata.other, "vcf_experiment"); const std::string& vstring = internal_json::extract_string_from_typed_object(vcfmap, "version", "vcf_experiment"); @@ -232,10 +232,10 @@ inline void validate(const std::filesystem::path& path, const ObjectMetadata& me /** * @param path Path to a directory containing a VCF experiment. * @param metadata Metadata for the object, typically read from its `OBJECT` file. - * @param options Validation options, mostly for input performance. + * @param options Validation options. 
* @return Number of rows in the VCF experiment. */ -inline size_t height([[maybe_unused]] const std::filesystem::path& path, const ObjectMetadata& metadata, [[maybe_unused]] const Options& options) { +inline size_t height([[maybe_unused]] const std::filesystem::path& path, const ObjectMetadata& metadata, [[maybe_unused]] Options& options) { const auto& vcfmap = internal_json::extract_typed_object_from_metadata(metadata.other, "vcf_experiment"); auto dims = internal_summarized_experiment::extract_dimensions_json(vcfmap, "vcf_experiment"); return dims.first; @@ -244,10 +244,10 @@ inline size_t height([[maybe_unused]] const std::filesystem::path& path, const O /** * @param path Path to a directory containing a VCF experiment. * @param metadata Metadata for the object, typically read from its `OBJECT` file. - * @param options Validation options, mostly for input performance. + * @param options Validation options. * @return A vector of length 2 containing the dimensions of the VCF experiment. 
*/ -inline std::vector dimensions([[maybe_unused]] const std::filesystem::path& path, const ObjectMetadata& metadata, [[maybe_unused]] const Options& options) { +inline std::vector dimensions([[maybe_unused]] const std::filesystem::path& path, const ObjectMetadata& metadata, [[maybe_unused]] Options& options) { const auto& vcfmap = internal_json::extract_typed_object_from_metadata(metadata.other, "vcf_experiment"); auto dims = internal_summarized_experiment::extract_dimensions_json(vcfmap, "vcf_experiment"); return std::vector{ dims.first, dims.second }; diff --git a/tests/src/bam_file.cpp b/tests/src/bam_file.cpp index 15e63a6..7907994 100644 --- a/tests/src/bam_file.cpp +++ b/tests/src/bam_file.cpp @@ -18,15 +18,9 @@ struct BamFileTest : public ::testing::Test { std::filesystem::path dir; std::string name; - void expect_error(const std::string& msg) { - EXPECT_ANY_THROW({ - try { - test_validate(dir); - } catch (std::exception& e) { - EXPECT_THAT(e.what(), ::testing::HasSubstr(msg)); - throw; - } - }); + template + void expect_error(const std::string& msg, Args_&& ... 
args) { + expect_validation_error(dir, msg, std::forward(args)...); } }; @@ -86,7 +80,7 @@ TEST_F(BamFileTest, Strict) { handle.write("BAM\1"); } - takane::bam_file::strict_check = [](const std::filesystem::path&, const takane::ObjectMetadata&, const takane::Options&) { throw std::runtime_error("ARGH"); }; - expect_error("ARGH"); - takane::bam_file::strict_check = nullptr; + takane::Options opts; + opts.bam_file_strict_check = [](const std::filesystem::path&, const takane::ObjectMetadata&, const takane::Options&) { throw std::runtime_error("ARGH"); }; + expect_error("ARGH", opts); } diff --git a/tests/src/bcf_file.cpp b/tests/src/bcf_file.cpp index ba9c030..d7a66df 100644 --- a/tests/src/bcf_file.cpp +++ b/tests/src/bcf_file.cpp @@ -18,15 +18,9 @@ struct BcfFileTest : public ::testing::Test { std::filesystem::path dir; std::string name; - void expect_error(const std::string& msg) { - EXPECT_ANY_THROW({ - try { - test_validate(dir); - } catch (std::exception& e) { - EXPECT_THAT(e.what(), ::testing::HasSubstr(msg)); - throw; - } - }); + template + void expect_error(const std::string& msg, Args_&& ... 
args) { + expect_validation_error(dir, msg, std::forward(args)...); } }; @@ -86,7 +80,7 @@ TEST_F(BcfFileTest, Strict) { handle.write("BCF\2\1"); } - takane::bcf_file::strict_check = [](const std::filesystem::path&, const takane::ObjectMetadata&, const takane::Options&) { throw std::runtime_error("ARGH"); }; - expect_error("ARGH"); - takane::bcf_file::strict_check = nullptr; + takane::Options opts; + opts.bcf_file_strict_check = [](const std::filesystem::path&, const takane::ObjectMetadata&, const takane::Options&) { throw std::runtime_error("ARGH"); }; + expect_error("ARGH", opts); } diff --git a/tests/src/bed_file.cpp b/tests/src/bed_file.cpp index c1cdb35..831606c 100644 --- a/tests/src/bed_file.cpp +++ b/tests/src/bed_file.cpp @@ -18,15 +18,9 @@ struct BedFileTest : public ::testing::Test { std::filesystem::path dir; std::string name; - void expect_error(const std::string& msg) { - EXPECT_ANY_THROW({ - try { - test_validate(dir); - } catch (std::exception& e) { - EXPECT_THAT(e.what(), ::testing::HasSubstr(msg)); - throw; - } - }); + template + void expect_error(const std::string& msg, Args_&& ... 
args) { + expect_validation_error(dir, msg, std::forward(args)...); } }; @@ -80,7 +74,7 @@ TEST_F(BedFileTest, Strict) { fhandle.write("chr1\t1\t2\n"); } - takane::bed_file::strict_check = [](const std::filesystem::path&, const takane::ObjectMetadata&, const takane::Options&, bool) { throw std::runtime_error("ARGH"); }; - expect_error("ARGH"); - takane::bed_file::strict_check = nullptr; + takane::Options opts; + opts.bed_file_strict_check = [](const std::filesystem::path&, const takane::ObjectMetadata&, const takane::Options&, bool) { throw std::runtime_error("ARGH"); }; + expect_error("ARGH", opts); } diff --git a/tests/src/bigbed_file.cpp b/tests/src/bigbed_file.cpp index 9311398..70a63e9 100644 --- a/tests/src/bigbed_file.cpp +++ b/tests/src/bigbed_file.cpp @@ -18,15 +18,9 @@ struct BigBedFileTest : public ::testing::Test { std::filesystem::path dir; std::string name; - void expect_error(const std::string& msg) { - EXPECT_ANY_THROW({ - try { - test_validate(dir); - } catch (std::exception& e) { - EXPECT_THAT(e.what(), ::testing::HasSubstr(msg)); - throw; - } - }); + template + void expect_error(const std::string& msg, Args_&& ... 
args) { + expect_validation_error(dir, msg, std::forward(args)...); } }; @@ -65,7 +59,7 @@ TEST_F(BigBedFileTest, Strict) { handle.write(reinterpret_cast(&val), sizeof(val)); } - takane::bigbed_file::strict_check = [](const std::filesystem::path&, const takane::ObjectMetadata&, const takane::Options&) { throw std::runtime_error("ARGH"); }; - expect_error("ARGH"); - takane::bigbed_file::strict_check = nullptr; + takane::Options opts; + opts.bigbed_file_strict_check = [](const std::filesystem::path&, const takane::ObjectMetadata&, const takane::Options&) { throw std::runtime_error("ARGH"); }; + expect_error("ARGH", opts); } diff --git a/tests/src/bigwig_file.cpp b/tests/src/bigwig_file.cpp index 3431882..b2fe79d 100644 --- a/tests/src/bigwig_file.cpp +++ b/tests/src/bigwig_file.cpp @@ -18,15 +18,9 @@ struct BigWigFileTest : public ::testing::Test { std::filesystem::path dir; std::string name; - void expect_error(const std::string& msg) { - EXPECT_ANY_THROW({ - try { - test_validate(dir); - } catch (std::exception& e) { - EXPECT_THAT(e.what(), ::testing::HasSubstr(msg)); - throw; - } - }); + template + void expect_error(const std::string& msg, Args_&& ... 
args) { + expect_validation_error(dir, msg, std::forward(args)...); } }; @@ -65,7 +59,7 @@ TEST_F(BigWigFileTest, Strict) { handle.write(reinterpret_cast(&val), sizeof(val)); } - takane::bigwig_file::strict_check = [](const std::filesystem::path&, const takane::ObjectMetadata&, const takane::Options&) { throw std::runtime_error("ARGH"); }; - expect_error("ARGH"); - takane::bigwig_file::strict_check = nullptr; + takane::Options opts; + opts.bigwig_file_strict_check = [](const std::filesystem::path&, const takane::ObjectMetadata&, const takane::Options&) { throw std::runtime_error("ARGH"); }; + expect_error("ARGH", opts); } diff --git a/tests/src/data_frame_factor.cpp b/tests/src/data_frame_factor.cpp index 675aa80..f8e3f1c 100644 --- a/tests/src/data_frame_factor.cpp +++ b/tests/src/data_frame_factor.cpp @@ -62,11 +62,10 @@ TEST_F(DataFrameFactorTest, Levels) { initialize_directory_simple(ldir, "data_frame", "1.0"); expect_error("failed to validate 'levels'"); - takane::data_frame_factor::any_duplicated = [](const std::filesystem::path&, const takane::ObjectMetadata&, const takane::Options&) -> bool { return true; }; + takane::Options opts; + opts.data_frame_factor_any_duplicated = [](const std::filesystem::path&, const takane::ObjectMetadata&, const takane::Options&) -> bool { return true; }; data_frame::mock(ldir, 5, {}); - expect_error("duplicated rows"); - - takane::data_frame_factor::any_duplicated = nullptr; + expect_error("duplicated rows", opts); } TEST_F(DataFrameFactorTest, Codes) { diff --git a/tests/src/dispatch.cpp b/tests/src/dispatch.cpp index bcc81d3..e1bf206 100644 --- a/tests/src/dispatch.cpp +++ b/tests/src/dispatch.cpp @@ -7,13 +7,17 @@ #include TEST(GenericDispatch, Validate) { + takane::Options opts; + std::filesystem::path dir = "TEST_dispatcher"; initialize_directory_simple(dir, "foobar", "1.0"); - expect_validation_error(dir, "no registered 'validate' function"); + expect_validation_error(dir, "no registered 'validate' function", opts); + + 
opts.custom_validate["foobar"] = [](const std::filesystem::path&, const takane::ObjectMetadata&, const takane::Options&) -> void {}; + test_validate(dir, opts); - takane::validate_registry["foobar"] = [](const std::filesystem::path&, const takane::ObjectMetadata&, const takane::Options&) -> void {}; - test_validate(dir); - takane::validate_registry.erase("foobar"); + opts.custom_validate["foobar"] = [](const std::filesystem::path&, const takane::ObjectMetadata&, const takane::Options&) -> void { throw std::runtime_error("YAY"); }; + expect_validation_error(dir, "YAY", opts); } template @@ -29,13 +33,14 @@ void expect_height_error(const std::filesystem::path& dir, const std::string& ms } TEST(GenericDispatch, Height) { + takane::Options opts; + std::filesystem::path dir = "TEST_dispatcher"; initialize_directory_simple(dir, "foobar", "1.0"); - expect_height_error(dir, "no registered 'height' function"); + expect_height_error(dir, "no registered 'height' function", opts); - takane::height_registry["foobar"] = [](const std::filesystem::path&, const takane::ObjectMetadata&, const takane::Options&) -> size_t { return 11; }; - EXPECT_EQ(test_height(dir), 11); - takane::height_registry.erase("foobar"); + opts.custom_height["foobar"] = [](const std::filesystem::path&, const takane::ObjectMetadata&, const takane::Options&) -> size_t { return 11; }; + EXPECT_EQ(test_height(dir, opts), 11); } template @@ -51,33 +56,36 @@ void expect_dimensions_error(const std::filesystem::path& dir, const std::string } TEST(GenericDispatch, Dimensions) { + takane::Options opts; + std::filesystem::path dir = "TEST_dispatcher"; initialize_directory_simple(dir, "foobar", "1.0"); - expect_dimensions_error(dir, "no registered 'dimensions' function"); + expect_dimensions_error(dir, "no registered 'dimensions' function", opts); std::vector expected { 11, 20 }; - takane::dimensions_registry["foobar"] = [&](const std::filesystem::path&, const takane::ObjectMetadata&, const takane::Options&) -> 
std::vector { return expected; }; - EXPECT_EQ(test_dimensions(dir), expected); - takane::dimensions_registry.erase("foobar"); + opts.custom_dimensions["foobar"] = [&](const std::filesystem::path&, const takane::ObjectMetadata&, const takane::Options&) -> std::vector { return expected; }; + EXPECT_EQ(test_dimensions(dir, opts), expected); } TEST(GenericDispatch, SatisfiesInterface) { - EXPECT_FALSE(takane::satisfies_interface("foo", "FOO")); - takane::satisfies_interface_registry["FOO"] = std::unordered_set{ "foo" }; - EXPECT_TRUE(takane::satisfies_interface("foo", "FOO")); - takane::satisfies_interface_registry.erase("FOO"); + takane::Options opts; + EXPECT_TRUE(takane::satisfies_interface("summarized_experiment", "SUMMARIZED_EXPERIMENT", opts)); + EXPECT_TRUE(takane::satisfies_interface("single_cell_experiment", "SUMMARIZED_EXPERIMENT", opts)); - EXPECT_TRUE(takane::satisfies_interface("summarized_experiment", "SUMMARIZED_EXPERIMENT")); - EXPECT_TRUE(takane::satisfies_interface("single_cell_experiment", "SUMMARIZED_EXPERIMENT")); + EXPECT_FALSE(takane::satisfies_interface("foo", "FOO", opts)); + opts.custom_satisfies_interface["FOO"] = std::unordered_set{ "foo" }; + EXPECT_TRUE(takane::satisfies_interface("foo", "FOO", opts)); } TEST(GenericDispatch, DerivedFrom) { - EXPECT_TRUE(takane::derived_from("summarized_experiment", "summarized_experiment")); - EXPECT_TRUE(takane::derived_from("ranged_summarized_experiment", "summarized_experiment")); - EXPECT_TRUE(takane::derived_from("single_cell_experiment", "summarized_experiment")); - EXPECT_FALSE(takane::derived_from("vcf_experiment", "summarized_experiment")); - - takane::derived_from_registry["FOO"] = std::unordered_set{ "foo" }; - EXPECT_TRUE(takane::derived_from("foo", "FOO")); - takane::derived_from_registry.erase("FOO"); + takane::Options opts; + + EXPECT_TRUE(takane::derived_from("summarized_experiment", "summarized_experiment", opts)); + EXPECT_TRUE(takane::derived_from("ranged_summarized_experiment", 
"summarized_experiment", opts)); + EXPECT_TRUE(takane::derived_from("single_cell_experiment", "summarized_experiment", opts)); + EXPECT_FALSE(takane::derived_from("vcf_experiment", "summarized_experiment", opts)); + + EXPECT_FALSE(takane::derived_from("foo", "FOO", opts)); + opts.custom_derived_from["FOO"] = std::unordered_set{ "foo" }; + EXPECT_TRUE(takane::derived_from("foo", "FOO", opts)); } diff --git a/tests/src/fasta_file.cpp b/tests/src/fasta_file.cpp index b5d1f66..de7da12 100644 --- a/tests/src/fasta_file.cpp +++ b/tests/src/fasta_file.cpp @@ -18,15 +18,9 @@ struct FastaFileTest : public ::testing::Test { std::filesystem::path dir; std::string name; - void expect_error(const std::string& msg) { - EXPECT_ANY_THROW({ - try { - test_validate(dir); - } catch (std::exception& e) { - EXPECT_THAT(e.what(), ::testing::HasSubstr(msg)); - throw; - } - }); + template + void expect_error(const std::string& msg, Args_&& ... args) { + expect_validation_error(dir, msg, std::forward(args)...); } }; @@ -115,7 +109,7 @@ TEST_F(FastaFileTest, Strict) { fhandle.write(">asdasd\nACGT\n"); } - takane::fasta_file::strict_check = [](const std::filesystem::path&, const takane::ObjectMetadata&, const takane::Options&, bool) { throw std::runtime_error("ARGH"); }; - expect_error("ARGH"); - takane::fasta_file::strict_check = nullptr; + takane::Options opts; + opts.fasta_file_strict_check = [](const std::filesystem::path&, const takane::ObjectMetadata&, const takane::Options&, bool) { throw std::runtime_error("ARGH"); }; + expect_error("ARGH", opts); } diff --git a/tests/src/fastq_file.cpp b/tests/src/fastq_file.cpp index e549b05..5e945e5 100644 --- a/tests/src/fastq_file.cpp +++ b/tests/src/fastq_file.cpp @@ -18,15 +18,9 @@ struct FastqFileTest : public ::testing::Test { std::filesystem::path dir; std::string name; - void expect_error(const std::string& msg) { - EXPECT_ANY_THROW({ - try { - test_validate(dir); - } catch (std::exception& e) { - EXPECT_THAT(e.what(), 
::testing::HasSubstr(msg)); - throw; - } - }); + template + void expect_error(const std::string& msg, Args_&& ... args) { + expect_validation_error(dir, msg, std::forward(args)...); } }; @@ -146,7 +140,7 @@ TEST_F(FastqFileTest, Strict) { fhandle.write("@asdasd\nACGT\n+\n!!!!\n"); } - takane::fastq_file::strict_check = [](const std::filesystem::path&, const takane::ObjectMetadata&, const takane::Options&, bool) { throw std::runtime_error("ARGH"); }; - expect_error("ARGH"); - takane::fastq_file::strict_check = nullptr; + takane::Options opts; + opts.fastq_file_strict_check = [](const std::filesystem::path&, const takane::ObjectMetadata&, const takane::Options&, bool) { throw std::runtime_error("ARGH"); }; + expect_error("ARGH", opts); } diff --git a/tests/src/genomic_ranges.cpp b/tests/src/genomic_ranges.cpp index 0afca8c..172e75e 100644 --- a/tests/src/genomic_ranges.cpp +++ b/tests/src/genomic_ranges.cpp @@ -40,6 +40,7 @@ struct GenomicRangesTest : public ::testing::Test { TEST_F(GenomicRangesTest, SeqInfoRetrieval) { auto sidir = dir / "sequence_information"; + takane::Options opts; { initialize_directory_simple(dir, "genomic_ranges", "1.0"); @@ -49,7 +50,7 @@ TEST_F(GenomicRangesTest, SeqInfoRetrieval) { sequence_information::mock(ghandle, { "chrA", "chrB" }, { 100, 20 }, { 1, 0 }, { "mm10", "mm10 "}); } { - auto out = takane::genomic_ranges::internal::find_sequence_limits(sidir, takane::Options()); + auto out = takane::genomic_ranges::internal::find_sequence_limits(sidir, opts); EXPECT_EQ(out.has_circular, std::vector(2, true)); EXPECT_EQ(out.has_seqlen, std::vector(2, true)); std::vector expected_circular{ 1, 0 }; @@ -76,7 +77,7 @@ TEST_F(GenomicRangesTest, SeqInfoRetrieval) { } } { - auto out = takane::genomic_ranges::internal::find_sequence_limits(sidir, takane::Options()); + auto out = takane::genomic_ranges::internal::find_sequence_limits(sidir, opts); std::vector has_circular{0, 1}; EXPECT_EQ(out.has_circular, has_circular); std::vector has_seqlen{0, 1}; 
@@ -106,7 +107,7 @@ TEST_F(GenomicRangesTest, SeqInfoRetrieval) { } } { - auto out = takane::genomic_ranges::internal::find_sequence_limits(sidir, takane::Options()); + auto out = takane::genomic_ranges::internal::find_sequence_limits(sidir, opts); EXPECT_EQ(out.has_circular, std::vector(2, true)); EXPECT_EQ(out.has_seqlen, std::vector(2, true)); std::vector expected_circular{ 1, 0 }; diff --git a/tests/src/gff_file.cpp b/tests/src/gff_file.cpp index 8e4e347..eadfb68 100644 --- a/tests/src/gff_file.cpp +++ b/tests/src/gff_file.cpp @@ -18,15 +18,9 @@ struct GffFileTest : public ::testing::Test { std::filesystem::path dir; std::string name; - void expect_error(const std::string& msg) { - EXPECT_ANY_THROW({ - try { - test_validate(dir); - } catch (std::exception& e) { - EXPECT_THAT(e.what(), ::testing::HasSubstr(msg)); - throw; - } - }); + template + void expect_error(const std::string& msg, Args_&& ... args) { + expect_validation_error(dir, msg, std::forward(args)...); } }; @@ -106,7 +100,7 @@ TEST_F(GffFileTest, Strict) { fhandle.write("chr1\t1\t2\n"); } - takane::gff_file::strict_check = [](const std::filesystem::path&, const takane::ObjectMetadata&, const takane::Options&, bool) { throw std::runtime_error("ARGH"); }; - expect_error("ARGH"); - takane::gff_file::strict_check = nullptr; + takane::Options opts; + opts.gff_file_strict_check = [](const std::filesystem::path&, const takane::ObjectMetadata&, const takane::Options&, bool) { throw std::runtime_error("ARGH"); }; + expect_error("ARGH", opts); } diff --git a/tests/src/gmt_file.cpp b/tests/src/gmt_file.cpp index 497f51f..33bb451 100644 --- a/tests/src/gmt_file.cpp +++ b/tests/src/gmt_file.cpp @@ -18,15 +18,9 @@ struct GmtFileTest : public ::testing::Test { std::filesystem::path dir; std::string name; - void expect_error(const std::string& msg) { - EXPECT_ANY_THROW({ - try { - test_validate(dir); - } catch (std::exception& e) { - EXPECT_THAT(e.what(), ::testing::HasSubstr(msg)); - throw; - } - }); + template + 
void expect_error(const std::string& msg, Args_&& ... args) { + expect_validation_error(dir, msg, std::forward(args)...); } }; @@ -56,7 +50,7 @@ TEST_F(GmtFileTest, Strict) { fhandle.write("set\tmy set\ta\tb\tc\n"); } - takane::gmt_file::strict_check = [](const std::filesystem::path&, const takane::ObjectMetadata&, const takane::Options&) { throw std::runtime_error("ARGH"); }; - expect_error("ARGH"); - takane::gmt_file::strict_check = nullptr; + takane::Options opts; + opts.gmt_file_strict_check = [](const std::filesystem::path&, const takane::ObjectMetadata&, const takane::Options&) { throw std::runtime_error("ARGH"); }; + expect_error("ARGH", opts); } diff --git a/tests/src/utils.cpp b/tests/src/utils.cpp index 666238e..dfdc535 100644 --- a/tests/src/utils.cpp +++ b/tests/src/utils.cpp @@ -10,14 +10,26 @@ void test_validate(const std::filesystem::path& dir) { takane::validate(dir); } +void test_validate(const std::filesystem::path& dir, takane::Options& opts) { + takane::validate(dir, opts); +} + size_t test_height(const std::filesystem::path& dir) { return takane::height(dir); } +size_t test_height(const std::filesystem::path& dir, takane::Options& opts) { + return takane::height(dir, opts); +} + std::vector test_dimensions(const std::filesystem::path& dir) { return takane::dimensions(dir); } +std::vector test_dimensions(const std::filesystem::path& dir, takane::Options& opts) { + return takane::dimensions(dir, opts); +} + // Just testing that our JSON dumping code works as expected. 
TEST(JsonDump, BasicDumps) { auto test = new millijson::Object; diff --git a/tests/src/utils.h b/tests/src/utils.h index f3897ef..69b5f2f 100644 --- a/tests/src/utils.h +++ b/tests/src/utils.h @@ -11,11 +11,16 @@ #include "ritsuko/hdf5/hdf5.hpp" #include "H5Cpp.h" +#include "takane/utils_public.hpp" + void test_validate(const std::filesystem::path&); +void test_validate(const std::filesystem::path&, takane::Options& opts); size_t test_height(const std::filesystem::path&); +size_t test_height(const std::filesystem::path&, takane::Options& opts); std::vector test_dimensions(const std::filesystem::path&); +std::vector test_dimensions(const std::filesystem::path&, takane::Options& opts); inline void initialize_directory(const std::filesystem::path& dir) { if (std::filesystem::exists(dir)) { diff --git a/tests/src/utils_bumpy_array.cpp b/tests/src/utils_bumpy_array.cpp index 55590bd..1aeb2db 100644 --- a/tests/src/utils_bumpy_array.cpp +++ b/tests/src/utils_bumpy_array.cpp @@ -32,9 +32,10 @@ struct BumpyArrayUtilsTest : public::testing::Test { template void expect_error(const std::string& msg) { auto meta = takane::read_object_metadata(dir); + takane::Options opts; EXPECT_ANY_THROW({ try { - takane::internal_bumpy_array::validate_directory(dir, name, "atomic_vector", meta, takane::Options()); + takane::internal_bumpy_array::validate_directory(dir, name, "atomic_vector", meta, opts); } catch (std::exception& e) { EXPECT_THAT(e.what(), ::testing::HasSubstr(msg)); throw; @@ -46,6 +47,7 @@ struct BumpyArrayUtilsTest : public::testing::Test { TEST_F(BumpyArrayUtilsTest, Basic) { initialize_directory_simple(dir, name, "2.0"); expect_error("unsupported version string"); + takane::Options opts; { auto handle = initialize(); @@ -77,10 +79,10 @@ TEST_F(BumpyArrayUtilsTest, Basic) { hdf5_utils::spawn_numeric_data(ghandle, "dimensions", H5::PredType::NATIVE_UINT8, { 2, 2 }); } auto meta = takane::read_object_metadata(dir); - takane::internal_bumpy_array::validate_directory(dir, 
"bumpy_atomic_array", "atomic_vector", meta, takane::Options()); + takane::internal_bumpy_array::validate_directory(dir, "bumpy_atomic_array", "atomic_vector", meta, opts); - EXPECT_EQ(takane::internal_bumpy_array::height(dir, name, meta, takane::Options()), 2); - auto dims = takane::internal_bumpy_array::dimensions(dir, name, meta, takane::Options()); + EXPECT_EQ(takane::internal_bumpy_array::height(dir, name, meta, opts), 2); + auto dims = takane::internal_bumpy_array::dimensions(dir, name, meta, opts); std::vector expected { 2, 2 }; EXPECT_EQ(dims, expected); } @@ -106,6 +108,8 @@ TEST_F(BumpyArrayUtilsTest, Lengths) { } TEST_F(BumpyArrayUtilsTest, Dense) { + takane::Options opts; + { auto handle = initialize(); auto ghandle = handle.createGroup(name); @@ -125,8 +129,8 @@ TEST_F(BumpyArrayUtilsTest, Dense) { } { auto meta = takane::read_object_metadata(dir); - takane::internal_bumpy_array::validate_directory(dir, "bumpy_atomic_array", "atomic_vector", meta, takane::Options()); - EXPECT_EQ(takane::internal_bumpy_array::height(dir, name, meta, takane::Options()), 0); + takane::internal_bumpy_array::validate_directory(dir, "bumpy_atomic_array", "atomic_vector", meta, opts); + EXPECT_EQ(takane::internal_bumpy_array::height(dir, name, meta, opts), 0); } // Higher-dimensional arrays. 
@@ -153,12 +157,14 @@ TEST_F(BumpyArrayUtilsTest, Dense) { } { auto meta = takane::read_object_metadata(dir); - takane::internal_bumpy_array::validate_directory(dir, "bumpy_atomic_array", "atomic_vector", meta, takane::Options()); - EXPECT_EQ(takane::internal_bumpy_array::height(dir, name, meta, takane::Options()), 13); + takane::internal_bumpy_array::validate_directory(dir, "bumpy_atomic_array", "atomic_vector", meta, opts); + EXPECT_EQ(takane::internal_bumpy_array::height(dir, name, meta, opts), 13); } } TEST_F(BumpyArrayUtilsTest, Sparse) { + takane::Options opts; + { auto handle = initialize(); auto ghandle = handle.createGroup(name); @@ -173,9 +179,9 @@ TEST_F(BumpyArrayUtilsTest, Sparse) { } auto meta = takane::read_object_metadata(dir); - takane::internal_bumpy_array::validate_directory(dir, "bumpy_atomic_array", "atomic_vector", meta, takane::Options()); - EXPECT_EQ(takane::internal_bumpy_array::height(dir, name, meta, takane::Options()), 4); - auto dims = takane::internal_bumpy_array::dimensions(dir, name, meta, takane::Options()); + takane::internal_bumpy_array::validate_directory(dir, "bumpy_atomic_array", "atomic_vector", meta, opts); + EXPECT_EQ(takane::internal_bumpy_array::height(dir, name, meta, opts), 4); + auto dims = takane::internal_bumpy_array::dimensions(dir, name, meta, opts); std::vector expected { 4, 3 }; EXPECT_EQ(dims, expected); @@ -252,8 +258,8 @@ TEST_F(BumpyArrayUtilsTest, Sparse) { } { auto meta = takane::read_object_metadata(dir); - takane::internal_bumpy_array::validate_directory(dir, "bumpy_atomic_array", "atomic_vector", meta, takane::Options()); - EXPECT_EQ(takane::internal_bumpy_array::height(dir, name, meta, takane::Options()), 4); + takane::internal_bumpy_array::validate_directory(dir, "bumpy_atomic_array", "atomic_vector", meta, opts); + EXPECT_EQ(takane::internal_bumpy_array::height(dir, name, meta, opts), 4); } // Get some coverage for higher-dimensional arrays. 
@@ -291,12 +297,14 @@ TEST_F(BumpyArrayUtilsTest, Sparse) { } { auto meta = takane::read_object_metadata(dir); - takane::internal_bumpy_array::validate_directory(dir, "bumpy_atomic_array", "atomic_vector", meta, takane::Options()); - EXPECT_EQ(takane::internal_bumpy_array::height(dir, name, meta, takane::Options()), 11); + takane::internal_bumpy_array::validate_directory(dir, "bumpy_atomic_array", "atomic_vector", meta, opts); + EXPECT_EQ(takane::internal_bumpy_array::height(dir, name, meta, opts), 11); } } TEST_F(BumpyArrayUtilsTest, Names) { + takane::Options opts; + { auto handle = initialize(); auto ghandle = handle.createGroup(name); @@ -309,7 +317,7 @@ TEST_F(BumpyArrayUtilsTest, Names) { hdf5_utils::spawn_string_data(nhandle, "1", H5T_VARIABLE, { "Aaron", "Charlie", "Echo", "Fooblewooble" }); } auto meta = takane::read_object_metadata(dir); - takane::internal_bumpy_array::validate_directory(dir, "bumpy_atomic_array", "atomic_vector", meta, takane::Options()); + takane::internal_bumpy_array::validate_directory(dir, "bumpy_atomic_array", "atomic_vector", meta, opts); { auto handle = reopen(); diff --git a/tests/src/utils_compressed_list.cpp b/tests/src/utils_compressed_list.cpp index 0ccf186..797dc81 100644 --- a/tests/src/utils_compressed_list.cpp +++ b/tests/src/utils_compressed_list.cpp @@ -32,9 +32,10 @@ struct CompressedListUtilsTest : public::testing::Test { template void expect_error(const std::string& msg) { auto meta = takane::read_object_metadata(dir); + takane::Options opts; EXPECT_ANY_THROW({ try { - takane::internal_compressed_list::validate_directory(dir, name, "atomic_vector", meta, takane::Options()); + takane::internal_compressed_list::validate_directory(dir, name, "atomic_vector", meta, opts); } catch (std::exception& e) { EXPECT_THAT(e.what(), ::testing::HasSubstr(msg)); throw; @@ -46,6 +47,7 @@ struct CompressedListUtilsTest : public::testing::Test { TEST_F(CompressedListUtilsTest, Basic) { initialize_directory_simple(dir, name, "2.0"); 
expect_error("unsupported version string"); + takane::Options opts; { auto handle = initialize(); @@ -64,8 +66,8 @@ TEST_F(CompressedListUtilsTest, Basic) { atomic_vector::mock(dir / "concatenated", 10, atomic_vector::Type::INTEGER); } auto meta = takane::read_object_metadata(dir); - takane::internal_compressed_list::validate_directory(dir, "atomic_vector_list", "atomic_vector", meta, takane::Options()); - EXPECT_EQ(takane::internal_compressed_list::height(dir, name, meta, takane::Options()), 4); + takane::internal_compressed_list::validate_directory(dir, "atomic_vector_list", "atomic_vector", meta, opts); + EXPECT_EQ(takane::internal_compressed_list::height(dir, name, meta, opts), 4); } TEST_F(CompressedListUtilsTest, Lengths) { @@ -88,6 +90,8 @@ TEST_F(CompressedListUtilsTest, Lengths) { } TEST_F(CompressedListUtilsTest, Names) { + takane::Options opts; + { auto handle = initialize(); auto ghandle = handle.createGroup(name); @@ -97,7 +101,7 @@ TEST_F(CompressedListUtilsTest, Names) { hdf5_utils::spawn_string_data(ghandle, "names", H5T_VARIABLE, { "Aaron", "Charlie", "Echo", "Fooblewooble" }); } auto meta = takane::read_object_metadata(dir); - takane::internal_compressed_list::validate_directory(dir, "atomic_vector_list", "atomic_vector", meta, takane::Options()); + takane::internal_compressed_list::validate_directory(dir, "atomic_vector_list", "atomic_vector", meta, opts); { auto handle = reopen(); @@ -109,6 +113,8 @@ TEST_F(CompressedListUtilsTest, Names) { } TEST_F(CompressedListUtilsTest, Metadata) { + takane::Options opts; + { auto handle = initialize(); auto ghandle = handle.createGroup(name); @@ -129,5 +135,5 @@ TEST_F(CompressedListUtilsTest, Metadata) { simple_list::mock(odir); auto meta = takane::read_object_metadata(dir); - takane::internal_compressed_list::validate_directory(dir, "atomic_vector_list", "atomic_vector", meta, takane::Options()); + takane::internal_compressed_list::validate_directory(dir, "atomic_vector_list", "atomic_vector", meta, 
opts); } diff --git a/tests/src/utils_other.cpp b/tests/src/utils_other.cpp index 7a6a673..c549e28 100644 --- a/tests/src/utils_other.cpp +++ b/tests/src/utils_other.cpp @@ -43,11 +43,12 @@ TEST_F(ValidateMetadataTest, Mcols) { auto subpath = path / "mcols"; data_frame::mock(subpath, 10, {}); - takane::internal_other::validate_mcols(path, "mcols", 10, takane::Options()); - expect_error_mcols("unexpected number of rows", path, "mcols", 20, takane::Options()); + takane::Options opts; + takane::internal_other::validate_mcols(path, "mcols", 10, opts); + expect_error_mcols("unexpected number of rows", path, "mcols", 20, opts); initialize_directory_simple(subpath, "simple_list", "1.0"); - expect_error_mcols("'DATA_FRAME'", path, "mcols", 10, takane::Options()); + expect_error_mcols("'DATA_FRAME'", path, "mcols", 10, opts); } TEST_F(ValidateMetadataTest, Metadata) { @@ -56,10 +57,11 @@ TEST_F(ValidateMetadataTest, Metadata) { auto subpath = path / "metadata"; simple_list::mock(subpath); - takane::internal_other::validate_metadata(path, "metadata", takane::Options()); + takane::Options opts; + takane::internal_other::validate_metadata(path, "metadata", opts); initialize_directory_simple(subpath, "data_frame", "1.0"); - expect_error_metadata("'SIMPLE_LIST'", path, "metadata", takane::Options()); + expect_error_metadata("'SIMPLE_LIST'", path, "metadata", opts); } TEST(CountDirectoryEntries, Basic) {