Skip to content

Commit

Permalink
Purged global registries in favor of passing all functions in Options. (
Browse files Browse the repository at this point in the history
#35)

This avoids issues where multiple applications are fiddling with the same
global registries and causing problems for each other; now, every validate()
call has its customized Options that cannot interfere with other calls. 

Unfortunately, this change requires us to make Options a non-const reference,
in order to support applications that want to mutate state throughout the 
validate() call, e.g., to collect statistics, re-use connections, whatever.
  • Loading branch information
LTLA authored Jan 16, 2024
1 parent 1d5a2db commit e1344dc
Show file tree
Hide file tree
Showing 57 changed files with 650 additions and 640 deletions.
39 changes: 20 additions & 19 deletions include/takane/_derived_from.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@
#include <unordered_map>
#include <string>

#include "utils_public.hpp"

/**
* @file _derived_from.hpp
* @brief Registry of derived object types.
* @brief Check for derived object relationships.
*/

namespace takane {
Expand Down Expand Up @@ -46,42 +48,41 @@ inline auto default_registry() {
return registry;
}

// Look up `base` in `registry` and report whether `type` appears in the set
// stored under that key. A `base` with no entry in `registry` yields false.
inline bool check(const std::string& type, const std::string& base, const std::unordered_map<std::string, std::unordered_set<std::string> >& registry) {
    const auto entry = registry.find(base);
    if (entry == registry.end()) {
        return false;
    }
    return entry->second.count(type) != 0;
}

}
/**
* @endcond
*/

/**
* Registry of derived object types and their base types.
* Each key is the base object type and each value is the set of all of its derived types.
* Check whether a particular object type is derived from a base object type.
* Derived types satisfy the same file requirements as the base type, but usually add more files to represent additional functionality.
* This can be used by specifications to check whether arbitrary objects satisfy the file structure expectations for a particular base type.
*
* Applications can extend the **takane** framework by adding custom derived types to each set.
* Applications can add their own derived types for a given base class in `Options::custom_derived_from`.
* This extends the default relationships whereby `derived_from()` will take the union of all derived object types in the default and custom sets.
* Note that derived types must be manually included in every base type's set,
* e.g., if B is derived from A and C is derived from B, C must be added to the sets for both A and B.
*/
inline std::unordered_map<std::string, std::unordered_set<std::string> > derived_from_registry = internal_derived_from::default_registry();

/**
* Check whether a particular object type is derived from a base object type.
* This can be used by specifications to check that child components satisfy certain expectations.
*
* @param type Object type.
* @param base Base object type.
* @param options Validation options, containing custom derived/base relationships.
* @returns Whether `type` is derived from `base` or is equal to `base`.
*/
inline bool derived_from(const std::string& type, const std::string& base) {
inline bool derived_from(const std::string& type, const std::string& base, const Options& options) {
if (type == base) {
return true;
}

auto it = derived_from_registry.find(base);
if (it == derived_from_registry.end()) {
return false;
}

const auto& listing = it->second;
return (listing.find(type) != listing.end());
static const auto derived_from_registry = internal_derived_from::default_registry();
return internal_derived_from::check(type, base, derived_from_registry) || internal_derived_from::check(type, base, options.custom_derived_from);
}

}
Expand Down
61 changes: 28 additions & 33 deletions include/takane/_dimensions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,33 +22,29 @@

namespace takane {

/**
* Class to map object types to `dimensions()` functions.
*/
typedef std::unordered_map<std::string, std::function<std::vector<size_t>(const std::filesystem::path&, const ObjectMetadata&, const Options&)> > DimensionsRegistry;

/**
* @cond
*/
namespace internal_dimensions {

inline DimensionsRegistry default_registry() {
DimensionsRegistry registry;
inline auto default_registry() {
std::unordered_map<std::string, std::function<std::vector<size_t>(const std::filesystem::path&, const ObjectMetadata&, Options& os)> > registry;
typedef std::vector<size_t> Dims;

registry["data_frame"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) -> std::vector<size_t> { return data_frame::dimensions(p, m, o); };
registry["dense_array"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) -> std::vector<size_t> { return dense_array::dimensions(p, m, o); };
registry["compressed_sparse_matrix"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) -> std::vector<size_t> { return compressed_sparse_matrix::dimensions(p, m, o); };
registry["data_frame"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) -> Dims { return data_frame::dimensions(p, m, o); };
registry["dense_array"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) -> Dims { return dense_array::dimensions(p, m, o); };
registry["compressed_sparse_matrix"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) -> Dims { return compressed_sparse_matrix::dimensions(p, m, o); };

// Subclasses of SE, so we just re-use the SE methods here.
registry["summarized_experiment"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) -> std::vector<size_t> { return summarized_experiment::dimensions(p, m, o); };
registry["ranged_summarized_experiment"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) -> std::vector<size_t> { return summarized_experiment::dimensions(p, m, o); };
registry["single_cell_experiment"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) -> std::vector<size_t> { return summarized_experiment::dimensions(p, m, o); };
registry["spatial_experiment"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) -> std::vector<size_t> { return summarized_experiment::dimensions(p, m, o); };
registry["summarized_experiment"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) -> Dims { return summarized_experiment::dimensions(p, m, o); };
registry["ranged_summarized_experiment"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) -> Dims { return summarized_experiment::dimensions(p, m, o); };
registry["single_cell_experiment"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) -> Dims { return summarized_experiment::dimensions(p, m, o); };
registry["spatial_experiment"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) -> Dims { return summarized_experiment::dimensions(p, m, o); };

registry["bumpy_atomic_array"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) -> std::vector<size_t> { return bumpy_atomic_array::dimensions(p, m, o); };
registry["bumpy_data_frame_array"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) -> std::vector<size_t> { return bumpy_data_frame_array::dimensions(p, m, o); };
registry["vcf_experiment"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) -> std::vector<size_t> { return vcf_experiment::dimensions(p, m, o); };
registry["delayed_array"] = [](const std::filesystem::path& p, const ObjectMetadata& m, const Options& o) -> std::vector<size_t> { return delayed_array::dimensions(p, m, o); };
registry["bumpy_atomic_array"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) -> Dims { return bumpy_atomic_array::dimensions(p, m, o); };
registry["bumpy_data_frame_array"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) -> Dims { return bumpy_data_frame_array::dimensions(p, m, o); };
registry["vcf_experiment"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) -> Dims { return vcf_experiment::dimensions(p, m, o); };
registry["delayed_array"] = [](const std::filesystem::path& p, const ObjectMetadata& m, Options& o) -> Dims { return delayed_array::dimensions(p, m, o); };

return registry;
}
Expand All @@ -58,27 +54,25 @@ inline DimensionsRegistry default_registry() {
* @endcond
*/

/**
* Registry of functions to be used by `dimensions()`.
* Applications can extend **takane** by adding new dimension functions for custom object types.
*/
inline DimensionsRegistry dimensions_registry = internal_dimensions::default_registry();

/**
* Get the dimensions of a multi-dimensional object in a subdirectory, based on the supplied object type.
* This searches the `dimensions_registry` to find a dimension function for the given type.
*
* Applications can supply custom dimension functions for a given type via `Options::custom_dimensions`.
* If available, the supplied custom function will be used instead of the default.
*
* @param path Path to a directory representing an object.
* @param metadata Metadata for the object, typically determined from its `OBJECT` file.
* @param options Validation options, mostly for input performance.
* @param options Validation options.
*
* @return Vector containing the object's dimensions.
*/
inline std::vector<size_t> dimensions(const std::filesystem::path& path, const ObjectMetadata& metadata, const Options& options) {
if (!std::filesystem::exists(path) || std::filesystem::status(path).type() != std::filesystem::file_type::directory) {
throw std::runtime_error("expected '" + path.string() + "' to be a directory");
inline std::vector<size_t> dimensions(const std::filesystem::path& path, const ObjectMetadata& metadata, Options& options) {
auto cIt = options.custom_dimensions.find(metadata.type);
if (cIt != options.custom_dimensions.end()) {
return (cIt->second)(path, metadata, options);
}

static const auto dimensions_registry = internal_dimensions::default_registry();
auto vrIt = dimensions_registry.find(metadata.type);
if (vrIt == dimensions_registry.end()) {
throw std::runtime_error("no registered 'dimensions' function for object type '" + metadata.type + "' at '" + path.string() + "'");
Expand All @@ -91,21 +85,22 @@ inline std::vector<size_t> dimensions(const std::filesystem::path& path, const O
* Get the dimensions of an object in a subdirectory, using its `OBJECT` file to automatically determine the type.
*
* @param path Path to a directory containing an object.
* @param options Validation options, mostly for input performance.
* @param options Validation options.
* @return The object's dimensions.
*/
inline std::vector<size_t> dimensions(const std::filesystem::path& path, const Options& options) {
inline std::vector<size_t> dimensions(const std::filesystem::path& path, Options& options) {
return dimensions(path, read_object_metadata(path), options);
}

/**
* Overload of `dimensions()` with default options.
* Overload of `dimensions()` with default settings.
*
* @param path Path to a directory containing an object.
* @return The object's dimensions.
*/
inline std::vector<size_t> dimensions(const std::filesystem::path& path) {
return dimensions(path, Options());
Options options;
return dimensions(path, options);
}

}
Expand Down
Loading

0 comments on commit e1344dc

Please sign in to comment.