diff --git a/R/saveBaseFactor.R b/R/saveBaseFactor.R index 64a391b..379931d 100644 --- a/R/saveBaseFactor.R +++ b/R/saveBaseFactor.R @@ -35,39 +35,47 @@ setMethod("saveObject", "factor", function(x, path, ...) { fhandle <- H5Fopen(ofile) on.exit(H5Fclose(fhandle), add=TRUE) + ghandle <- H5Gopen(fhandle, host) + on.exit(H5Gclose(ghandle), add=TRUE, after=FALSE) (function (){ - ghandle <- H5Gopen(fhandle, host) - on.exit(H5Gclose(ghandle), add=TRUE) h5writeAttribute("1.0", ghandle, "version", asScalar=TRUE) if (is.ordered(x)) { h5writeAttribute(1L, ghandle, "ordered", asScalar=TRUE) } })() - .simple_save_codes(fhandle, host, x) - h5write(levels(x), fhandle, paste0(host, "/levels")) + .simple_save_codes(ghandle, x) + h5write(levels(x), ghandle, "levels") write("string_factor", file=file.path(path, "OBJECT")) invisible(NULL) }) -.simple_save_codes <- function(fhandle, host, x, save.names=TRUE) { +.simple_save_codes <- function(ghandle, x, save.names=TRUE) { codes <- as.integer(x) - 1L missing.placeholder <- NULL if (anyNA(codes)) { - missing.placeholder <- -1L + missing.placeholder <- nlevels(x) codes[is.na(codes)] <- missing.placeholder } - full.data.name <- paste0(host, "/codes") - h5write(codes, fhandle, full.data.name) + shandle <- H5Screate_simple(length(x)) + on.exit(H5Sclose(shandle), add=TRUE) + dhandle <- H5Dcreate(ghandle, "codes", dtype_id="H5T_NATIVE_UINT32", h5space=shandle) + on.exit(H5Dclose(dhandle), add=TRUE, after=FALSE) + H5Dwrite(dhandle, codes) + if (!is.null(missing.placeholder)) { - addMissingPlaceholderAttributeForHdf5(fhandle, full.data.name, missing.placeholder) + ashandle <- H5Screate("H5S_SCALAR") + on.exit(H5Sclose(ashandle), add=TRUE, after=FALSE) + ahandle <- H5Acreate(dhandle, "missing-value-placeholder", dtype_id="H5T_NATIVE_UINT32", h5space=ashandle) + on.exit(H5Aclose(ahandle), add=TRUE, after=FALSE) + H5Awrite(ahandle, missing.placeholder) } if (save.names && !is.null(names(x))) { - h5write(names(x), fhandle, paste0(host, "/names")) 
+ h5write(names(x), ghandle, "names") } } diff --git a/R/saveDataFrame.R b/R/saveDataFrame.R index 0440b90..e0b5c6c 100644 --- a/R/saveDataFrame.R +++ b/R/saveDataFrame.R @@ -91,11 +91,10 @@ setMethod("saveObject", "DataFrame", function(x, path, ...) { if (is.ordered(col)) { h5writeAttribute(1L, ghandle, "ordered", asScalar=TRUE) } + .simple_save_codes(ghandle, col, save.names=FALSE) + h5write(levels(col), ghandle, "levels"); })() - .simple_save_codes(fhandle, full.data.name, col, save.names=FALSE) - h5write(levels(col), fhandle, paste0(full.data.name, "/levels")); - } else if (.is_datetime(col)) { coltype <- "string" colformat <- "date-time" diff --git a/R/saveDataFrameFactor.R b/R/saveDataFrameFactor.R index 8cd6622..a79479b 100644 --- a/R/saveDataFrameFactor.R +++ b/R/saveDataFrameFactor.R @@ -32,13 +32,11 @@ setMethod("saveObject", "DataFrameFactor", function(x, path, ...) { fhandle <- H5Fopen(ofile) on.exit(H5Fclose(fhandle), add=TRUE) - (function (){ - ghandle <- H5Gopen(fhandle, host) - on.exit(H5Gclose(ghandle), add=TRUE) - h5writeAttribute("1.0", ghandle, "version", asScalar=TRUE) - })() + ghandle <- H5Gopen(fhandle, host) + on.exit(H5Gclose(ghandle), add=TRUE, after=FALSE) + h5writeAttribute("1.0", ghandle, "version", asScalar=TRUE) - .simple_save_codes(fhandle, host, x) + .simple_save_codes(ghandle, x) stuff <- levels(x) altSaveObject(stuff, paste0(path, "/levels"), ...) 
diff --git a/inst/include/fetch.sh b/inst/include/fetch.sh index 57e43e2..3e105d1 100755 --- a/inst/include/fetch.sh +++ b/inst/include/fetch.sh @@ -31,7 +31,7 @@ harvester() { harvester millijson https://github.com/ArtifactDB/millijson v1.0.0 harvester byteme https://github.com/LTLA/byteme v1.1.0 -harvester uzuki2 https://github.com/ArtifactDB/uzuki2 v1.3.0 harvester comservatory https://github.com/ArtifactDB/comservatory v2.0.1 -harvester ritsuko https://github.com/ArtifactDB/ritsuko v0.3.3 +harvester uzuki2 https://github.com/ArtifactDB/uzuki2 master +harvester ritsuko https://github.com/ArtifactDB/ritsuko master harvester takane https://github.com/ArtifactDB/takane master diff --git a/inst/include/ritsuko/choose_missing_placeholder.hpp b/inst/include/ritsuko/choose_missing_placeholder.hpp index 599857a..01d8014 100644 --- a/inst/include/ritsuko/choose_missing_placeholder.hpp +++ b/inst/include/ritsuko/choose_missing_placeholder.hpp @@ -15,20 +15,58 @@ namespace ritsuko { /** - * Choose an appropriate placeholder for missing values in an integer dataset. + * @cond + */ +template +bool found(Iterator start, Iterator end, Mask mask, Type candidate) { + if constexpr(std::is_same::value) { + return (std::find(start, end, candidate) != end); + } else { + for (; start != end; ++start, ++mask) { + if (!*mask && candidate == *start) { + return true; + } + } + return false; + } +} + +template()))>::type>::type> +std::set create_unique_set(Iterator start, Iterator end, Mask mask) { + if constexpr(std::is_same::value) { + return std::set(start, end); + } else { + std::set output; + for (; start != end; ++start, ++mask) { + if (!*mask) { + output.insert(*start); + } + } + return output; + } +} +/** + * @endcond + */ + +/** + * Choose an appropriate placeholder for missing values in an integer dataset, after ignoring all the masked values. 
* This will try the various special values (the minimum, the maximum, and for signed types, 0) * before sorting the dataset and searching for an unused integer value. * * @tparam Iterator_ Forward iterator for integer values. + * @tparam Mask_ Random access iterator for mask values. * @tparam Type_ Integer type pointed to by `Iterator_`. * * @param start Start of the dataset. * @param end End of the dataset. + * @param mask Start of the mask vector. + * This should have the same length as `end - start`; each entry is true if the corresponding value of the integer dataset is masked, and false otherwise. * * @return Pair containing (i) a boolean indicating whether a placeholder was successfully found, and (ii) the chosen placeholder if the previous boolean is true. */ -template()))>::type>::type> -std::pair choose_missing_integer_placeholder(Iterator start, Iterator end) { +template()))>::type>::type> +std::pair choose_missing_integer_placeholder(Iterator start, Iterator end, Mask mask) { static_assert(std::numeric_limits::is_integer); // Trying important points first; minima and maxima, and 0. @@ -42,7 +80,7 @@ std::pair choose_missing_integer_placeholder(Iterator start, Iterat } else { candidate = 0; } - if (std::find(start, end, candidate) == end) { + if (!found(start, end, mask, candidate)) { return std::make_pair(true, candidate); } } @@ -55,14 +93,14 @@ std::pair choose_missing_integer_placeholder(Iterator start, Iterat } else { candidate = 0; } - if (std::find(start, end, candidate) == end) { + if (!found(start, end, mask, candidate)) { return std::make_pair(true, candidate); } } } // Well... going through it in order. 
- std::set uniq_sort(start, end); + auto uniq_sort = create_unique_set(start, end, mask); Type_ last = std::numeric_limits::min(); for (auto x : uniq_sort) { if (last + 1 < x) { @@ -75,7 +113,23 @@ std::pair choose_missing_integer_placeholder(Iterator start, Iterat } /** - * Choose an appropriate placeholder for missing values in a floating-point dataset. + * Overload of `choose_missing_integer_placeholder()` where no values are masked. + * + * @tparam Iterator_ Forward iterator for integer values. + * @tparam Type_ Integer type pointed to by `Iterator_`. + * + * @param start Start of the dataset. + * @param end End of the dataset. + * + * @return Pair containing (i) a boolean indicating whether a placeholder was successfully found, and (ii) the chosen placeholder if the previous boolean is true. + */ +template()))>::type>::type> +std::pair choose_missing_integer_placeholder(Iterator start, Iterator end) { + return choose_missing_integer_placeholder(start, end, false); +} + +/** + * Choose an appropriate placeholder for missing values in a floating-point dataset, after ignoring all masked values. * This will try the various IEEE special values (NaN, Inf, -Inf) and then some type-specific boundaries (the minimum, the maximum, and for signed types, 0) * before sorting the dataset and searching for an unused float. * @@ -84,22 +138,35 @@ std::pair choose_missing_integer_placeholder(Iterator start, Iterat * * @param start Start of the dataset. * @param end End of the dataset. + * @param mask Start of the mask vector. * @param skip_nan Whether to skip NaN as a potential placeholder. * Useful in frameworks like R that need special consideration of NaN payloads. * * @return Pair containing (i) a boolean indicating whether a placeholder was successfully found, and (ii) the chosen placeholder if the previous boolean is true. 
*/ -template()))>::type>::type> -std::pair choose_missing_float_placeholder(Iterator start, Iterator end, bool skip_nan = false) { +template()))>::type>::type> +std::pair choose_missing_float_placeholder(Iterator start, Iterator end, Mask mask, bool skip_nan) { if constexpr(std::numeric_limits::is_iec559) { if (!skip_nan) { bool has_nan = false; - for (auto x = start; x != end; ++x) { - if (std::isnan(*x)) { - has_nan = true; - break; + + if constexpr(std::is_same::value) { + for (auto x = start; x != end; ++x) { + if (std::isnan(*x)) { + has_nan = true; + break; + } + } + } else { + auto sIt = mask; + for (auto x = start; x != end; ++x, ++sIt) { + if (!*sIt && std::isnan(*x)) { + has_nan = true; + break; + } } } + if (!has_nan) { return std::make_pair(true, std::numeric_limits::quiet_NaN()); } @@ -107,7 +174,7 @@ std::pair choose_missing_float_placeholder(Iterator start, Iterator for (int i = 0; i < 2; ++i) { Type_ candidate = std::numeric_limits::infinity() * (i == 0 ? 1 : -1); - if (std::find(start, end, candidate) == end) { + if (!found(start, end, mask, candidate)) { return std::make_pair(true, candidate); } } @@ -123,13 +190,13 @@ std::pair choose_missing_float_placeholder(Iterator start, Iterator } else { candidate = 0; } - if (std::find(start, end, candidate) == end) { + if (!found(start, end, mask, candidate)) { return std::make_pair(true, candidate); } } // Well... going through it in order. - std::set uniq_sort(start, end); + auto uniq_sort = create_unique_set(start, end, mask); Type_ last = std::numeric_limits::lowest(); for (auto x : uniq_sort) { if (std::isfinite(x)) { @@ -144,6 +211,23 @@ std::pair choose_missing_float_placeholder(Iterator start, Iterator return std::make_pair(false, 0); } +/** + * Overload of `choose_missing_float_placeholder()` where no values are masked. + * + * @tparam Iterator_ Forward iterator for floating-point values. + * @tparam Type_ Integer type pointed to by `Iterator_`. + * + * @param start Start of the dataset. 
+ * @param end End of the dataset. + * @param skip_nan Whether to skip NaN as a potential placeholder. + * + * @return Pair containing (i) a boolean indicating whether a placeholder was successfully found, and (ii) the chosen placeholder if the previous boolean is true. + */ +template()))>::type>::type> +std::pair choose_missing_float_placeholder(Iterator start, Iterator end, bool skip_nan = false) { + return choose_missing_float_placeholder(start, end, false, skip_nan); +} + } #endif diff --git a/inst/include/ritsuko/hdf5/Stream1dNumericDataset.hpp b/inst/include/ritsuko/hdf5/Stream1dNumericDataset.hpp new file mode 100644 index 0000000..bbe866f --- /dev/null +++ b/inst/include/ritsuko/hdf5/Stream1dNumericDataset.hpp @@ -0,0 +1,134 @@ +#ifndef RITSUKO_HDF5_STREAM_1D_NUMERIC_DATASET_HPP +#define RITSUKO_HDF5_STREAM_1D_NUMERIC_DATASET_HPP + +#include "H5Cpp.h" + +#include +#include + +#include "pick_1d_block_size.hpp" +#include "get_1d_length.hpp" +#include "get_name.hpp" +#include "as_numeric_datatype.hpp" + +/** + * @file Stream1dNumericDataset.hpp + * @brief Stream a numeric 1D HDF5 dataset into memory. + */ + +namespace ritsuko { + +namespace hdf5 { + +/** + * @brief Stream a numeric 1D HDF5 dataset into memory. + * @tparam Type_ Type to represent the data in memory. + * + * This streams in a HDF5 dataset in contiguous blocks, using block sizes defined by `pick_1d_block_size()`. + * Callers can then extract one value at a time or they can acquire the entire block. + */ +template +class Stream1dNumericDataset { +public: + /** + * @param ptr Pointer to a HDF5 dataset handle. + * @param size Length of the dataset as a 1-dimensional vector. + * @param buffer_size Size of the buffer for holding streamed blocks of values. + * Larger buffers improve speed at the cost of some memory efficiency. 
+ */ + Stream1dNumericDataset(const H5::DataSet* ptr, hsize_t length, hsize_t buffer_size) : + ptr(ptr), + full_length(length), + block_size(pick_1d_block_size(ptr->getCreatePlist(), full_length, buffer_size)), + mspace(1, &block_size), + dspace(1, &full_length), + buffer(block_size) + {} + + /** + * Overloaded constructor where the length is automatically determined. + * + * @param ptr Pointer to a HDF5 dataset handle. + * @param buffer_size Size of the buffer for holding streamed blocks of values. + */ + Stream1dNumericDataset(const H5::DataSet* ptr, hsize_t buffer_size) : + Stream1dNumericDataset(ptr, get_1d_length(ptr->getSpace(), false), buffer_size) + {} + +public: + /** + * @return Value at the current position of the stream. + */ + Type_ get() { + while (consumed >= available) { + consumed -= available; + load(); + } + return buffer[consumed]; + } + + /** + * @return Pair containing a pointer to and the length of an array. + * The array holds all loaded values of the stream at its current position, up to the specified length. + * Note that the pointer is only valid until the next invocation of `next()`. + */ + std::pair get_many() { + while (consumed >= available) { + consumed -= available; + load(); + } + return std::make_pair(buffer.data() + consumed, available - consumed); + } + + /** + * Advance the position of the stream by `jump`. + * + * @param jump Number of positions by which to advance the stream. + */ + void next(size_t jump = 1) { + consumed += jump; + } + + /** + * @return Length of the dataset. + */ + hsize_t length() const { + return full_length; + } + + /** + * @return Current position on the stream. 
+ */ + hsize_t position() const { + return consumed + last_loaded; + } + +private: + const H5::DataSet* ptr; + hsize_t full_length, block_size; + H5::DataSpace mspace; + H5::DataSpace dspace; + std::vector buffer; + + hsize_t last_loaded = 0; + hsize_t consumed = 0; + hsize_t available = 0; + + void load() { + if (last_loaded >= full_length) { + throw std::runtime_error("requesting data beyond the end of the dataset at '" + get_name(*ptr) + "'"); + } + available = std::min(full_length - last_loaded, block_size); + constexpr hsize_t zero = 0; + mspace.selectHyperslab(H5S_SELECT_SET, &available, &zero); + dspace.selectHyperslab(H5S_SELECT_SET, &available, &last_loaded); + ptr->read(buffer.data(), as_numeric_datatype(), mspace, dspace); + last_loaded += available; + } +}; + +} + +} + +#endif diff --git a/inst/include/ritsuko/hdf5/Stream1dStringDataset.hpp b/inst/include/ritsuko/hdf5/Stream1dStringDataset.hpp new file mode 100644 index 0000000..69c7515 --- /dev/null +++ b/inst/include/ritsuko/hdf5/Stream1dStringDataset.hpp @@ -0,0 +1,171 @@ +#ifndef RITSUKO_HDF5_STREAM_1D_STRING_DATASET_HPP +#define RITSUKO_HDF5_STREAM_1D_STRING_DATASET_HPP + +#include "H5Cpp.h" + +#include +#include +#include + +#include "pick_1d_block_size.hpp" +#include "get_1d_length.hpp" +#include "get_name.hpp" +#include "as_numeric_datatype.hpp" +#include "_strings.hpp" + +/** + * @file Stream1dStringDataset.hpp + * @brief Stream a numeric 1D HDF5 dataset into memory. + */ + +namespace ritsuko { + +namespace hdf5 { + +/** + * @brief Stream a 1D HDF5 string dataset into memory. + * + * This streams in a HDF5 dataset in contiguous blocks, using block sizes defined by `pick_1d_block_size()`. + * Callers can then extract one C-style string at a time. + */ +class Stream1dStringDataset { +public: + /** + * @param ptr Pointer to a HDF5 dataset handle. + * @param length Length of the dataset as a 1-dimensional vector. + * @param buffer_size Size of the buffer for holding streamed blocks of values. 
+ * Larger buffers improve speed at the cost of some memory efficiency. + */ + Stream1dStringDataset(const H5::DataSet* ptr, hsize_t length, hsize_t buffer_size) : + ptr(ptr), + full_length(length), + block_size(pick_1d_block_size(ptr->getCreatePlist(), full_length, buffer_size)), + mspace(1, &block_size), + dspace(1, &full_length), + dtype(ptr->getDataType()), + is_variable(dtype.isVariableStr()) + { + if (is_variable) { + var_buffer.resize(block_size); + } else { + fixed_length = dtype.getSize(); + fix_buffer.resize(fixed_length * block_size); + } + final_buffer.resize(block_size); + } + + /** + * Overloaded constructor where the length is automatically determined. + * + * @param ptr Pointer to a HDF5 dataset handle. + * @param buffer_size Size of the buffer for holding streamed blocks of values. + */ + Stream1dStringDataset(const H5::DataSet* ptr, hsize_t buffer_size) : + Stream1dStringDataset(ptr, get_1d_length(ptr->getSpace(), false), buffer_size) + {} + +public: + /** + * @return String at the current position of the stream. + */ + std::string get() { + while (consumed >= available) { + consumed -= available; + load(); + } + return final_buffer[consumed]; + } + + /** + * @return String at the current position of the stream. + * Unlike `get()`, this avoids a copy by directly acquiring the string, + * but it invalidates all subsequent `get()` and `steal()` requests until `next()` is called. + */ + std::string steal() { + while (consumed >= available) { + consumed -= available; + load(); + } + return std::move(final_buffer[consumed]); + } + + /** + * Advance to the next position of the stream. + * + * @param jump Number of positions by which to advance the stream. + */ + void next(size_t jump = 1) { + consumed += jump; + } + + /** + * @return Length of the dataset. + */ + hsize_t length() const { + return full_length; + } + + /** + * @return Current position on the stream. 
+ */ + hsize_t position() const { + return consumed + last_loaded; + } + +private: + const H5::DataSet* ptr; + hsize_t full_length, block_size; + H5::DataSpace mspace; + H5::DataSpace dspace; + + H5::DataType dtype; + bool is_variable; + std::vector var_buffer; + size_t fixed_length = 0; + std::vector fix_buffer; + std::vector final_buffer; + + hsize_t last_loaded = 0; + hsize_t consumed = 0; + hsize_t available = 0; + + void load() { + if (last_loaded >= full_length) { + throw std::runtime_error("requesting data beyond the end of the dataset at '" + get_name(*ptr) + "'"); + } + available = std::min(full_length - last_loaded, block_size); + constexpr hsize_t zero = 0; + mspace.selectHyperslab(H5S_SELECT_SET, &available, &zero); + dspace.selectHyperslab(H5S_SELECT_SET, &available, &last_loaded); + + if (is_variable) { + ptr->read(var_buffer.data(), dtype, mspace, dspace); + [[maybe_unused]] VariableStringCleaner deletor(dtype.getId(), mspace.getId(), var_buffer.data()); + for (hsize_t i = 0; i < block_size; ++i) { + if (var_buffer[i] == NULL) { + throw std::runtime_error("detected a NULL pointer for a variable length string in '" + get_name(*ptr) + "'"); + } + auto& curstr = final_buffer[i]; + curstr.clear(); + curstr.insert(0, var_buffer[i]); + } + + } else { + auto bptr = fix_buffer.data(); + ptr->read(bptr, dtype, mspace, dspace); + for (size_t i = 0; i < available; ++i, bptr += fixed_length) { + auto& curstr = final_buffer[i]; + curstr.clear(); + curstr.insert(curstr.end(), bptr, bptr + find_string_length(bptr, fixed_length)); + } + } + + last_loaded += available; + } +}; + +} + +} + +#endif diff --git a/inst/include/ritsuko/hdf5/_strings.hpp b/inst/include/ritsuko/hdf5/_strings.hpp new file mode 100644 index 0000000..fe4e07f --- /dev/null +++ b/inst/include/ritsuko/hdf5/_strings.hpp @@ -0,0 +1,29 @@ +#ifndef RITSUKO_HDF5_STRINGS_HPP +#define RITSUKO_HDF5_STRINGS_HPP + +#include "H5Cpp.h" + +namespace ritsuko { + +namespace hdf5 { + +inline size_t 
find_string_length(const char* ptr, size_t max) { + size_t j = 0; + for (; j < max && ptr[j] != '\0'; ++j) {} + return j; +} + +struct VariableStringCleaner { + VariableStringCleaner(hid_t did, hid_t mid, char** buffer) : did(did), mid(mid), buffer(buffer) {} + ~VariableStringCleaner() { + H5Dvlen_reclaim(did, mid, H5P_DEFAULT, buffer); + } + hid_t did, mid; + char** buffer; +}; + +} + +} + +#endif diff --git a/inst/include/ritsuko/hdf5/as_numeric_datatype.hpp b/inst/include/ritsuko/hdf5/as_numeric_datatype.hpp new file mode 100644 index 0000000..ad02af3 --- /dev/null +++ b/inst/include/ritsuko/hdf5/as_numeric_datatype.hpp @@ -0,0 +1,55 @@ +#ifndef RITSUKO_AS_NUMERIC_DATATYPE_HPP +#define RITSUKO_AS_NUMERIC_DATATYPE_HPP + +#include +#include +#include "H5Cpp.h" + +/** + * @file as_numeric_datatype.hpp + * @brief Choose a HDF5 datatype. + */ + +namespace ritsuko { + +namespace hdf5 { + +/** + * Choose the HDF5 datatype object corresponding to a particular C++ numeric type. + * Currently, only fixed-width integer types (e.g., `uint16_t`, `int32_t`) and the usual floating-point types are supported. + * + * @tparam Type_ A numeric C++ type of fixed width. + * This can be any of the fixed-width integers or a floating-point number of known precision. + * @returns A HDF5 datatype object. 
+ */ +template +H5::PredType as_numeric_datatype() { + if constexpr(std::is_same::value) { + return H5::PredType::NATIVE_UINT8; + } else if constexpr(std::is_same::value) { + return H5::PredType::NATIVE_INT8; + } else if constexpr(std::is_same::value) { + return H5::PredType::NATIVE_UINT16; + } else if constexpr(std::is_same::value) { + return H5::PredType::NATIVE_INT16; + } else if constexpr(std::is_same::value) { + return H5::PredType::NATIVE_UINT32; + } else if constexpr(std::is_same::value) { + return H5::PredType::NATIVE_INT32; + } else if constexpr(std::is_same::value) { + return H5::PredType::NATIVE_UINT64; + } else if constexpr(std::is_same::value) { + return H5::PredType::NATIVE_INT64; + } else if constexpr(std::is_same::value) { + return H5::PredType::NATIVE_FLOAT; + } else { + static_assert(std::is_same::value, "specified type is not yet supported"); + return H5::PredType::NATIVE_DOUBLE; + } +} + +} + +} + +#endif diff --git a/inst/include/ritsuko/hdf5/get_1d_length.hpp b/inst/include/ritsuko/hdf5/get_1d_length.hpp index a3ac9d5..3345b8c 100644 --- a/inst/include/ritsuko/hdf5/get_1d_length.hpp +++ b/inst/include/ritsuko/hdf5/get_1d_length.hpp @@ -39,6 +39,56 @@ inline hsize_t get_1d_length(const H5::DataSpace& space, bool allow_scalar) { return dims; } +/** + * Overload of `get_1d_length()` that accepts a dataset handle. + * + * @param handle Handle to a HDF5 dataset. + * @param allow_scalar Whether to allow scalars. + * + * @return Length of the dataset, i.e., the extent of its single dimension. + */ +inline hsize_t get_1d_length(const H5::DataSet& handle, bool allow_scalar) { + return get_1d_length(handle.getSpace(), allow_scalar); +} + +/** + * Overload of `get_1d_length()` that accepts an attribute handle. + * + * @param handle Handle to a HDF5 attribute. + * @param allow_scalar Whether to allow scalars. + * + * @return Length of the attribute, i.e., the extent of its single dimension. 
+ */ +inline hsize_t get_1d_length(const H5::Attribute& handle, bool allow_scalar) { + return get_1d_length(handle.getSpace(), allow_scalar); +} + +/** + * @param space The data space of the dataset. + * @return Whether `space` represents a scalar dataset. + */ +inline bool is_scalar(const H5::DataSpace& space) { + return space.getSimpleExtentNdims() == 0; +} + +/** + * Overload of `is_scalar()` that accepts a dataset handle. + * @param handle Handle to a HDF5 dataset. + * @return Whether `space` represents a scalar dataset. + */ +inline bool is_scalar(const H5::DataSet& handle) { + return is_scalar(handle.getSpace()); +} + +/** + * Overload of `is_scalar()` that accepts an attribute handle. + * @param handle Handle to a HDF5 attribute. + * @return Whether `space` represents a scalar dataset. + */ +inline bool is_scalar(const H5::Attribute& handle) { + return is_scalar(handle.getSpace()); +} + } } diff --git a/inst/include/ritsuko/hdf5/get_dataset.hpp b/inst/include/ritsuko/hdf5/get_dataset.hpp deleted file mode 100644 index dd64dd5..0000000 --- a/inst/include/ritsuko/hdf5/get_dataset.hpp +++ /dev/null @@ -1,51 +0,0 @@ -#ifndef RITSUKO_HDF5_GET_DATASET_HPP -#define RITSUKO_HDF5_GET_DATASET_HPP - -#include "H5Cpp.h" -#include - -/** - * @file get_dataset.hpp - * @brief Quick functions to get a dataset handle. - */ - -namespace ritsuko { - -namespace hdf5 { - -/** - * @param handle Group containing the dataset. - * @param name Name of the dataset inside the group. - * @return Handle to the dataset. - * An error is raised if `name` does not refer to a dataset. - */ -inline H5::DataSet get_dataset(const H5::Group& handle, const char* name) { - if (!handle.exists(name) || handle.childObjType(name) != H5O_TYPE_DATASET) { - throw std::runtime_error("expected a dataset at '" + std::string(name) + "'"); - } - return handle.openDataSet(name); -} - -/** - * @param handle Group containing the scalar dataset. - * @param name Name of the dataset inside the group. 
- * @return Handle to a scalar dataset. - * An error is raised if `name` does not refer to a scalar dataset. - */ -inline H5::DataSet get_scalar_dataset(const H5::Group& handle, const char* name) { - auto dhandle = get_dataset(handle, name); - auto dspace = dhandle.getSpace(); - int ndims = dspace.getSimpleExtentNdims(); - if (ndims != 0) { - throw std::runtime_error("expected a scalar dataset at '" + std::string(name) + "'"); - } - return dhandle; -} - -} - -} - -#endif - - diff --git a/inst/include/ritsuko/hdf5/get_missing_placeholder_attribute.hpp b/inst/include/ritsuko/hdf5/get_missing_placeholder_attribute.hpp deleted file mode 100644 index 029051a..0000000 --- a/inst/include/ritsuko/hdf5/get_missing_placeholder_attribute.hpp +++ /dev/null @@ -1,49 +0,0 @@ -#ifndef RITSUKO_HDF5_GET_MISSING_PLACEHOLDER_ATTRIBUTE_HPP -#define RITSUKO_HDF5_GET_MISSING_PLACEHOLDER_ATTRIBUTE_HPP - -#include "H5Cpp.h" -#include - -/** - * @file get_missing_placeholder_attribute.hpp - * @brief Get the missing placeholder attribute. - */ - -namespace ritsuko { - -namespace hdf5 { - -/** - * @param handle Dataset handle. - * @param attr_name Name of the attribute containing the missing value placeholder. - * @param type_class_only Whether to only require identical type classes for the placeholder. - * By default, we require identity in the types themselves. - * - * - * @return Handle to the attribute. - * An error is raised if the attribute is not a scalar or has a different type (or type class, if `type_class_only_ = true`) to the dataset. 
- */ -inline H5::Attribute get_missing_placeholder_attribute(const H5::DataSet& handle, const char* attr_name, bool type_class_only = false) { - auto attr = handle.openAttribute(attr_name); - if (attr.getSpace().getSimpleExtentNdims() != 0) { - throw std::runtime_error("expected the '" + std::string(attr_name) + "' attribute to be a scalar"); - } - - if (type_class_only) { - if (attr.getTypeClass() != handle.getTypeClass()) { - throw std::runtime_error("expected the '" + std::string(attr_name) + "' attribute to have the same type class as its dataset"); - } - } else { - if (attr.getDataType() != handle.getDataType()) { - throw std::runtime_error("expected the '" + std::string(attr_name) + "' attribute to have the same type as its dataset"); - } - } - - return attr; -} - -} - -} - -#endif diff --git a/inst/include/ritsuko/hdf5/get_name.hpp b/inst/include/ritsuko/hdf5/get_name.hpp index 8dd5ab1..f907117 100644 --- a/inst/include/ritsuko/hdf5/get_name.hpp +++ b/inst/include/ritsuko/hdf5/get_name.hpp @@ -16,16 +16,22 @@ namespace hdf5 { /** * Get the name of a HDF5 object from its handle, usually for printing informative error messages. - * @tparam Handle_ Type of HDF5 handle, usually a `Group` or a `DataSet`. + * @tparam Handle_ Type of HDF5 handle, usually a `Group`, `DataSet` or `Attribute`. * @param handle Handle to a HDF5 object. * @return Name of the HDF5 object inside the file. 
*/ template std::string get_name(const Handle_& handle) { - size_t len = H5Iget_name(handle.getId(), NULL, 0); - std::vector buffer(len); - H5Iget_name(handle.getId(), buffer.data(), len+1); - return std::string(buffer.begin(), buffer.end()); + if constexpr(std::is_same::value) { + std::string name; + handle.getName(name); + return name; + } else { + size_t len = H5Iget_name(handle.getId(), NULL, 0); + std::vector buffer(len + 1); + H5Iget_name(handle.getId(), buffer.data(), buffer.size()); + return std::string(buffer.begin(), buffer.begin() + len); + } } } diff --git a/inst/include/ritsuko/hdf5/get_scalar_attribute.hpp b/inst/include/ritsuko/hdf5/get_scalar_attribute.hpp deleted file mode 100644 index d5f4877..0000000 --- a/inst/include/ritsuko/hdf5/get_scalar_attribute.hpp +++ /dev/null @@ -1,43 +0,0 @@ -#ifndef RITSUKO_HDF5_GET_ATTRIBUTE_HPP -#define RITSUKO_HDF5_GET_ATTRIBUTE_HPP - -#include "H5Cpp.h" -#include - -/** - * @file get_scalar_attribute.hpp - * @brief Helper to get a scalar attribute handle. - */ - -namespace ritsuko { - -namespace hdf5 { - -/** - * Convenient wrapper to get a scalar attribute with all of the usual error checks. - * - * @tparam Object_ Type of the HDF5 handle, usually a `DataSet` or `Group`. - * @param handle HDF5 dataset or group handle. - * @param name Name of the attribute. - * - * @return Attribute handle. 
- */ -template -H5::Attribute get_scalar_attribute(const Object_& handle, const char* name) { - if (!handle.attrExists(name)) { - throw std::runtime_error("expected an attribute at '" + std::string(name) + "'"); - } - - auto attr = handle.openAttribute(name); - if (attr.getSpace().getSimpleExtentNdims() != 0) { - throw std::runtime_error("expected a scalar attribute at '" + std::string(name) + "'"); - } - - return attr; -} - -} - -} - -#endif diff --git a/inst/include/ritsuko/hdf5/hdf5.hpp b/inst/include/ritsuko/hdf5/hdf5.hpp index 778b370..7b09b7c 100644 --- a/inst/include/ritsuko/hdf5/hdf5.hpp +++ b/inst/include/ritsuko/hdf5/hdf5.hpp @@ -1,15 +1,19 @@ #ifndef RITSUKO_HDF5_HPP #define RITSUKO_HDF5_HPP +#include "Stream1dNumericDataset.hpp" +#include "Stream1dStringDataset.hpp" +#include "as_numeric_datatype.hpp" #include "exceeds_limit.hpp" #include "get_1d_length.hpp" -#include "iterate_1d_blocks.hpp" -#include "load_1d_string_dataset.hpp" -#include "load_scalar_string_attribute.hpp" -#include "get_missing_placeholder_attribute.hpp" -#include "get_dataset.hpp" -#include "get_scalar_attribute.hpp" #include "get_name.hpp" +#include "load_attribute.hpp" +#include "load_dataset.hpp" +#include "missing_placeholder.hpp" +#include "miscellaneous.hpp" +#include "open.hpp" +#include "pick_1d_block_size.hpp" +#include "validate_string.hpp" /** * @file hdf5.hpp diff --git a/inst/include/ritsuko/hdf5/iterate_1d_blocks.hpp b/inst/include/ritsuko/hdf5/iterate_1d_blocks.hpp deleted file mode 100644 index 2680a09..0000000 --- a/inst/include/ritsuko/hdf5/iterate_1d_blocks.hpp +++ /dev/null @@ -1,44 +0,0 @@ -#ifndef RITSUKO_HDF5_ITERATE_1D_BLOCKS_HPP -#define RITSUKO_HDF5_ITERATE_1D_BLOCKS_HPP - -#include "H5Cpp.h" -#include - -/** - * @file iterate_1d_blocks.hpp - * @brief Blockwise iteration through a 1-dimensional HDF5 dataset. - */ - -namespace ritsuko { - -namespace hdf5 { - -/** - * Iterate through a 1-dimensional dataset via contiguous blocks. 
- * - * @param full_length Length of the dataset, usually obtained from `get_1d_length()`. - * @param block_size Size of the blocks, usually calculated by `pick_1d_block_size()`. - * @param fun Function that accepts `(hsize_t start, hsize_t len, H5::DataSpace& memspace, H5::DataSpace& dataspace)` and is called on each block. - * In each call, the block contains elements from `[start, start + len)`. - * `dataspace` is configured to extract that block from the dataset, while `memspace` is configured to deposit the block contents in a buffer from `[0, len)`. - * It can be assumed that consecutive calls to `fun` will operate on consecutive contiguous blocks. - */ -template -void iterate_1d_blocks(hsize_t full_length, hsize_t block_size, Function_ fun) { - H5::DataSpace mspace(1, &block_size); - H5::DataSpace dspace(1, &full_length); - hsize_t start = 0; - - for (hsize_t counter = 0; counter < full_length; counter += block_size) { - hsize_t limit = std::min(full_length - counter, block_size); - mspace.selectHyperslab(H5S_SELECT_SET, &limit, &start); - dspace.selectHyperslab(H5S_SELECT_SET, &limit, &counter); - fun(counter, limit, mspace, dspace); - } -} - -} - -} - -#endif diff --git a/inst/include/ritsuko/hdf5/load_1d_string_dataset.hpp b/inst/include/ritsuko/hdf5/load_1d_string_dataset.hpp deleted file mode 100644 index 2be553b..0000000 --- a/inst/include/ritsuko/hdf5/load_1d_string_dataset.hpp +++ /dev/null @@ -1,114 +0,0 @@ -#ifndef RITSUKO_HDF5_LOAD_1D_STRING_DATASET_HPP -#define RITSUKO_HDF5_LOAD_1D_STRING_DATASET_HPP - -#include "H5Cpp.h" -#include -#include - -#include "pick_1d_block_size.hpp" -#include "iterate_1d_blocks.hpp" - -/** - * @file load_1d_string_dataset.hpp - * @brief Load and iterate over a 1-dimensional HDF5 string dataset. - */ - -namespace ritsuko { - -namespace hdf5 { - -/** - * Iterate across a string dataset, extracting each string and running a user-specified function. 
- * This works for both variable- and fixed-length strings, and performs iteration via `iterate_1d_blocks()` to avoid loading everything into memory at once. - * - * @tparam Function_ Function class that accepts `(hsize_t i, const char* start, size_t len)` - * where `i` is the index of the string from `[start, start + len)`. - * - * @param handle Handle to a string dataset. - * @param full_length Length of the dataset in `handle`, usually obtained by `get_1d_length()`. - * @param buffer_size Buffer size to use for iteration in `iterate_1d_blocks()`. - * @param fun Function to be called on each string. - * It can be assumed that the consecutive calls to `fun` will operate on consecutive `i`. - */ -template -void load_1d_string_dataset(const H5::DataSet& handle, hsize_t full_length, hsize_t buffer_size, Function_ fun) { - auto block_size = pick_1d_block_size(handle.getCreatePlist(), full_length, buffer_size); - auto dtype = handle.getDataType(); - - if (dtype.isVariableStr()) { - std::vector buffer(block_size); - iterate_1d_blocks( - full_length, - block_size, - [&](hsize_t start, hsize_t len, const H5::DataSpace& mspace, const H5::DataSpace& dspace) -> void { - handle.read(buffer.data(), dtype, mspace, dspace); - for (hsize_t i = 0; i < len; ++i) { - fun(start + i, buffer[i], std::strlen(buffer[i])); - } - H5Dvlen_reclaim(dtype.getId(), mspace.getId(), H5P_DEFAULT, buffer.data()); - } - ); - - } else { - size_t len = dtype.getSize(); - std::vector buffer(len * block_size); - iterate_1d_blocks( - full_length, - block_size, - [&](hsize_t start, hsize_t length, const H5::DataSpace& mspace, const H5::DataSpace& dspace) -> void { - handle.read(buffer.data(), dtype, mspace, dspace); - auto ptr = buffer.data(); - for (size_t i = 0; i < length; ++i, ptr += len) { - size_t j = 0; - for (; j < len && ptr[j] != '\0'; ++j) {} - fun(start + i, ptr, j); - } - } - ); - } -} - -/** - * Iterate across a string attribute, extracting each string and running a user-specified function. 
- * This works for both variable- and fixed-length strings. - * - * @tparam Function_ Function class that accepts `(hsize_t i, const char* start, size_t len)` - * where `i` is the index of the string from `[start, start + len)`. - * - * @param handle Handle to a string attribute. - * @param full_length Length of the attribute in `handle`, usually obtained by `get_1d_length()`. - * @param fun Function to be called on each string. - * It can be assumed that the consecutive calls to `fun` will operate on consecutive `i`. - */ -template -void load_1d_string_attribute(const H5::Attribute& handle, hsize_t full_length, Function_ fun) { - auto dtype = handle.getDataType(); - - if (dtype.isVariableStr()) { - std::vector buffer(full_length); - handle.read(dtype, buffer.data()); - for (hsize_t i = 0; i < full_length; ++i) { - fun(i, buffer[i], std::strlen(buffer[i])); - } - auto mspace = handle.getSpace(); - H5Dvlen_reclaim(dtype.getId(), mspace.getId(), H5P_DEFAULT, buffer.data()); - - } else { - size_t len = dtype.getSize(); - std::vector buffer(len * full_length); - handle.read(dtype, buffer.data()); - auto ptr = buffer.data(); - for (size_t i = 0; i < full_length; ++i, ptr += len) { - size_t j = 0; - for (; j < len && ptr[j] != '\0'; ++j) {} - fun(i, ptr, j); - } - } -} - -} - -} - -#endif - diff --git a/inst/include/ritsuko/hdf5/load_attribute.hpp b/inst/include/ritsuko/hdf5/load_attribute.hpp new file mode 100644 index 0000000..e2f4a5e --- /dev/null +++ b/inst/include/ritsuko/hdf5/load_attribute.hpp @@ -0,0 +1,144 @@ +#ifndef RITSUKO_HDF5_LOAD_ATTRIBUTE_HPP +#define RITSUKO_HDF5_LOAD_ATTRIBUTE_HPP + +#include "H5Cpp.h" + +#include +#include + +#include "get_1d_length.hpp" +#include "as_numeric_datatype.hpp" +#include "_strings.hpp" + +/** + * @file load_scalar_string_attribute.hpp + * @brief Load a scalar string HDF5 attribute. + */ + +namespace ritsuko { + +namespace hdf5 { + +/** + * @param attr Handle to a scalar string attribute. 
+ * Callers are responsible for checking that `attr` contains a string datatype class. + * @return The attribute as a string. + */ +inline std::string load_scalar_string_attribute(const H5::Attribute& attr) { + auto dtype = attr.getDataType(); + + // Unfortunately, we can't just do 'std::string output; attr.read(dtype, output);', + // as we need to catch NULL pointers in the variable case. + + if (dtype.isVariableStr()) { + auto mspace = attr.getSpace(); + char* buffer; + attr.read(dtype, &buffer); + [[maybe_unused]] VariableStringCleaner deletor(dtype.getId(), mspace.getId(), &buffer); + if (buffer == NULL) { + throw std::runtime_error("detected a NULL pointer for a variable length string attribute"); + } + return std::string(buffer); + + } else { + size_t len = dtype.getSize(); + std::vector buffer(len); + attr.read(dtype, buffer.data()); + auto ptr = buffer.data(); + return std::string(ptr, ptr + find_string_length(ptr, len)); + } +} + +/** + * @tparam check_ Whether to check that `attr` is a 1-dimensional string attribute. + * @param attr Handle to a 1-dimensional string attribute. + * Callers are responsible for checking that `attr` contains a string datatype class. + * @param full_length Length of the attribute in `attr`, usually obtained by `get_1d_length()`. + * @return Vector of strings. 
+ */ +inline std::vector load_1d_string_attribute(const H5::Attribute& attr, hsize_t full_length) { + auto dtype = attr.getDataType(); + auto mspace = attr.getSpace(); + std::vector output; + output.reserve(full_length); + + if (dtype.isVariableStr()) { + std::vector buffer(full_length); + attr.read(dtype, buffer.data()); + [[maybe_unused]] VariableStringCleaner deletor(dtype.getId(), mspace.getId(), buffer.data()); + for (hsize_t i = 0; i < full_length; ++i) { + if (buffer[i] == NULL) { + throw std::runtime_error("detected a NULL pointer for a variable length string attribute"); + } + output.emplace_back(buffer[i]); + } + + } else { + size_t len = dtype.getSize(); + std::vector buffer(len * full_length); + attr.read(dtype, buffer.data()); + auto ptr = buffer.data(); + for (size_t i = 0; i < full_length; ++i, ptr += len) { + output.emplace_back(ptr, ptr + find_string_length(ptr, len)); + } + } + + return output; +} + +/** + * Overload of `load_1d_string_attribute()` that determines the length of the attribute via `get_1d_length()`. + * @param attr Handle to a 1-dimensional string attribute. + * Callers are responsible for checking that `attr` contains a string datatype class. + * @return Vector of strings. + */ +inline std::vector load_1d_string_attribute(const H5::Attribute& attr) { + return load_1d_string_attribute(attr, get_1d_length(attr.getSpace(), false)); +} + +/** + * @tparam Type_ Type for holding the data in memory, see `as_numeric_datatype()` for supported types. + * @param attr Handle to a scalar numeric attribute. + * Callers are responsible for checking that the datatype of `attr` is appropriate for `Type_`, e.g., with `exceeds_integer_limit()`. + * @return The value of the attribute. 
+ */ +template +Type_ load_scalar_numeric_attribute(const H5::Attribute& attr) { + Type_ val; + auto mtype = as_numeric_datatype(); + attr.read(mtype, &val); + return val; +} + +/** + * @tparam Type_ Type for holding the data in memory, see `as_numeric_datatype()` for supported types. + * @param attr Handle to a numeric attribute. + * Callers are responsible for checking that the datatype of `attr` is appropriate for `Type_`, e.g., with `exceeds_integer_limit()`. + * @param full_length Length of the attribute in `attr`, usually obtained by `get_1d_length()`. + * @return Vector containing the contents of the attribute. + */ +template +std::vector load_1d_numeric_attribute(const H5::Attribute& attr, hsize_t full_length) { + auto mtype = as_numeric_datatype(); + std::vector buffer(full_length); + attr.read(mtype, buffer.data()); + return buffer; +} + +/** + * Overload of `load_1d_numeric_attribute()` that determines the length of the attribute via `get_1d_length()`. + * @tparam Type_ Type for holding the data in memory, see `as_numeric_datatype()` for supported types. + * @param attr Handle to a numeric attribute. + * Callers are responsible for checking that the datatype of `attr` is appropriate for `Type_`, e.g., with `exceeds_integer_limit()`. + * @return Vector containing the contents of the attribute. 
+ */ +template +std::vector load_1d_numeric_attribute(const H5::Attribute& attr) { + return load_1d_numeric_attribute(attr, get_1d_length(attr.getSpace(), false)); +} + +} + +} + +#endif diff --git a/inst/include/ritsuko/hdf5/load_dataset.hpp b/inst/include/ritsuko/hdf5/load_dataset.hpp new file mode 100644 index 0000000..3c0fe14 --- /dev/null +++ b/inst/include/ritsuko/hdf5/load_dataset.hpp @@ -0,0 +1,79 @@ +#ifndef RITSUKO_HDF5_LOAD_DATASET_HPP +#define RITSUKO_HDF5_LOAD_DATASET_HPP + +#include +#include +#include + +#include "H5Cpp.h" + +#include "get_name.hpp" +#include "Stream1dStringDataset.hpp" +#include "_strings.hpp" + +/** + * @file load_dataset.hpp + * @brief Helper functions to load datasets. + */ + +namespace ritsuko { + +namespace hdf5 { + +/** + * Load a scalar string dataset into a single string. + * @param handle Handle to the HDF5 scalar dataset. + * @return String containing the contents of the sole dataset entry. + */ +inline std::string load_scalar_string_dataset(const H5::DataSet& handle) { + auto dtype = handle.getDataType(); + if (dtype.isVariableStr()) { + char* vptr; + handle.read(&vptr, dtype); + auto dspace = handle.getSpace(); // don't set as temporary in constructor below, otherwise it gets destroyed and the ID invalidated. + [[maybe_unused]] VariableStringCleaner deletor(dtype.getId(), dspace.getId(), &vptr); + if (vptr == NULL) { + throw std::runtime_error("detected a NULL pointer for a variable length string in '" + get_name(handle) + "'"); + } + std::string output(vptr); + return output; + } else { + size_t fixed_length = dtype.getSize(); + std::vector buffer(fixed_length); + handle.read(buffer.data(), dtype); + return std::string(buffer.begin(), buffer.begin() + find_string_length(buffer.data(), fixed_length)); + } +} + +/** + * Load a 1-dimensional string dataset into a vector of strings. + * @param handle Handle to the HDF5 scalar dataset. + * @param full_length Length of the dataset as a 1-dimensional vector. 
+ * @param buffer_size Size of the buffer for holding loaded strings. + * @return Vector of strings. + */ +inline std::vector load_1d_string_dataset(const H5::DataSet& handle, hsize_t full_length, hsize_t buffer_size) { + Stream1dStringDataset stream(&handle, full_length, buffer_size); + std::vector output; + output.reserve(full_length); + for (hsize_t i = 0; i < full_length; ++i, stream.next()) { + output.emplace_back(stream.steal()); + } + return output; +} + +/** + * Overload of `load_1d_string_dataset()` that determines the length via `get_1d_length()`. + * @param handle Handle to the HDF5 scalar dataset. + * @param buffer_size Size of the buffer for holding loaded strings. + * @return Vector of strings. + */ +inline std::vector load_1d_string_dataset(const H5::DataSet& handle, hsize_t buffer_size) { + return load_1d_string_dataset(handle, get_1d_length(handle, false), buffer_size); +} + +} + +} + +#endif diff --git a/inst/include/ritsuko/hdf5/load_scalar_string_attribute.hpp b/inst/include/ritsuko/hdf5/load_scalar_string_attribute.hpp deleted file mode 100644 index 12d4cc8..0000000 --- a/inst/include/ritsuko/hdf5/load_scalar_string_attribute.hpp +++ /dev/null @@ -1,53 +0,0 @@ -#ifndef RITSUKO_HDF5_LOAD_SCALAR_STRING_ATTRIBUTE_HPP -#define RITSUKO_HDF5_LOAD_SCALAR_STRING_ATTRIBUTE_HPP - -#include "H5Cpp.h" -#include - -#include "get_name.hpp" - -/** - * @file load_scalar_string_attribute.hpp - * @brief Load a scalar string HDF5 attribute. - */ - -namespace ritsuko { - -namespace hdf5 { - -/** - * @param attr An ``Attribute`` handle. - * @return The attribute as a string. 
- */ -inline std::string load_scalar_string_attribute(const H5::Attribute& attr) { - if (attr.getTypeClass() != H5T_STRING || attr.getSpace().getSimpleExtentNdims() != 0) { - std::string name; - attr.getName(name); - throw std::runtime_error("expected attribute '" + name + "' to be a scalar string"); - } - std::string output; - attr.read(attr.getStrType(), output); - return output; -} - -/** - * @tparam Object_ HDF5 object class, usually a ``DataSet`` or a ``Group``. - * - * @param handle Handle to a HDF5 object that can contain attributes. - * @param field Name of the attribute. - * - * @return The attribute as a string. - */ -template -std::string load_scalar_string_attribute(const Object_& handle, const char* field) { - if (!handle.attrExists(field)) { - throw std::runtime_error("expected a '" + std::string(field) + "' attribute to be present"); - } - return load_scalar_string_attribute(handle.openAttribute(field)); -} - -} - -} - -#endif diff --git a/inst/include/ritsuko/hdf5/miscellaneous.hpp b/inst/include/ritsuko/hdf5/miscellaneous.hpp new file mode 100644 index 0000000..91b8d90 --- /dev/null +++ b/inst/include/ritsuko/hdf5/miscellaneous.hpp @@ -0,0 +1,56 @@ +#ifndef RITSUKO_MISCELLANEOUS_HPP +#define RITSUKO_MISCELLANEOUS_HPP + +#include +#include "H5Cpp.h" + +#include "open.hpp" +#include "load_attribute.hpp" + +/** + * @file miscellaneous.hpp + * @brief Miscellaneous functions for user convenience. + */ + +namespace ritsuko { + +namespace hdf5 { + +/** + * @tparam Object_ Type of the HDF5 handle, usually a `DataSet` or `Group`. + * @param handle HDF5 dataset or group handle. + * @param name Name of the attribute. + * + * @return Attribute handle. + * An error is raised if `name` does not refer to a scalar attribute. 
+ */ +template +inline H5::Attribute open_scalar_attribute(const H5Object_& handle, const char* name) { + auto attr = open_attribute(handle, name); + if (!is_scalar(attr)) { + throw std::runtime_error("expected '" + std::string(name) + "' attribute to be a scalar"); + } + return attr; +} + +/** + * @tparam Object_ Type of the HDF5 handle, usually a `DataSet` or `Group`. + * @param handle HDF5 dataset or group handle. + * @param name Name of the attribute. + * + * @return A string containing the attribute value. + */ +template +std::string open_and_load_scalar_string_attribute(const H5Object_& handle, const char* name) { + auto attr = open_scalar_attribute(handle, name); + if (attr.getTypeClass() != H5T_STRING) { + throw std::runtime_error("expected '" + std::string(name) + "' attribute to be a string"); + } + return load_scalar_string_attribute(attr); +} + +} + +} + +#endif diff --git a/inst/include/ritsuko/hdf5/missing_placeholder.hpp b/inst/include/ritsuko/hdf5/missing_placeholder.hpp new file mode 100644 index 0000000..824ed03 --- /dev/null +++ b/inst/include/ritsuko/hdf5/missing_placeholder.hpp @@ -0,0 +1,98 @@ +#ifndef RITSUKO_HDF5_MISSING_PLACEHOLDER_HPP +#define RITSUKO_HDF5_MISSING_PLACEHOLDER_HPP + +#include "H5Cpp.h" +#include + +#include "as_numeric_datatype.hpp" +#include "load_attribute.hpp" +#include "get_1d_length.hpp" +#include "get_name.hpp" + +/** + * @file missing_placeholder.hpp + * @brief Get the missing placeholder attribute. + */ + +namespace ritsuko { + +namespace hdf5 { + +/** + * Check the validity of a missing placeholder attribute on a dataset. + * An error is raised if the attribute is not a scalar or has a different type (or type class, if `type_class_only_ = true`) to the dataset. + * + * @param dset Dataset handle. + * @param attr Handle for the missing placeholder, typically as an attribute on `dset`. + * @param type_class_only Whether to only require identical type classes for the placeholder. 
+ * If 0, this is false, and the types between `dset` and `attr` must be identical. + * If 1, this is true, and `dset` and `attr` just need to have the same type class. + * If -1 (default), this is true for all string types and false for all numeric types. + */ +inline void check_missing_placeholder_attribute(const H5::DataSet& dset, const H5::Attribute& attr, int type_class_only = -1) { + if (!is_scalar(attr)) { + throw std::runtime_error("expected the '" + get_name(attr) + "' attribute to be a scalar"); + } + + if (type_class_only == -1) { + type_class_only = (dset.getTypeClass() == H5T_STRING); + } + + if (type_class_only == 1) { + if (attr.getTypeClass() != dset.getTypeClass()) { + throw std::runtime_error("expected the '" + get_name(attr) + "' attribute to have the same type class as its dataset"); + } + } else { + if (attr.getDataType() != dset.getDataType()) { + throw std::runtime_error("expected the '" + get_name(attr) + "' attribute to have the same type as its dataset"); + } + } +} + +/** + * Check if a missing numeric placeholder attribute is present, and if so, open it and loads it value. + * This will also call `check_missing_placeholder_attribute()` to validate the placeholder's properties. + * + * @tparam Type_ Type to use to store the data in memory, see `as_numeric_datatype()` for supported types. + * @param handle Dataset handle. + * @param attr_name Name of the attribute containing the missing value placeholder. + * @return Pair containing (i) a boolean indicating whether the placeholder attribute was present, and (ii) the value of the placeholder if the first element is `true`. 
+ */ +template +std::pair open_and_load_optional_numeric_missing_placeholder(const H5::DataSet& handle, const char* attr_name) { + std::pair output(false, 0); + if (!handle.attrExists(attr_name)) { + return output; + } + output.first = true; + auto ahandle = handle.openAttribute(attr_name); + check_missing_placeholder_attribute(handle, ahandle); + ahandle.read(as_numeric_datatype(), &(output.second)); + return output; +} + +/** + * Check if a missing string placeholder attribute is present, and if so, open it and loads it value. + * This will also call `check_missing_placeholder_attribute()` to validate the placeholder's properties. + * + * @param handle Dataset handle. + * @param attr_name Name of the attribute containing the missing value placeholder. + * @return Pair containing (i) a boolean indicating whether the placeholder attribute was present, and (ii) the value of the placeholder if the first element is `true`. + */ +inline std::pair open_and_load_optional_string_missing_placeholder(const H5::DataSet& handle, const char* attr_name) { + std::pair output(false, ""); + if (!handle.attrExists(attr_name)) { + return output; + } + output.first = true; + auto ahandle = handle.openAttribute(attr_name); + check_missing_placeholder_attribute(handle, ahandle); + output.second = load_scalar_string_attribute(ahandle); + return output; +} + +} + +} + +#endif diff --git a/inst/include/ritsuko/hdf5/open.hpp b/inst/include/ritsuko/hdf5/open.hpp new file mode 100644 index 0000000..58c6c7b --- /dev/null +++ b/inst/include/ritsuko/hdf5/open.hpp @@ -0,0 +1,79 @@ +#ifndef RITSUKO_HDF5_OPEN_HPP +#define RITSUKO_HDF5_OPEN_HPP + +#include "H5Cpp.h" + +#include +#include +#include + +/** + * @file open.hpp + * @brief Convenience functions to safely open HDF5 handles. + */ + +namespace ritsuko { + +namespace hdf5 { + +/** + * @param path Path to a HDF5 file. + * @return Handle to the file. + * An error is raised if `path` does not exist. 
+ */ +inline H5::H5File open_file(const std::filesystem::path& path) try { + if (!std::filesystem::exists(path)) { + throw std::runtime_error("no file is present at '" + path.string() + "'"); + } + return H5::H5File(path, H5F_ACC_RDONLY); +} catch (H5::Exception& e) { + throw std::runtime_error("failed to open the HDF5 file at '" + path.string() + "'; " + e.getDetailMsg()); +} + +/** + * @param handle Parent group (or file). + * @param name Name of the group. + * @return Handle to the group. + * An error is raised if `name` does not refer to a dataset. + */ +inline H5::Group open_group(const H5::Group& handle, const char* name) { + if (!handle.exists(name) || handle.childObjType(name) != H5O_TYPE_GROUP) { + throw std::runtime_error("expected a group at '" + std::string(name) + "'"); + } + return handle.openGroup(name); +} + +/** + * @param handle Group containing the dataset. + * @param name Name of the dataset inside the group. + * @return Handle to the dataset. + * An error is raised if `name` does not refer to a dataset. + */ +inline H5::DataSet open_dataset(const H5::Group& handle, const char* name) { + if (!handle.exists(name) || handle.childObjType(name) != H5O_TYPE_DATASET) { + throw std::runtime_error("expected a dataset at '" + std::string(name) + "'"); + } + return handle.openDataSet(name); +} + +/** + * @tparam Object_ Type of the HDF5 handle, usually a `DataSet` or `Group`. + * @param handle HDF5 dataset or group handle. + * @param name Name of the attribute. + * + * @return Attribute handle. + * An error is raised if `name` does not refer to an attribute. 
+ */ +template +H5::Attribute open_attribute(const Object_& handle, const char* name) { + if (!handle.attrExists(name)) { + throw std::runtime_error("expected an attribute at '" + std::string(name) + "'"); + } + return handle.openAttribute(name); +} + +} + +} + +#endif diff --git a/inst/include/ritsuko/hdf5/validate_string.hpp b/inst/include/ritsuko/hdf5/validate_string.hpp new file mode 100644 index 0000000..7cf3573 --- /dev/null +++ b/inst/include/ritsuko/hdf5/validate_string.hpp @@ -0,0 +1,148 @@ +#ifndef RITSUKO_HDF5_VALIDATE_STRING_HPP +#define RITSUKO_HDF5_VALIDATE_STRING_HPP + +#include +#include +#include + +#include "H5Cpp.h" + +#include "get_name.hpp" +#include "pick_1d_block_size.hpp" +#include "_strings.hpp" + +/** + * @file validate_string.hpp + * @brief Helper functions to validate strings. + */ + +namespace ritsuko { + +namespace hdf5 { + +/** + * Check that a scalar string dataset is valid. + * Currently, this involves checking that there are no `NULL` entries for variable-length string datatypes. + * For fixed-width string datasets, this function is a no-op. + * + * @param handle Handle to the HDF5 string dataset. + */ +inline void validate_scalar_string_dataset(const H5::DataSet& handle) { + auto dtype = handle.getDataType(); + if (!dtype.isVariableStr()) { + return; + } + + char* vptr; + handle.read(&vptr, dtype); + auto dspace = handle.getSpace(); // don't set as temporary in constructor below, otherwise it gets destroyed and the ID invalidated. + [[maybe_unused]] VariableStringCleaner deletor(dtype.getId(), dspace.getId(), &vptr); + if (vptr == NULL) { + throw std::runtime_error("detected a NULL pointer for a variable length string in '" + get_name(handle) + "'"); + } +} + +/** + * Check that a 1-dimensional string dataset is valid. + * Currently, this involves checking that there are no `NULL` entries for variable-length string datatypes. + * For fixed-width string datasets, this function is a no-op. 
+ * + * @param handle Handle to the HDF5 string dataset. + * @param full_length Length of the dataset as a 1-dimensional vector. + * @param buffer_size Size of the buffer for holding loaded strings. + */ +inline void validate_1d_string_dataset(const H5::DataSet& handle, hsize_t full_length, hsize_t buffer_size) { + auto dtype = handle.getDataType(); + if (!dtype.isVariableStr()) { + return; + } + + hsize_t block_size = pick_1d_block_size(handle.getCreatePlist(), full_length, buffer_size); + H5::DataSpace mspace(1, &block_size), dspace(1, &full_length); + std::vector buffer(block_size); + + for (hsize_t i = 0; i < full_length; i += block_size) { + auto available = std::min(full_length - i, block_size); + constexpr hsize_t zero = 0; + mspace.selectHyperslab(H5S_SELECT_SET, &available, &zero); + dspace.selectHyperslab(H5S_SELECT_SET, &available, &i); + + handle.read(buffer.data(), dtype, mspace, dspace); + [[maybe_unused]] VariableStringCleaner deletor(dtype.getId(), mspace.getId(), buffer.data()); + for (hsize_t j = 0; j < available; ++j) { + if (buffer[j] == NULL) { + throw std::runtime_error("detected a NULL pointer for a variable length string in '" + get_name(handle) + "'"); + } + } + } +} + +/** + * Overload for `validate_1d_string_dataset()` that automatically determines its length via `get_1d_length()`. + * @param handle Handle to the HDF5 string dataset. + * @param buffer_size Size of the buffer for holding loaded strings. + */ +inline void validate_1d_string_dataset(const H5::DataSet& handle, hsize_t buffer_size) { + validate_1d_string_dataset(handle, get_1d_length(handle, false), buffer_size); +} + +/** + * Check that a scalar string attribute is valid. + * Currently, this involves checking that there are no `NULL` entries for variable-length string datatypes. + * For fixed-width string attributes, this function is a no-op. + * + * @param handle Handle to the HDF5 string attribute. 
+ */ +inline void validate_scalar_string_attribute(const H5::Attribute& attr) { + auto dtype = attr.getDataType(); + if (!dtype.isVariableStr()) { + return; + } + + auto mspace = attr.getSpace(); + char* buffer; + attr.read(dtype, &buffer); + [[maybe_unused]] VariableStringCleaner deletor(dtype.getId(), mspace.getId(), &buffer); + if (buffer == NULL) { + throw std::runtime_error("detected a NULL pointer for a variable length string attribute"); + } +} + +/** + * Check that a 1-dimensional string attribute is valid. + * Currently, this involves checking that there are no `NULL` entries for variable-length string datatypes. + * For fixed-width string attributes, this function is a no-op. + * + * @param handle Handle to the HDF5 string attribute. + * @param full_length Length of the attribute as a 1-dimensional vector. + */ +inline void validate_1d_string_attribute(const H5::Attribute& attr, hsize_t full_length) { + auto dtype = attr.getDataType(); + if (!dtype.isVariableStr()) { + return; + } + + auto mspace = attr.getSpace(); + std::vector buffer(full_length); + attr.read(dtype, buffer.data()); + [[maybe_unused]] VariableStringCleaner deletor(dtype.getId(), mspace.getId(), buffer.data()); + for (hsize_t i = 0; i < full_length; ++i) { + if (buffer[i] == NULL) { + throw std::runtime_error("detected a NULL pointer for a variable length string attribute"); + } + } +} + +/** + * Overload for `validate_1d_string_attribute()` that automatically determines its length via `get_1d_length()`. + * @param handle Handle to the HDF5 string attribute. 
+ */ +inline void validate_1d_string_attribute(const H5::Attribute& attr) { + validate_1d_string_attribute(attr, get_1d_length(attr, false)); +} + +} + +} + +#endif diff --git a/inst/include/takane/_height.hpp b/inst/include/takane/_height.hpp index 218baec..7223214 100644 --- a/inst/include/takane/_height.hpp +++ b/inst/include/takane/_height.hpp @@ -12,6 +12,10 @@ #include "simple_list.hpp" #include "data_frame.hpp" #include "data_frame_factor.hpp" +#include "genomic_ranges.hpp" +#include "atomic_vector_list.hpp" +#include "data_frame_list.hpp" +#include "genomic_ranges_list.hpp" /** * @file _height.hpp @@ -32,6 +36,10 @@ inline auto default_registry() { registry["simple_list"] = [](const std::filesystem::path& p, const Options& o) -> size_t { return simple_list::height(p, o); }; registry["data_frame"] = [](const std::filesystem::path& p, const Options& o) -> size_t { return data_frame::height(p, o); }; registry["data_frame_factor"] = [](const std::filesystem::path& p, const Options& o) -> size_t { return data_frame_factor::height(p, o); }; + registry["genomic_ranges"] = [](const std::filesystem::path& p, const Options& o) -> size_t { return genomic_ranges::height(p, o); }; + registry["atomic_vector_list"] = [](const std::filesystem::path& p, const Options& o) -> size_t { return atomic_vector_list::height(p, o); }; + registry["data_frame_list"] = [](const std::filesystem::path& p, const Options& o) -> size_t { return data_frame_list::height(p, o); }; + registry["genomic_ranges_list"] = [](const std::filesystem::path& p, const Options& o) -> size_t { return genomic_ranges_list::height(p, o); }; return registry; } diff --git a/inst/include/takane/_satisfies_interface.hpp b/inst/include/takane/_satisfies_interface.hpp new file mode 100644 index 0000000..5fb49f4 --- /dev/null +++ b/inst/include/takane/_satisfies_interface.hpp @@ -0,0 +1,53 @@ +#ifndef TAKANE_SATISFIES_INTERFACE_HPP +#define TAKANE_SATISFIES_INTERFACE_HPP + +#include +#include +#include + +namespace 
takane { + +/** + * @cond + */ +namespace internal_satisfies_interface { + +inline auto default_registry() { + std::unordered_map > registry; + registry["SIMPLE_LIST"] = { "simple_list" }; + registry["DATA_FRAME"] = { "data_frame" }; + return registry; +} + +} +/** + * @endcond + */ + +/** + * Registry of object types that satisfy a particular object interface. + * Each key is the interface and each value is the set of all types that satisfy it. + * Applications can extend the **takane** framework by adding custom types to each set. + */ +inline std::unordered_map > satisfies_interface_registry = internal_satisfies_interface::default_registry(); + +/** + * Check whether a particular object type satisfies a particular object interface. + * This can be used by specifications to check that child components satisfy certain expectations. + * + * @param type Object type. + * @param interface Interface type. + * @returns Whether `type` satisfies `interface`. + */ +inline bool satisfies_interface(const std::string& type, const std::string& interface) { + auto it = satisfies_interface_registry.find(interface); + if (it == satisfies_interface_registry.end()) { + return false; + } + const auto& listing = it->second; + return listing.find(type) != listing.end(); +} + +} + +#endif diff --git a/inst/include/takane/_validate.hpp b/inst/include/takane/_validate.hpp index c960282..b3ae6a9 100644 --- a/inst/include/takane/_validate.hpp +++ b/inst/include/takane/_validate.hpp @@ -12,6 +12,11 @@ #include "simple_list.hpp" #include "data_frame.hpp" #include "data_frame_factor.hpp" +#include "sequence_information.hpp" +#include "genomic_ranges.hpp" +#include "atomic_vector_list.hpp" +#include "data_frame_list.hpp" +#include "genomic_ranges_list.hpp" /** * @file _validate.hpp @@ -32,6 +37,11 @@ inline auto default_registry() { registry["simple_list"] = [](const std::filesystem::path& p, const Options& o) { simple_list::validate(p, o); }; registry["data_frame"] = [](const 
std::filesystem::path& p, const Options& o) { data_frame::validate(p, o); }; registry["data_frame_factor"] = [](const std::filesystem::path& p, const Options& o) { data_frame_factor::validate(p, o); }; + registry["sequence_information"] = [](const std::filesystem::path& p, const Options& o) { sequence_information::validate(p, o); }; + registry["genomic_ranges"] = [](const std::filesystem::path& p, const Options& o) { genomic_ranges::validate(p, o); }; + registry["atomic_vector_list"] = [](const std::filesystem::path& p, const Options& o) { atomic_vector_list::validate(p, o); }; + registry["data_frame_list"] = [](const std::filesystem::path& p, const Options& o) { data_frame_list::validate(p, o); }; + registry["genomic_ranges_list"] = [](const std::filesystem::path& p, const Options& o) { genomic_ranges_list::validate(p, o); }; return registry; } diff --git a/inst/include/takane/atomic_vector.hpp b/inst/include/takane/atomic_vector.hpp index 5278317..44cfd9d 100644 --- a/inst/include/takane/atomic_vector.hpp +++ b/inst/include/takane/atomic_vector.hpp @@ -8,7 +8,7 @@ #include "ritsuko/hdf5/hdf5.hpp" #include "utils_public.hpp" -#include "utils_hdf5.hpp" +#include "utils_string.hpp" /** * @file atomic_vector.hpp @@ -28,42 +28,28 @@ namespace atomic_vector { * @param options Validation options, typically for reading performance. 
*/ inline void validate(const std::filesystem::path& path, const Options& options) try { - H5::H5File handle((path / "contents.h5").string(), H5F_ACC_RDONLY); - - const char* parent = "atomic_vector"; - if (!handle.exists(parent) || handle.childObjType(parent) != H5O_TYPE_GROUP) { - throw std::runtime_error("expected an 'atomic_vector' group"); - } - auto ghandle = handle.openGroup(parent); + auto handle = ritsuko::hdf5::open_file(path / "contents.h5"); + auto ghandle = ritsuko::hdf5::open_group(handle, "atomic_vector"); - auto vstring = ritsuko::hdf5::load_scalar_string_attribute(ghandle, "version"); + auto vstring = ritsuko::hdf5::open_and_load_scalar_string_attribute(ghandle, "version"); auto version = ritsuko::parse_version_string(vstring.c_str(), vstring.size(), /* skip_patch = */ true); if (version.major != 1) { throw std::runtime_error("unsupported version string '" + vstring + "'"); } - auto dhandle = ritsuko::hdf5::get_dataset(ghandle, "values"); + auto dhandle = ritsuko::hdf5::open_dataset(ghandle, "values"); auto vlen = ritsuko::hdf5::get_1d_length(dhandle.getSpace(), false); - auto type = ritsuko::hdf5::load_scalar_string_attribute(ghandle, "type"); + auto type = ritsuko::hdf5::open_and_load_scalar_string_attribute(ghandle, "type"); const char* missing_attr_name = "missing-value-placeholder"; - bool has_missing = dhandle.attrExists(missing_attr_name); if (type == "string") { if (dhandle.getTypeClass() != H5T_STRING) { throw std::runtime_error("expected a string datatype for 'values'"); } - - std::string missing_value; - if (has_missing) { - auto missing_attr = ritsuko::hdf5::get_missing_placeholder_attribute(dhandle, missing_attr_name, /* type_class_only = */ true); - missing_value = ritsuko::hdf5::load_scalar_string_attribute(missing_attr); - } - - if (ghandle.attrExists("format")) { - auto format = ritsuko::hdf5::load_scalar_string_attribute(ghandle, "format"); - internal_hdf5::validate_string_format(dhandle, vlen, format, has_missing, missing_value, 
options.hdf5_buffer_size); - } + auto missingness = ritsuko::hdf5::open_and_load_optional_string_missing_placeholder(dhandle, missing_attr_name); + std::string format = internal_string::fetch_format_attribute(ghandle); + internal_string::validate_string_format(dhandle, vlen, format, missingness.first, missingness.second, options.hdf5_buffer_size); } else { if (type == "integer") { @@ -82,21 +68,13 @@ inline void validate(const std::filesystem::path& path, const Options& options) throw std::runtime_error("unsupported type '" + type + "'"); } - if (has_missing) { - ritsuko::hdf5::get_missing_placeholder_attribute(dhandle, missing_attr_name); + if (dhandle.attrExists(missing_attr_name)) { + auto missing_attr = dhandle.openAttribute(missing_attr_name); + ritsuko::hdf5::check_missing_placeholder_attribute(dhandle, missing_attr); } } - if (ghandle.exists("names")) { - auto nhandle = ritsuko::hdf5::get_dataset(ghandle, "names"); - if (nhandle.getTypeClass() != H5T_STRING) { - throw std::runtime_error("'names' should be a string datatype class"); - } - auto nlen = ritsuko::hdf5::get_1d_length(nhandle.getSpace(), false); - if (vlen != nlen) { - throw std::runtime_error("'names' and 'values' should have the same length"); - } - } + internal_string::validate_names(ghandle, "names", vlen, options.hdf5_buffer_size); } catch (std::exception& e) { throw std::runtime_error("failed to validate an 'atomic_vector' at '" + path.string() + "'; " + std::string(e.what())); diff --git a/inst/include/takane/atomic_vector_list.hpp b/inst/include/takane/atomic_vector_list.hpp new file mode 100644 index 0000000..22a577a --- /dev/null +++ b/inst/include/takane/atomic_vector_list.hpp @@ -0,0 +1,45 @@ +#ifndef TAKANE_ATOMIC_VECTOR_LIST_HPP +#define TAKANE_ATOMIC_VECTOR_LIST_HPP + +#include "H5Cpp.h" + +#include +#include +#include + +#include "utils_public.hpp" +#include "utils_compressed_list.hpp" + +/** + * @file atomic_vector_list.hpp + * @brief Validation for atomic vector lists. 
+ */ + +namespace takane { + +namespace atomic_vector_list { + +/** + * @param path Path to the directory containing the atomic vector list. + * @param options Validation options, typically for reading performance. + */ +inline void validate(const std::filesystem::path& path, const Options& options) try { + internal_compressed_list::validate_directory(path, "atomic_vector_list", "atomic_vector", options); +} catch (std::exception& e) { + throw std::runtime_error("failed to validate an 'atomic_vector_list' object at '" + path.string() + "'; " + std::string(e.what())); +} + +/** + * @param path Path to a directory containing an atomic vector list. + * @param options Validation options, mostly for input performance. + * @return The length of the list. + */ +inline size_t height(const std::filesystem::path& path, const Options& options) { + return internal_compressed_list::height(path, "atomic_vector_list", options); +} + +} + +} + +#endif diff --git a/inst/include/takane/compressed_list.hpp b/inst/include/takane/compressed_list.hpp deleted file mode 100644 index 192653a..0000000 --- a/inst/include/takane/compressed_list.hpp +++ /dev/null @@ -1,143 +0,0 @@ -#ifndef TAKANE_COMPRESSED_LIST_HPP -#define TAKANE_COMPRESSED_LIST_HPP - -#include "comservatory/comservatory.hpp" - -#include "utils_csv.hpp" - -#include - -/** - * @file compressed_list.hpp - * @brief Validation for compressed lists. - */ - -namespace takane { - -/** - * @namespace takane::compressed_list - * @brief Definitions for compressed lists. - */ -namespace compressed_list { - -/** - * @brief Parameters for validating the compressed list file. - */ -struct Parameters { - /** - * Length of the compressed list. - */ - size_t length = 0; - - /** - * Total length of the concatenated elements. - */ - size_t concatenated = 0; - - /** - * Whether the compressed list is named. - */ - bool has_names = false; - - /** - * Whether to load and parse the file in parallel, see `comservatory::ReadOptions` for details. 
- */ - bool parallel = false; - - /** - * Version of the `compressed_list` format. - */ - int version = 1; -}; - -/** - * @cond - */ -template -CsvContents validate_base(ParseCommand parse, const Parameters& params, CsvFieldCreator* creator = NULL) { - DummyCsvFieldCreator default_creator; - if (creator == NULL) { - creator = &default_creator; - } - - comservatory::Contents contents; - CsvContents output; - if (params.has_names) { - auto ptr = creator->string(); - output.fields.emplace_back(ptr); - contents.fields.emplace_back(new CsvNameField(false, ptr)); - } - - auto ptr0 = creator->integer(); - output.fields.emplace_back(ptr0); - auto ptr = new CsvCompressedLengthField(static_cast(params.has_names), ptr0); - contents.fields.emplace_back(ptr); - - comservatory::ReadOptions opt; - opt.parallel = params.parallel; - parse(contents, opt); - if (contents.num_records() != params.length) { - throw std::runtime_error("number of records in the CSV file does not match the expected length"); - } - - if (params.concatenated != ptr->total) { - throw std::runtime_error("sum of lengths in the compressed list did not equal the expected concatenated total"); - } - - if (contents.names.back() != "number") { - throw std::runtime_error("column containing the compressed list lengths should be named 'number'"); - } - - return output; -} -/** - * @endcond - */ - -/** - * Checks if a CSV is correctly formatted for the `compressed_list` format. - * An error is raised if the file does not meet the specifications. - * - * @tparam Reader A **byteme** reader class. - * - * @param reader A stream of bytes from the CSV file. - * @param params Validation parameters. - * @param creator Factory to create objects for holding the contents of each CSV field. - * Defaults to a pointer to a `DummyFieldCreator` instance. - * - * @return Contents of the loaded CSV. - * Whether the `fields` member actually contains the CSV data depends on `creator`. 
- * If `params.has_names = true`, an additional field containing the names is present at the start. - */ -template -CsvContents validate(Reader& reader, const Parameters& params, CsvFieldCreator* creator = NULL) { - return validate_base( - [&](comservatory::Contents& contents, const comservatory::ReadOptions& opts) -> void { comservatory::read(reader, contents, opts); }, - params, - creator - ); -} - -/** - * Overload of `compressed_list::validate()` that accepts a file path. - * - * @param path Path to the CSV file. - * @param params Validation parameters. - * @param creator Factory to create objects for holding the contents of each CSV field. - * Defaults to a pointer to a `DummyFieldCreator` instance. - * - * @return Contents of the loaded CSV. - */ -inline CsvContents validate(const char* path, const Parameters& params, CsvFieldCreator* creator = NULL) { - return validate_base( - [&](comservatory::Contents& contents, const comservatory::ReadOptions& opts) -> void { comservatory::read_file(path, contents, opts); }, - params, - creator - ); -} - -} - -} - -#endif diff --git a/inst/include/takane/data_frame.hpp b/inst/include/takane/data_frame.hpp index 1e3729e..f227397 100644 --- a/inst/include/takane/data_frame.hpp +++ b/inst/include/takane/data_frame.hpp @@ -13,7 +13,8 @@ #include #include "utils_public.hpp" -#include "utils_hdf5.hpp" +#include "utils_string.hpp" +#include "utils_factor.hpp" #include "utils_other.hpp" /** @@ -37,7 +38,7 @@ namespace data_frame { /** * @cond */ -inline void validate_row_names(const H5::Group& handle, hsize_t num_rows) try { +inline void validate_row_names(const H5::Group& handle, hsize_t num_rows, const Options& options) try { if (handle.childObjType("row_names") != H5O_TYPE_DATASET) { throw std::runtime_error("expected a 'row_names' dataset when row names are present"); } @@ -48,16 +49,13 @@ inline void validate_row_names(const H5::Group& handle, hsize_t num_rows) try { if (ritsuko::hdf5::get_1d_length(rnhandle.getSpace(), 
false) != num_rows) { throw std::runtime_error("expected 'row_names' to have length equal to the number of rows"); } + ritsuko::hdf5::validate_1d_string_dataset(rnhandle, num_rows, options.hdf5_buffer_size); } catch (std::exception& e) { throw std::runtime_error("failed to validate the row names for '" + ritsuko::hdf5::get_name(handle) + "'; " + std::string(e.what())); } inline hsize_t validate_column_names(const H5::Group& ghandle, const Options& options) try { - if (!ghandle.exists("column_names") || ghandle.childObjType("column_names") != H5O_TYPE_DATASET) { - throw std::runtime_error("expected a 'column_names' dataset"); - } - - auto cnhandle = ghandle.openDataSet("column_names"); + auto cnhandle = ritsuko::hdf5::open_dataset(ghandle, "column_names"); if (cnhandle.getTypeClass() != H5T_STRING) { throw std::runtime_error("expected 'column_names' to be a string dataset"); } @@ -65,21 +63,17 @@ inline hsize_t validate_column_names(const H5::Group& ghandle, const Options& op auto num_cols = ritsuko::hdf5::get_1d_length(cnhandle.getSpace(), false); std::unordered_set column_names; - ritsuko::hdf5::load_1d_string_dataset( - cnhandle, - num_cols, - options.hdf5_buffer_size, - [&](size_t, const char* p, size_t l) { - if (l == 0) { - throw std::runtime_error("column names should not be empty strings"); - } - std::string col_name(p, p + l); - if (column_names.find(col_name) != column_names.end()) { - throw std::runtime_error("duplicated column name '" + col_name + "'"); - } - column_names.insert(std::move(col_name)); + ritsuko::hdf5::Stream1dStringDataset stream(&cnhandle, num_cols, options.hdf5_buffer_size); + for (size_t c = 0; c < num_cols; ++c, stream.next()) { + auto x = stream.steal(); + if (x.empty()) { + throw std::runtime_error("column names should not be empty strings"); } - ); + if (column_names.find(x) != column_names.end()) { + throw std::runtime_error("duplicated column name '" + x + "'"); + } + column_names.insert(std::move(x)); + } return num_cols; @@ 
-88,52 +82,38 @@ inline hsize_t validate_column_names(const H5::Group& ghandle, const Options& op } inline void validate_column(const H5::Group& dhandle, const std::string& dset_name, hsize_t num_rows, const Options& options) try { - if (dhandle.childObjType(dset_name) == H5O_TYPE_GROUP) { + auto dtype = dhandle.childObjType(dset_name); + if (dtype == H5O_TYPE_GROUP) { auto fhandle = dhandle.openGroup(dset_name); - auto type = ritsuko::hdf5::load_scalar_string_attribute(fhandle, "type"); + auto type = ritsuko::hdf5::open_and_load_scalar_string_attribute(fhandle, "type"); if (type != "factor") { throw std::runtime_error("expected HDF5 groups to have a 'type' attribute set to 'factor'"); } - if (fhandle.attrExists("ordered")) { - auto attr = ritsuko::hdf5::get_scalar_attribute(fhandle, "ordered"); - if (ritsuko::hdf5::exceeds_integer_limit(attr, 32, true)) { - throw std::runtime_error("an 'ordered' attribute on a factor column should have a datatype that fits in a 32-bit signed integer"); - } - } + internal_factor::check_ordered_attribute(fhandle); - auto num_levels = internal_hdf5::validate_factor_levels(fhandle, "levels", options.hdf5_buffer_size); - auto num_codes = internal_hdf5::validate_factor_codes(fhandle, "codes", num_levels, options.hdf5_buffer_size); + auto num_levels = internal_factor::validate_factor_levels(fhandle, "levels", options.hdf5_buffer_size); + auto num_codes = internal_factor::validate_factor_codes(fhandle, "codes", num_levels, options.hdf5_buffer_size); if (num_codes != num_rows) { throw std::runtime_error("expected column to have length equal to the number of rows"); } - - } else { - auto xhandle = ritsuko::hdf5::get_dataset(dhandle, dset_name.c_str()); + } else if (dtype == H5O_TYPE_DATASET) { + auto xhandle = dhandle.openDataSet(dset_name); if (num_rows != ritsuko::hdf5::get_1d_length(xhandle.getSpace(), false)) { throw std::runtime_error("expected column to have length equal to the number of rows"); } const char* missing_attr_name = 
"missing-value-placeholder"; - bool has_missing = xhandle.attrExists(missing_attr_name); - auto type = ritsuko::hdf5::load_scalar_string_attribute(xhandle, "type"); + auto type = ritsuko::hdf5::open_and_load_scalar_string_attribute(xhandle, "type"); if (type == "string") { if (xhandle.getTypeClass() != H5T_STRING) { throw std::runtime_error("expected column " + dset_name + " to be a string dataset"); } - - std::string missing_value; - if (has_missing) { - auto missing_attr = ritsuko::hdf5::get_missing_placeholder_attribute(xhandle, missing_attr_name, /* type_class_only = */ true); - missing_value = ritsuko::hdf5::load_scalar_string_attribute(missing_attr); - } - - if (xhandle.attrExists("format")) { - auto format = ritsuko::hdf5::load_scalar_string_attribute(xhandle, "format"); - internal_hdf5::validate_string_format(xhandle, num_rows, format, has_missing, missing_value, options.hdf5_buffer_size); - } + auto missingness = ritsuko::hdf5::open_and_load_optional_string_missing_placeholder(xhandle, missing_attr_name); + std::string format = internal_string::fetch_format_attribute(xhandle); + internal_string::validate_string_format(xhandle, num_rows, format, missingness.first, missingness.second, options.hdf5_buffer_size); } else { if (type == "integer") { @@ -152,10 +132,14 @@ inline void validate_column(const H5::Group& dhandle, const std::string& dset_na throw std::runtime_error("unknown column type '" + type + "'"); } - if (has_missing) { - ritsuko::hdf5::get_missing_placeholder_attribute(xhandle, missing_attr_name); + if (xhandle.attrExists(missing_attr_name)) { + auto ahandle = xhandle.openAttribute(missing_attr_name); + ritsuko::hdf5::check_missing_placeholder_attribute(xhandle, ahandle); } } + + } else { + throw std::runtime_error("unknown HDF5 object type"); } } catch (std::exception& e) { @@ -170,22 +154,17 @@ inline void validate_column(const H5::Group& dhandle, const std::string& dset_na * @param options Validation options, typically for reading performance. 
*/ inline void validate(const std::filesystem::path& path, const Options& options) { - auto h5path = path / "basic_columns.h5"; - - H5::H5File handle(h5path, H5F_ACC_RDONLY); - if (!handle.exists("data_frame") || handle.childObjType("data_frame") != H5O_TYPE_GROUP) { - throw std::runtime_error("expected a 'data_frame' group"); - } - auto ghandle = handle.openGroup("data_frame"); + auto handle = ritsuko::hdf5::open_file(path / "basic_columns.h5"); + auto ghandle = ritsuko::hdf5::open_group(handle, "data_frame"); - auto vstring = ritsuko::hdf5::load_scalar_string_attribute(ghandle, "version"); + auto vstring = ritsuko::hdf5::open_and_load_scalar_string_attribute(ghandle, "version"); auto version = ritsuko::parse_version_string(vstring.c_str(), vstring.size(), /* skip_patch = */ true); if (version.major != 1) { throw std::runtime_error("unsupported version '" + vstring + "'"); } // Checking the number of rows. - auto attr = ritsuko::hdf5::get_scalar_attribute(ghandle, "row-count"); + auto attr = ritsuko::hdf5::open_scalar_attribute(ghandle, "row-count"); if (ritsuko::hdf5::exceeds_integer_limit(attr, 64, false)) { throw std::runtime_error("'row-count' attribute should have a datatype that fits in a 64-bit unsigned integer"); } @@ -194,15 +173,12 @@ inline void validate(const std::filesystem::path& path, const Options& options) // Checking row and column names. if (ghandle.exists("row_names")) { - validate_row_names(ghandle, num_rows); + validate_row_names(ghandle, num_rows, options); } size_t NC = validate_column_names(ghandle, options); // Finally iterating through the columns. 
- if (!ghandle.exists("data") || ghandle.childObjType("data") != H5O_TYPE_GROUP) { - throw std::runtime_error("expected a 'data_frame/data' group"); - } - auto dhandle = ghandle.openGroup("data"); + auto dhandle = ritsuko::hdf5::open_group(ghandle, "data"); hsize_t found = 0; for (size_t c = 0; c < NC; ++c) { @@ -229,18 +205,8 @@ inline void validate(const std::filesystem::path& path, const Options& options) throw std::runtime_error("more objects present in the 'data_frame/data' group than expected"); } - // Checking the metadata. - try { - internal_other::validate_mcols(path / "column_annotations", NC, options); - } catch (std::exception& e) { - throw std::runtime_error("failed to validate 'column_annotations'; " + std::string(e.what())); - } - - try { - internal_other::validate_metadata(path / "other_annotations", options); - } catch (std::exception& e) { - throw std::runtime_error("failed to validate 'other_annotations'; " + std::string(e.what())); - } + internal_other::validate_mcols(path, "column_annotations", NC, options); + internal_other::validate_metadata(path, "other_annotations", options); } /** @@ -254,10 +220,7 @@ inline size_t height(const std::filesystem::path& path, const Options&) { // Assume it's all valid already. 
H5::H5File handle(h5path, H5F_ACC_RDONLY); auto ghandle = handle.openGroup("data_frame"); - auto attr = ritsuko::hdf5::get_scalar_attribute(ghandle, "row-count"); - uint64_t num_rows = 0; - attr.read(H5::PredType::NATIVE_UINT64, &num_rows); - return num_rows; + return ritsuko::hdf5::load_scalar_numeric_attribute(ghandle.openAttribute("row-count")); } } diff --git a/inst/include/takane/data_frame_factor.hpp b/inst/include/takane/data_frame_factor.hpp index 266c473..c4e8a04 100644 --- a/inst/include/takane/data_frame_factor.hpp +++ b/inst/include/takane/data_frame_factor.hpp @@ -8,7 +8,8 @@ #include "ritsuko/hdf5/hdf5.hpp" #include "utils_public.hpp" -#include "utils_hdf5.hpp" +#include "utils_string.hpp" +#include "utils_factor.hpp" /** * @file data_frame_factor.hpp @@ -22,6 +23,7 @@ namespace takane { */ void validate(const std::filesystem::path&, const std::string&, const Options&); size_t height(const std::filesystem::path&, const std::string&, const Options&); +bool satisfies_interface(const std::string&, const std::string&); /** * @endcond */ @@ -48,15 +50,10 @@ inline std::function +#include +#include + +#include "utils_public.hpp" +#include "utils_compressed_list.hpp" + +/** + * @file data_frame_list.hpp + * @brief Validation for data frame lists. + */ + +namespace takane { + +namespace data_frame_list { + +/** + * @param path Path to the directory containing the data frame list. + * @param options Validation options, typically for reading performance. + */ +inline void validate(const std::filesystem::path& path, const Options& options) try { + internal_compressed_list::validate_directory(path, "data_frame_list", "DATA_FRAME", options); +} catch (std::exception& e) { + throw std::runtime_error("failed to validate an 'data_frame_list' object at '" + path.string() + "'; " + std::string(e.what())); +} + +/** + * @param path Path to a directory containing a data frame list. + * @param options Validation options, mostly for input performance.
+ * @return The length of the list. + */ +inline size_t height(const std::filesystem::path& path, const Options& options) { + return internal_compressed_list::height(path, "data_frame_list", options); +} + +} + +} + +#endif diff --git a/inst/include/takane/genomic_ranges.hpp b/inst/include/takane/genomic_ranges.hpp index 511bb3f..b07cfd8 100644 --- a/inst/include/takane/genomic_ranges.hpp +++ b/inst/include/takane/genomic_ranges.hpp @@ -4,11 +4,16 @@ #include "ritsuko/ritsuko.hpp" #include "comservatory/comservatory.hpp" -#include "WrappedOption.hpp" - -#include #include +#include #include +#include +#include +#include + +#include "utils_string.hpp" +#include "utils_public.hpp" +#include "utils_other.hpp" /** * @file genomic_ranges.hpp @@ -18,200 +23,203 @@ namespace takane { /** - * @namespace takane::genomic_ranges - * @brief Definitions for genomic ranges. + * @cond + */ +void validate(const std::filesystem::path&, const std::string&, const Options& options); +/** + * @endcond */ -namespace genomic_ranges { /** - * @brief Parameters for validating the genomic ranges file. + * @namespace takane::genomic_ranges + * @brief Definitions for genomic ranges. */ -struct Parameters { - /** - * Number of genomic ranges in this object. - */ - size_t num_ranges; - - /** - * Whether the ranges are named. - */ - bool has_names; - - /** - * Universe of sequence names for this object. - */ - WrappedOption > seqnames; - - /** - * Whether to load and parse the file in parallel, see `comservatory::ReadOptions` for details. - */ - bool parallel = false; - - /** - * Version of the `genomic_ranges` format. 
- */ - int version = 1; -}; +namespace genomic_ranges { /** * @cond */ -struct NamesField : public comservatory::DummyStringField { - void add_missing() { - throw std::runtime_error("missing values should not be present in the names column"); - } -}; +namespace internal { -struct SeqnamesField : public comservatory::DummyStringField { - void add_missing() { - throw std::runtime_error("missing values should not be present in the seqnames column"); - } +struct SequenceLimits { + SequenceLimits(size_t n) : restricted(n), seqlen(n) {} + std::vector restricted; + std::vector seqlen; +}; - void push_back(std::string x) { - if (all_seqnames->find(x) == all_seqnames->end()) { - throw std::runtime_error("unknown sequence name '" + x + "'"); - } - comservatory::DummyStringField::push_back(std::move(x)); +inline SequenceLimits find_sequence_limits(const std::filesystem::path& path, const Options& options) { + auto xtype = read_object_type(path); + if (xtype != "sequence_information") { + throw std::runtime_error("'sequence_information' directory should contain a 'sequence_information' object"); } + ::takane::validate(path, xtype, options); - const std::unordered_set* all_seqnames = NULL; -}; + auto fpath = path / "info.h5"; + H5::H5File handle(fpath, H5F_ACC_RDONLY); + auto ghandle = handle.openGroup("sequence_information"); -struct StartField : public comservatory::DummyNumberField { - void add_missing() { - throw std::runtime_error("missing values should not be present in the start column"); - } + auto lhandle = ghandle.openDataSet("length"); + auto num_seq = ritsuko::hdf5::get_1d_length(lhandle.getSpace(), false); + ritsuko::hdf5::Stream1dNumericDataset lstream(&lhandle, num_seq, options.hdf5_buffer_size); + auto lmissing = ritsuko::hdf5::open_and_load_optional_numeric_missing_placeholder(lhandle, "missing-value-placeholder"); - void push_back(double x) { - if (x < -2147483648 || x > 2147483647) { // constrain within limits. 
- throw std::runtime_error("start position does not fit inside a 32-bit signed integer"); - } - if (x != std::floor(x)) { - throw std::runtime_error("start position is not an integer"); - } - last = x; - comservatory::DummyNumberField::push_back(x); - } + auto chandle = ghandle.openDataSet("circular"); + ritsuko::hdf5::Stream1dNumericDataset cstream(&chandle, num_seq, options.hdf5_buffer_size); + auto cmissing = ritsuko::hdf5::open_and_load_optional_numeric_missing_placeholder(chandle, "missing-value-placeholder"); - int32_t last = 0; -}; + SequenceLimits output(num_seq); + auto& restricted = output.restricted; + auto& seqlen = output.seqlen; -struct EndField : public comservatory::DummyNumberField { - void add_missing() { - throw std::runtime_error("missing values should not be present in the strand column"); - } + for (size_t i = 0; i < num_seq; ++i, lstream.next(), cstream.next()) { + auto slen = lstream.get(); + auto circ = cstream.get(); + seqlen[i] = slen; - void push_back(double x) { - if (x < -2147483648 || x > 2147483647) { // constrain within limits. - throw std::runtime_error("end position does not fit inside a 32-bit signed integer"); + // Skipping restriction if the sequence length is missing OR the sequence is circular. 
+ if (lmissing.first && lmissing.second == slen) { + continue; } - if (x != std::floor(x)) { - throw std::runtime_error("end position is not an integer"); + if (circ && !(cmissing.first && cmissing.second == circ)) { + continue; } - comservatory::DummyNumberField::push_back(x); - if (start->size() != size()) { - throw std::runtime_error("'start' and 'end' validator fields are out of sync"); - } - if (x + 1 < start->last) { - throw std::runtime_error("'end' coordinate must be greater than or equal to 'start - 1'"); - } + restricted[i] = true; } - const StartField* start = NULL; -}; + return output; +} -struct StrandField : public comservatory::DummyStringField { - void add_missing() { - throw std::runtime_error("missing values should not be present in the strand column"); - } +} +/** + * @endcond + */ - void push_back(std::string x) { - if (x.size() != 1 || (x[0] != '+' && x[0] != '-' && x[0] != '*')) { - throw std::runtime_error("invalid strand '" + x + "'"); - } - comservatory::DummyStringField::push_back(std::move(x)); +/** + * @param path Path to the directory containing the genomic ranges. + * @param options Validation options, typically for reading performance. + */ +inline void validate(const std::filesystem::path& path, const Options& options) try { + // Figuring out the sequence length constraints. + auto limits = internal::find_sequence_limits(path / "sequence_information", options); + const auto& restricted = limits.restricted; + const auto& seqlen = limits.seqlen; + size_t num_sequences = restricted.size(); + + // Now loading all three components. 
+ auto handle = ritsuko::hdf5::open_file(path / "ranges.h5"); + auto ghandle = ritsuko::hdf5::open_group(handle, "genomic_ranges"); + + auto id_handle = ritsuko::hdf5::open_dataset(ghandle, "sequence"); + auto num_ranges = ritsuko::hdf5::get_1d_length(id_handle, false); + if (ritsuko::hdf5::exceeds_integer_limit(id_handle, 64, false)) { + throw std::runtime_error("expected 'sequence' to have a datatype that fits into a 64-bit unsigned integer"); } -}; + ritsuko::hdf5::Stream1dNumericDataset id_stream(&id_handle, num_ranges, options.hdf5_buffer_size); -template -void validate_base(ParseCommand parse, const Parameters& params) { - comservatory::Contents contents; - if (params.has_names) { - contents.fields.emplace_back(new NamesField); + auto start_handle = ritsuko::hdf5::open_dataset(ghandle, "start"); + if (num_ranges != ritsuko::hdf5::get_1d_length(start_handle, false)) { + throw std::runtime_error("'start' and 'sequence' should have the same length"); + } + if (ritsuko::hdf5::exceeds_integer_limit(start_handle, 64, true)) { + throw std::runtime_error("expected 'start' to have a datatype that fits into a 64-bit signed integer"); } + ritsuko::hdf5::Stream1dNumericDataset start_stream(&start_handle, num_ranges, options.hdf5_buffer_size); - { - auto ptr = new SeqnamesField; - ptr->all_seqnames = params.seqnames.get(); - contents.fields.emplace_back(ptr); + auto width_handle = ritsuko::hdf5::open_dataset(ghandle, "width"); + if (num_ranges != ritsuko::hdf5::get_1d_length(width_handle, false)) { + throw std::runtime_error("'width' and 'sequence' should have the same length"); } - - { - auto sptr = new StartField; - contents.fields.emplace_back(sptr); - auto eptr = new EndField; - eptr->start = sptr; - contents.fields.emplace_back(eptr); + if (ritsuko::hdf5::exceeds_integer_limit(width_handle, 64, false)) { + throw std::runtime_error("expected 'width' to have a datatype that fits into a 64-bit unsigned integer"); } + ritsuko::hdf5::Stream1dNumericDataset 
width_stream(&width_handle, num_ranges, options.hdf5_buffer_size); - contents.fields.emplace_back(new StrandField); + constexpr uint64_t end_limit = std::numeric_limits::max(); + for (size_t i = 0; i < num_ranges; ++i, id_stream.next(), start_stream.next(), width_stream.next()) { + auto id = id_stream.get(); + if (id >= num_sequences) { + throw std::runtime_error("'sequence' must be less than the number of sequences (got " + std::to_string(id) + ")"); + } - comservatory::ReadOptions opt; - opt.parallel = params.parallel; - parse(contents, opt); - if (contents.num_records() != params.num_ranges) { - throw std::runtime_error("number of records in the CSV file does not match the expected number of ranges"); - } + auto start = start_stream.get(); + auto width = width_stream.get(); + + if (restricted[id]) { + if (start < 1) { + throw std::runtime_error("non-positive start position (" + std::to_string(start) + ") for non-circular sequence"); + } + + auto spos = static_cast(start); + auto limit = seqlen[id]; + if (spos > limit) { + throw std::runtime_error("start position beyond sequence length (" + std::to_string(start) + " > " + std::to_string(limit) + ") for non-circular sequence"); + } + + // The LHS should not overflow as 'spos >= 1' so 'limit - spos + 1' should still be no greater than 'limit'. 
+ if (limit - spos + 1 < width) { + throw std::runtime_error("end position beyond sequence length (" + + std::to_string(start) + " + " + std::to_string(width) + " > " + std::to_string(limit) + + ") for non-circular sequence"); + } + } - if (contents.names[0 + params.has_names] != "seqnames") { - throw std::runtime_error("expected the first (non-name) column to be 'seqnames'"); - } - if (contents.names[1 + params.has_names] != "start") { - throw std::runtime_error("expected the second (non-name) column to be 'start'"); - } - if (contents.names[2 + params.has_names] != "end") { - throw std::runtime_error("expected the third (non-name) column to be 'end'"); + bool exceeded = false; + if (start > 0) { + // 'end_limit - start' is always non-negative as 'end_limit' is the largest value of an int64_t and 'start' is also int64_t. + exceeded = (end_limit - static_cast(start) < width); + } else { + // 'end_limit - start' will not overflow a uint64_t, because 'end_limit' is the largest value of an int64_t and 'start' is also 'int64_t'.
+ exceeded = (end_limit + static_cast(-start) < width); + } + if (exceeded) { + throw std::runtime_error("end position beyond the range of a 64-bit integer (" + std::to_string(start) + " + " + std::to_string(width) + ")"); + } } - if (contents.names[3 + params.has_names] != "strand") { - throw std::runtime_error("expected the fourth (non-name) column to be 'strand'"); + + { + auto strand_handle = ritsuko::hdf5::open_dataset(ghandle, "strand"); + if (num_ranges != ritsuko::hdf5::get_1d_length(strand_handle, false)) { + throw std::runtime_error("'strand' and 'sequence' should have the same length"); + } + if (ritsuko::hdf5::exceeds_integer_limit(strand_handle, 32, true)) { + throw std::runtime_error("expected 'strand' to have a datatype that fits into a 32-bit signed integer"); + } + + ritsuko::hdf5::Stream1dNumericDataset strand_stream(&strand_handle, num_ranges, options.hdf5_buffer_size); + for (hsize_t i = 0; i < num_ranges; ++i, strand_stream.next()) { + auto x = strand_stream.get(); + if (x < -1 || x > 1) { + throw std::runtime_error("values of 'strand' should be one of 0, -1, or 1 (got " + std::to_string(x) + ")"); + } + } } -} -/** - * @endcond - */ -/** - * Checks if a CSV data frame is correctly formatted for genomic ranges. - * An error is raised if the file does not meet the specifications. - * - * @tparam Reader A **byteme** reader class. - * - * @param reader A stream of bytes from the CSV file. - * @param params Validation parameters. 
- */ -template -void validate(Reader& reader, const Parameters& params) { - validate_base( - [&](comservatory::Contents& contents, const comservatory::ReadOptions& opt) -> void { comservatory::read(reader, contents, opt); }, - params - ); + internal_other::validate_mcols(path, "range_annotations", num_ranges, options); + internal_other::validate_metadata(path, "other_annotations", options); + + internal_string::validate_names(ghandle, "name", num_ranges, options.hdf5_buffer_size); + +} catch (std::exception& e) { + throw std::runtime_error("failed to validate 'genomic_ranges' object at '" + path.string() + "'; " + std::string(e.what())); } /** - * Checks if a CSV data frame is correctly formatted for genomic ranges. - * An error is raised if the file does not meet the specifications. - * - * @param path Path to the CSV file. - * @param params Validation parameters. + * @param path Path to a directory containing genomic ranges. + * @param options Validation options, mostly for input performance. + * @return The number of ranges. */ -inline void validate(const char* path, const Parameters& params) { - validate_base( - [&](comservatory::Contents& contents, const comservatory::ReadOptions& opt) -> void { comservatory::read_file(path, contents, opt); }, - params - ); +inline size_t height(const std::filesystem::path& path, const Options&) { + auto h5path = path / "ranges.h5"; + + // Assume it's all valid already. 
+ H5::H5File handle(h5path, H5F_ACC_RDONLY); + auto ghandle = handle.openGroup("genomic_ranges"); + auto dhandle = ghandle.openDataSet("sequence"); + return ritsuko::hdf5::get_1d_length(dhandle, false); } + } } diff --git a/inst/include/takane/genomic_ranges_list.hpp b/inst/include/takane/genomic_ranges_list.hpp new file mode 100644 index 0000000..4ec64ce --- /dev/null +++ b/inst/include/takane/genomic_ranges_list.hpp @@ -0,0 +1,45 @@ +#ifndef TAKANE_GENOMIC_RANGES_LIST_HPP +#define TAKANE_GENOMIC_RANGES_LIST_HPP + +#include "H5Cpp.h" + +#include +#include +#include + +#include "utils_public.hpp" +#include "utils_compressed_list.hpp" + +/** + * @file genomic_ranges_list.hpp + * @brief Validation for genomic ranges lists. + */ + +namespace takane { + +namespace genomic_ranges_list { + +/** + * @param path Path to the directory containing the genomic ranges list. + * @param options Validation options, typically for reading performance. + */ +inline void validate(const std::filesystem::path& path, const Options& options) try { + internal_compressed_list::validate_directory(path, "genomic_ranges_list", "genomic_ranges", options); +} catch (std::exception& e) { + throw std::runtime_error("failed to validate an 'genomic_ranges_list' object at '" + path.string() + "'; " + std::string(e.what())); +} + +/** + * @param path Path to a directory containing an genomic ranges list. + * @param options Validation options, mostly for input performance. + * @return The length of the list. 
+ */ +inline size_t height(const std::filesystem::path& path, const Options& options) { + return internal_compressed_list::height(path, "genomic_ranges_list", options); +} + +} + +} + +#endif diff --git a/inst/include/takane/sequence_information.hpp b/inst/include/takane/sequence_information.hpp index c9cfc18..537f55a 100644 --- a/inst/include/takane/sequence_information.hpp +++ b/inst/include/takane/sequence_information.hpp @@ -1,12 +1,14 @@ #ifndef TAKANE_SEQUENCE_INFORMATION_HPP #define TAKANE_SEQUENCE_INFORMATION_HPP -#include "comservatory/comservatory.hpp" - -#include "data_frame.hpp" -#include "utils_csv.hpp" +#include "ritsuko/hdf5/hdf5.hpp" +#include #include +#include +#include + +#include "utils_public.hpp" /** * @file sequence_information.hpp @@ -22,112 +24,77 @@ namespace takane { namespace sequence_information { /** - * @brief Parameters for validating the sequence information file. + * @param path Path to the directory containing the data frame. + * @param options Validation options, typically for reading performance. */ -struct Parameters { - /** - * Expected number of sequences. - */ - size_t num_sequences = 0; - - /** - * Whether to load and parse the file in parallel, see `comservatory::ReadOptions` for details. - */ - bool parallel = false; - - /** - * Version of the `sequence_information` format. 
- */ - int version = 1; -}; +inline void validate(const std::filesystem::path& path, const Options& options) try { + auto handle = ritsuko::hdf5::open_file(path / "info.h5"); + auto ghandle = ritsuko::hdf5::open_group(handle, "sequence_information"); -/** - * @cond - */ -template -CsvContents validate_base(ParseCommand parse, const Parameters& params, CsvFieldCreator* creator) { - DummyCsvFieldCreator default_creator; - if (creator == NULL) { - creator = &default_creator; + size_t nseq = 0; + { + auto nhandle = ritsuko::hdf5::open_dataset(ghandle, "name"); + if (nhandle.getTypeClass() != H5T_STRING) { + throw std::runtime_error("expected a string datatype class for 'name'"); + } + + nseq = ritsuko::hdf5::get_1d_length(nhandle.getSpace(), false); + std::unordered_set collected; + ritsuko::hdf5::Stream1dStringDataset stream(&nhandle, nseq, options.hdf5_buffer_size); + for (size_t s = 0; s < nseq; ++s, stream.next()) { + auto x = stream.steal(); + if (collected.find(x) != collected.end()) { + throw std::runtime_error("detected duplicated sequence name '" + x + "'"); + } + collected.insert(std::move(x)); + } } - comservatory::Contents contents; - CsvContents output; - contents.names.push_back("seqnames"); + const char* missing_attr_name = "missing-value-placeholder"; + { - auto ptr = creator->string(); - output.fields.emplace_back(ptr); - contents.fields.emplace_back(new CsvUniqueStringField(0, ptr)); + auto lhandle = ritsuko::hdf5::open_dataset(ghandle, "length"); + if (ritsuko::hdf5::exceeds_integer_limit(lhandle, 64, false)) { + throw std::runtime_error("expected a datatype for 'length' that fits in a 64-bit unsigned integer"); + } + if (ritsuko::hdf5::get_1d_length(lhandle.getSpace(), false) != nseq) { + throw std::runtime_error("expected lengths of 'length' and 'name' to be equal"); + } + if (lhandle.attrExists(missing_attr_name)) { + auto ahandle = lhandle.openAttribute(missing_attr_name); + ritsuko::hdf5::check_missing_placeholder_attribute(lhandle, ahandle); + 
} } - contents.names.push_back("seqlengths"); { - auto ptr = creator->integer(); - output.fields.emplace_back(ptr); - contents.fields.emplace_back(new CsvNonNegativeIntegerField(1, ptr)); + auto chandle = ritsuko::hdf5::open_dataset(ghandle, "circular"); + if (ritsuko::hdf5::exceeds_integer_limit(chandle, 32, true)) { + throw std::runtime_error("expected a datatype for 'circular' that fits in a 32-bit signed integer"); + } + if (ritsuko::hdf5::get_1d_length(chandle.getSpace(), false) != nseq) { + throw std::runtime_error("expected lengths of 'length' and 'circular' to be equal"); + } + if (chandle.attrExists(missing_attr_name)) { + auto ahandle = chandle.openAttribute(missing_attr_name); + ritsuko::hdf5::check_missing_placeholder_attribute(chandle, ahandle); + } } - contents.names.push_back("isCircular"); - output.fields.emplace_back(nullptr); - contents.fields.emplace_back(creator->boolean()); - - contents.names.push_back("genome"); - output.fields.emplace_back(nullptr); - contents.fields.emplace_back(creator->string()); - - comservatory::ReadOptions opt; - opt.parallel = params.parallel; - parse(contents, opt); - if (contents.num_records() != params.num_sequences) { - throw std::runtime_error("number of records in the CSV file does not match the expected number of ranges"); + { + auto gnhandle = ritsuko::hdf5::open_dataset(ghandle, "genome"); + if (gnhandle.getTypeClass() != H5T_STRING) { + throw std::runtime_error("expected a string datatype class for 'genome'"); + } + if (ritsuko::hdf5::get_1d_length(gnhandle.getSpace(), false) != nseq) { + throw std::runtime_error("expected lengths of 'length' and 'genome' to be equal"); + } + if (gnhandle.attrExists(missing_attr_name)) { + auto ahandle = gnhandle.openAttribute(missing_attr_name); + ritsuko::hdf5::check_missing_placeholder_attribute(gnhandle, ahandle); + } } - - output.reconstitute(contents.fields); - return output; -} -/** - * @endcond - */ - -/** - * Checks if a CSV data frame is correctly formatted for 
sequence information. - * An error is raised if the file does not meet the specifications. - * - * @tparam Reader A **byteme** reader class. - * - * @param reader A stream of bytes from the CSV file. - * @param params Validation parameters. - * @param creator Factory to create objects for holding the contents of each CSV field. - * Defaults to a pointer to a `DummyFieldCreator` instance. - * - * @return Contents of the loaded CSV. - * Whether the `fields` member actually contains the CSV data depends on `creator`. - */ -template -CsvContents validate(Reader& reader, const Parameters& params, CsvFieldCreator* creator = NULL) { - return validate_base( - [&](comservatory::Contents& contents, const comservatory::ReadOptions& opts) -> void { comservatory::read(reader, contents, opts); }, - params, - creator - ); -} - -/** - * Overload of `sequence_information::validate()` that accepts a file path. - * - * @param path Path to the CSV file. - * @param params Validation parameters. - * @param creator Factory to create objects for holding the contents of each CSV field. - * Defaults to a pointer to a `DummyFieldCreator` instance. - * - * @return Contents of the loaded CSV. 
- */ -inline CsvContents validate(const char* path, const Parameters& params, CsvFieldCreator* creator = NULL) { - return validate_base( - [&](comservatory::Contents& contents, const comservatory::ReadOptions& opts) -> void { comservatory::read_file(path, contents, opts); }, - params, - creator - ); +} catch (std::exception& e) { + throw std::runtime_error("failed to validate 'sequence_information' object at '" + path.string() + "'; " + std::string(e.what())); } } diff --git a/inst/include/takane/string_factor.hpp b/inst/include/takane/string_factor.hpp index 0b979e7..e8cc512 100644 --- a/inst/include/takane/string_factor.hpp +++ b/inst/include/takane/string_factor.hpp @@ -8,7 +8,8 @@ #include "ritsuko/hdf5/hdf5.hpp" #include "utils_public.hpp" -#include "utils_hdf5.hpp" +#include "utils_string.hpp" +#include "utils_factor.hpp" /** * @file string_factor.hpp @@ -28,41 +29,21 @@ namespace string_factor { * @param options Validation options, typically for reading performance. */ inline void validate(const std::filesystem::path& path, const Options& options) try { - H5::H5File handle((path / "contents.h5").string(), H5F_ACC_RDONLY); - - const char* parent = "string_factor"; - if (!handle.exists(parent) || handle.childObjType(parent) != H5O_TYPE_GROUP) { - throw std::runtime_error("expected a 'string_factor' group"); - } - auto ghandle = handle.openGroup(parent); + auto handle = ritsuko::hdf5::open_file(path / "contents.h5"); + auto ghandle = ritsuko::hdf5::open_group(handle, "string_factor"); - auto vstring = ritsuko::hdf5::load_scalar_string_attribute(ghandle, "version"); + auto vstring = ritsuko::hdf5::open_and_load_scalar_string_attribute(ghandle, "version"); auto version = ritsuko::parse_version_string(vstring.c_str(), vstring.size(), /* skip_patch = */ true); if (version.major != 1) { throw std::runtime_error("unsupported version string '" + vstring + "'"); } - if (ghandle.attrExists("ordered")) { - auto oattr = ritsuko::hdf5::get_scalar_attribute(ghandle, 
"ordered"); - if (ritsuko::hdf5::exceeds_integer_limit(oattr, 32, true)) { - throw std::runtime_error("expected a datatype for the 'ordered' attribute that fits in a 32-bit signed integer"); - } - } + internal_factor::check_ordered_attribute(ghandle); - // Number of levels. - size_t num_levels = internal_hdf5::validate_factor_levels(ghandle, "levels", options.hdf5_buffer_size); - size_t num_codes = internal_hdf5::validate_factor_codes(ghandle, "codes", num_levels, options.hdf5_buffer_size); - - if (ghandle.exists("names")) { - auto nhandle = ritsuko::hdf5::get_dataset(ghandle, "names"); - if (nhandle.getTypeClass() != H5T_STRING) { - throw std::runtime_error("'names' should be a string datatype class"); - } - auto nlen = ritsuko::hdf5::get_1d_length(nhandle.getSpace(), false); - if (num_codes != nlen) { - throw std::runtime_error("'names' and 'codes' should have the same length"); - } - } + size_t num_levels = internal_factor::validate_factor_levels(ghandle, "levels", options.hdf5_buffer_size); + size_t num_codes = internal_factor::validate_factor_codes(ghandle, "codes", num_levels, options.hdf5_buffer_size); + + internal_string::validate_names(ghandle, "names", num_codes, options.hdf5_buffer_size); } catch (std::exception& e) { throw std::runtime_error("failed to validate a 'string_factor' at '" + path.string() + "'; " + std::string(e.what())); diff --git a/inst/include/takane/takane.hpp b/inst/include/takane/takane.hpp index f2e11a7..4fede0e 100644 --- a/inst/include/takane/takane.hpp +++ b/inst/include/takane/takane.hpp @@ -3,6 +3,7 @@ #include "_validate.hpp" #include "_height.hpp" +#include "_satisfies_interface.hpp" /** * @namespace takane diff --git a/inst/include/takane/utils_compressed_list.hpp b/inst/include/takane/utils_compressed_list.hpp new file mode 100644 index 0000000..8f1ab24 --- /dev/null +++ b/inst/include/takane/utils_compressed_list.hpp @@ -0,0 +1,96 @@ +#ifndef TAKANE_UTILS_COMPRESSED_LIST_HPP +#define TAKANE_UTILS_COMPRESSED_LIST_HPP + 
+#include "H5Cpp.h" +#include "ritsuko/ritsuko.hpp" +#include "ritsuko/hdf5/hdf5.hpp" + +#include +#include +#include +#include +#include + +#include "utils_public.hpp" +#include "utils_string.hpp" +#include "utils_other.hpp" + +namespace takane { + +void validate(const std::filesystem::path&, const std::string&, const Options&); +size_t height(const std::filesystem::path&, const std::string&, const Options&); +bool satisfies_interface(const std::string&, const std::string&); + +namespace internal_compressed_list { + +inline hsize_t validate_group(const H5::Group& handle, size_t concatenated_length, hsize_t buffer_size) { + auto lhandle = ritsuko::hdf5::open_dataset(handle, "lengths"); + if (ritsuko::hdf5::exceeds_integer_limit(lhandle, 64, false)) { + throw std::runtime_error("expected 'lengths' to have a datatype that fits in a 64-bit unsigned integer"); + } + + size_t len = ritsuko::hdf5::get_1d_length(lhandle.getSpace(), false); + ritsuko::hdf5::Stream1dNumericDataset stream(&lhandle, len, buffer_size); + size_t total = 0; + for (size_t i = 0; i < len; ++i, stream.next()) { + total += stream.get(); + } + if (total != concatenated_length) { + throw std::runtime_error("sum of 'lengths' does not equal the height of the concatenated object (got " + std::to_string(total) + ", expected " + std::to_string(concatenated_length) + ")"); + } + + return len; +} + +template +void validate_directory(const std::filesystem::path& path, const std::string& object_type, const std::string& concatenated_type, const Options& options) try { + auto handle = ritsuko::hdf5::open_file(path / "partitions.h5"); + auto ghandle = ritsuko::hdf5::open_group(handle, object_type.c_str()); + + auto vstring = ritsuko::hdf5::open_and_load_scalar_string_attribute(ghandle, "version"); + auto version = ritsuko::parse_version_string(vstring.c_str(), vstring.size(), /* skip_patch = */ true); + if (version.major != 1) { + throw std::runtime_error("unsupported version string '" + vstring + "'"); + } + + 
auto catdir = path / "concatenated"; + auto cattype = read_object_type(catdir); + if constexpr(satisfies_interface_) { + if (!satisfies_interface(cattype, concatenated_type)) { + throw std::runtime_error("'concatenated' should satisfy the '" + concatenated_type + "' interface"); + } + } else { + if (cattype != concatenated_type) { + throw std::runtime_error("'concatenated' should contain an '" + concatenated_type + "' object"); + } + } + + try { + ::takane::validate(catdir, cattype, options); + } catch (std::exception& e) { + throw std::runtime_error("failed to validate the 'concatenated' object; " + std::string(e.what())); + } + size_t catheight = ::takane::height(catdir, cattype, options); + + size_t len = validate_group(ghandle, catheight, options.hdf5_buffer_size); + + internal_string::validate_names(ghandle, "names", len, options.hdf5_buffer_size); + internal_other::validate_mcols(path, "element_annotations", len, options); + internal_other::validate_metadata(path, "other_annotations", options); + +} catch (std::exception& e) { + throw std::runtime_error("failed to validate an '" + object_type + "' object at '" + path.string() + "'; " + std::string(e.what())); +} + +inline size_t height(const std::filesystem::path& path, const std::string& name, [[maybe_unused]] const Options& options) { + H5::H5File handle(path / "partitions.h5", H5F_ACC_RDONLY); + auto ghandle = handle.openGroup(name); + auto dhandle = ghandle.openDataSet("lengths"); + return ritsuko::hdf5::get_1d_length(dhandle, false); +} + +} + +} + +#endif diff --git a/inst/include/takane/utils_factor.hpp b/inst/include/takane/utils_factor.hpp new file mode 100644 index 0000000..868a05d --- /dev/null +++ b/inst/include/takane/utils_factor.hpp @@ -0,0 +1,86 @@ +#ifndef TAKANE_UTILS_FACTOR_HPP +#define TAKANE_UTILS_FACTOR_HPP + +#include +#include +#include +#include +#include + +#include "ritsuko/ritsuko.hpp" +#include "ritsuko/hdf5/hdf5.hpp" + +namespace takane { + +namespace internal_factor { + 
+template +void check_ordered_attribute(const H5Object_& handle) { + if (!handle.attrExists("ordered")) { + return; + } + + auto attr = handle.openAttribute("ordered"); + if (!ritsuko::hdf5::is_scalar(attr)) { + throw std::runtime_error("expected 'ordered' attribute to be a scalar"); + } + if (ritsuko::hdf5::exceeds_integer_limit(attr, 32, true)) { + throw std::runtime_error("expected 'ordered' attribute to have a datatype that fits in a 32-bit signed integer"); + } +} + +inline hsize_t validate_factor_levels(const H5::Group& handle, const std::string& name, hsize_t buffer_size) { + auto lhandle = ritsuko::hdf5::open_dataset(handle, name.c_str()); + if (lhandle.getTypeClass() != H5T_STRING) { + throw std::runtime_error("expected a string datatype for '" + name + "'"); + } + + auto len = ritsuko::hdf5::get_1d_length(lhandle.getSpace(), false); + std::unordered_set present; + + ritsuko::hdf5::Stream1dStringDataset stream(&lhandle, len, buffer_size); + for (hsize_t i = 0; i < len; ++i, stream.next()) { + auto x = stream.steal(); + if (present.find(x) != present.end()) { + throw std::runtime_error("'" + name + "' contains duplicated factor level '" + x + "'"); + } + present.insert(std::move(x)); + } + + return len; +} + +inline hsize_t validate_factor_codes(const H5::Group& handle, const std::string& name, hsize_t num_levels, hsize_t buffer_size, bool allow_missing = true) { + auto chandle = ritsuko::hdf5::open_dataset(handle, name.c_str()); + if (ritsuko::hdf5::exceeds_integer_limit(chandle, 64, false)) { + throw std::runtime_error("expected a datatype for '" + name + "' that fits in a 64-bit unsigned integer"); + } + + bool has_missing = false; + int32_t missing_placeholder = 0; + if (allow_missing) { + auto missingness = ritsuko::hdf5::open_and_load_optional_numeric_missing_placeholder(chandle, "missing-value-placeholder"); + has_missing = missingness.first; + missing_placeholder = missingness.second; + } + + auto len = 
ritsuko::hdf5::get_1d_length(chandle.getSpace(), false); + ritsuko::hdf5::Stream1dNumericDataset stream(&chandle, len, buffer_size); + for (hsize_t i = 0; i < len; ++i, stream.next()) { + auto x = stream.get(); + if (has_missing && x == missing_placeholder) { + continue; + } + if (static_cast(x) >= num_levels) { + throw std::runtime_error("expected factor codes to be less than the number of levels"); + } + } + + return len; +} + +} + +} + +#endif diff --git a/inst/include/takane/utils_hdf5.hpp b/inst/include/takane/utils_hdf5.hpp deleted file mode 100644 index 5ae35b8..0000000 --- a/inst/include/takane/utils_hdf5.hpp +++ /dev/null @@ -1,127 +0,0 @@ -#ifndef TAKANE_UTILS_HDF5_HPP -#define TAKANE_UTILS_HDF5_HPP - -#include -#include -#include -#include -#include - -#include "ritsuko/ritsuko.hpp" -#include "ritsuko/hdf5/hdf5.hpp" - -namespace takane { - -namespace internal_hdf5 { - -inline void validate_string_format(const H5::DataSet& handle, hsize_t len, const std::string& format, bool has_missing, const std::string& missing_value, hsize_t buffer_size) { - if (format == "date") { - ritsuko::hdf5::load_1d_string_dataset( - handle, - len, - buffer_size, - [&](size_t, const char* p, size_t l) { - std::string x(p, p + l); - if (has_missing && missing_value == x) { - return; - } - if (!ritsuko::is_date(p, l)) { - throw std::runtime_error("expected a date-formatted string (got '" + x + "')"); - } - } - ); - - } else if (format == "date-time") { - ritsuko::hdf5::load_1d_string_dataset( - handle, - len, - buffer_size, - [&](size_t, const char* p, size_t l) { - std::string x(p, p + l); - if (has_missing && missing_value == x) { - return; - } - if (!ritsuko::is_rfc3339(p, l)) { - throw std::runtime_error("expected a date/time-formatted string (got '" + x + "')"); - } - } - ); - - } else if (format != "none") { - throw std::runtime_error("unsupported format '" + format + "'"); - } -} - -inline hsize_t validate_factor_levels(const H5::Group& handle, const std::string& name, 
hsize_t buffer_size) { - auto lhandle = ritsuko::hdf5::get_dataset(handle, name.c_str()); - if (lhandle.getTypeClass() != H5T_STRING) { - throw std::runtime_error("expected a string datatype for '" + name + "'"); - } - - auto len = ritsuko::hdf5::get_1d_length(lhandle.getSpace(), false); - std::unordered_set present; - - ritsuko::hdf5::load_1d_string_dataset( - lhandle, - len, - buffer_size, - [&](hsize_t, const char* p, size_t len) { - std::string x(p, p + len); - if (present.find(x) != present.end()) { - throw std::runtime_error("'" + name + "' contains duplicated factor level '" + x + "'"); - } - present.insert(std::move(x)); - } - ); - - return len; -} - -inline hsize_t validate_factor_codes(const H5::Group& handle, const std::string& name, hsize_t num_levels, hsize_t buffer_size, bool allow_missing = true) { - auto chandle = ritsuko::hdf5::get_dataset(handle, name.c_str()); - if (ritsuko::hdf5::exceeds_integer_limit(chandle, 32, true)) { - throw std::runtime_error("expected a datatype for '" + name + "' that fits in a 32-bit signed integer"); - } - - auto len = ritsuko::hdf5::get_1d_length(chandle.getSpace(), false); - auto block_size = ritsuko::hdf5::pick_1d_block_size(chandle.getCreatePlist(), len, buffer_size); - std::vector buffer(block_size); - - bool has_missing = false; - int32_t missing_placeholder = 0; - if (allow_missing) { - const char* missing_attr_name = "missing-value-placeholder"; - has_missing = chandle.attrExists(missing_attr_name); - if (has_missing) { - auto missing_attr = ritsuko::hdf5::get_missing_placeholder_attribute(chandle, missing_attr_name); - missing_attr.read(H5::PredType::NATIVE_INT32, &missing_placeholder); - } - } - - ritsuko::hdf5::iterate_1d_blocks( - len, - block_size, - [&](hsize_t, hsize_t len, const H5::DataSpace& memspace, const H5::DataSpace& dataspace) { - chandle.read(buffer.data(), H5::PredType::NATIVE_INT32, memspace, dataspace); - for (hsize_t i = 0; i < len; ++i) { - if (has_missing && buffer[i] == 
missing_placeholder) { - continue; - } - if (buffer[i] < 0) { - throw std::runtime_error("expected factor codes to be non-negative"); - } - if (static_cast(buffer[i]) >= num_levels) { - throw std::runtime_error("expected factor codes to be less than the number of levels"); - } - } - } - ); - - return len; -} - -} - -} - -#endif diff --git a/inst/include/takane/utils_other.hpp b/inst/include/takane/utils_other.hpp index 81084ce..bf995a4 100644 --- a/inst/include/takane/utils_other.hpp +++ b/inst/include/takane/utils_other.hpp @@ -13,42 +13,45 @@ namespace takane { */ void validate(const std::filesystem::path&, const std::string&, const Options&); size_t height(const std::filesystem::path&, const std::string&, const Options&); +bool satisfies_interface(const std::string&, const std::string&); /** * @endcond */ namespace internal_other { -inline bool ends_with(const std::string& full, const std::string& sub) { - return (full.size() >= sub.size() && full.find(sub) == full.size() - sub.size()); -} - -inline void validate_mcols(const std::filesystem::path& path, size_t expected, const Options& options) { +inline void validate_mcols(const std::filesystem::path& parent, const std::string& name, size_t expected, const Options& options) try { + auto path = parent / name; if (!std::filesystem::exists(path)) { return; } auto xtype = read_object_type(path); - if (!ends_with(xtype, "data_frame")) { - throw std::runtime_error("expected a 'data_frame' or one of its derivatives"); + if (!satisfies_interface(xtype, "DATA_FRAME")) { + throw std::runtime_error("expected an object that satisfies the 'DATA_FRAME' interface"); } ::takane::validate(path, xtype, options); if (::takane::height(path, xtype, options) != expected) { throw std::runtime_error("unexpected number of rows"); } +} catch (std::exception& e) { + throw std::runtime_error("failed to validate '" + name + "'; " + std::string(e.what())); } -inline void validate_metadata(const std::filesystem::path& path, const Options& 
options) { +inline void validate_metadata(const std::filesystem::path& parent, const std::string& name, const Options& options) try { + auto path = parent / name; if (!std::filesystem::exists(path)) { return; } auto xtype = read_object_type(path); - if (!ends_with(xtype, "simple_list")) { - throw std::runtime_error("expected a 'simple_list' or one of its derivatives"); + if (!satisfies_interface(xtype, "SIMPLE_LIST")) { + throw std::runtime_error("expected an object that satisfies the 'SIMPLE_LIST' interface'"); } ::takane::validate(path, xtype, options); +} catch (std::exception& e) { + throw std::runtime_error("failed to validate '" + name + "'; " + std::string(e.what())); } } diff --git a/inst/include/takane/utils_string.hpp b/inst/include/takane/utils_string.hpp new file mode 100644 index 0000000..ba862f8 --- /dev/null +++ b/inst/include/takane/utils_string.hpp @@ -0,0 +1,88 @@ +#ifndef TAKANE_UTILS_STRING_HPP +#define TAKANE_UTILS_STRING_HPP + +#include +#include +#include +#include +#include + +#include "ritsuko/ritsuko.hpp" +#include "ritsuko/hdf5/hdf5.hpp" + +namespace takane { + +namespace internal_string { + +template +std::string fetch_format_attribute(const H5Object_& handle) { + if (!handle.attrExists("format")) { + return "none"; + } + + auto attr = handle.openAttribute("format"); + if (!ritsuko::hdf5::is_scalar(attr)) { + throw std::runtime_error("expected 'format' attribute to be a scalar"); + } + if (attr.getTypeClass() != H5T_STRING) { + throw std::runtime_error("expected 'format' attribute to be a string"); + } + return ritsuko::hdf5::load_scalar_string_attribute(attr); +} + +inline void validate_string_format(const H5::DataSet& handle, hsize_t len, const std::string& format, bool has_missing, const std::string& missing_value, hsize_t buffer_size) { + if (format == "date") { + ritsuko::hdf5::Stream1dStringDataset stream(&handle, len, buffer_size); + for (hsize_t i = 0; i < len; ++i, stream.next()) { + auto x = stream.steal(); + if (has_missing && 
missing_value == x) { + continue; + } + if (!ritsuko::is_date(x.c_str(), x.size())) { + throw std::runtime_error("expected a date-formatted string (got '" + x + "')"); + } + } + + } else if (format == "date-time") { + ritsuko::hdf5::Stream1dStringDataset stream(&handle, len, buffer_size); + for (hsize_t i = 0; i < len; ++i, stream.next()) { + auto x = stream.steal(); + if (has_missing && missing_value == x) { + continue; + } + if (!ritsuko::is_rfc3339(x.c_str(), x.size())) { + throw std::runtime_error("expected a date/time-formatted string (got '" + x + "')"); + } + } + + } else if (format == "none") { + ritsuko::hdf5::validate_1d_string_dataset(handle, len, buffer_size); + + } else { + throw std::runtime_error("unsupported format '" + format + "'"); + } +} + +inline void validate_names(const H5::Group& handle, const std::string& name, size_t len, hsize_t buffer_size) { + if (!handle.exists(name)) { + return; + } + + auto nhandle = ritsuko::hdf5::open_dataset(handle, name.c_str()); + if (nhandle.getTypeClass() != H5T_STRING) { + throw std::runtime_error("'" + name + "' should be a string datatype class"); + } + + auto nlen = ritsuko::hdf5::get_1d_length(nhandle.getSpace(), false); + if (len != nlen) { + throw std::runtime_error("'" + name + "' should have the same length as the parent object (got " + std::to_string(nlen) + ", expected " + std::to_string(len) + ")"); + } + + ritsuko::hdf5::validate_1d_string_dataset(nhandle, len, buffer_size); +} + +} + +} + +#endif diff --git a/inst/include/uzuki2/Version.hpp b/inst/include/uzuki2/Version.hpp index 841185a..55ab9dc 100644 --- a/inst/include/uzuki2/Version.hpp +++ b/inst/include/uzuki2/Version.hpp @@ -1,9 +1,6 @@ #ifndef UZUKI2_VERSIONED_BASE_HPP #define UZUKI2_VERSIONED_BASE_HPP -#include -#include - /** * @file Version.hpp * @brief Version-related definitions. 
@@ -63,54 +60,6 @@ struct Version { } }; -/** - * @cond - */ -inline Version parse_version_string(const std::string& version_string) { - int major = 0, minor = 0; - size_t i = 0, end = version_string.size(); - - if (version_string.empty()) { - throw std::runtime_error("version string is empty"); - } - if (version_string[i] == '0') { - throw std::runtime_error("invalid version string '" + version_string + "' has leading zeros in its major version"); - } - while (i < end && version_string[i] != '.') { - if (!std::isdigit(version_string[i])) { - throw std::runtime_error("invalid version string '" + version_string + "' contains non-digit characters"); - } - major *= 10; - major += version_string[i] - '0'; - ++i; - } - - if (i == end) { - throw std::runtime_error("version string '" + version_string + "' is missing a minor version"); - } - ++i; // get past the period and check again. - if (i == end) { - throw std::runtime_error("version string '" + version_string + "' is missing a minor version"); - } - - if (version_string[i] == '0' && i + 1 < end) { - throw std::runtime_error("invalid version string '" + version_string + "' has leading zeros in its minor version"); - } - while (i < end) { - if (!std::isdigit(version_string[i])) { - throw std::runtime_error("invalid version string '" + version_string + "' contains non-digit characters"); - } - minor *= 10; - minor += version_string[i] - '0'; - ++i; - } - - return Version(major, minor); -} -/** - * @cond - */ - } #endif diff --git a/inst/include/uzuki2/parse_hdf5.hpp b/inst/include/uzuki2/parse_hdf5.hpp index 200c7b7..80aa040 100644 --- a/inst/include/uzuki2/parse_hdf5.hpp +++ b/inst/include/uzuki2/parse_hdf5.hpp @@ -41,18 +41,19 @@ namespace hdf5 { /** * @cond */ -inline H5::DataSet get_scalar_dataset(const H5::Group& handle, const std::string& name, H5T_class_t type_class) try { - auto dhandle = ritsuko::hdf5::get_scalar_dataset(handle, name.c_str()); - if (dhandle.getTypeClass() != type_class) { - throw 
std::runtime_error("dataset has the wrong datatype class"); +inline H5::DataSet check_scalar_dataset(const H5::Group& handle, const char* name) { + if (handle.childObjType(name) != H5O_TYPE_DATASET) { + throw std::runtime_error("expected '" + std::string(name) + "' to be a dataset"); + } + auto dhandle = handle.openDataSet(name); + if (!ritsuko::hdf5::is_scalar(dhandle)) { + throw std::runtime_error("expected '" + std::string(name) + "'to be a scalar dataset"); } return dhandle; -} catch (std::exception& e) { - throw std::runtime_error("failed to load scalar dataset at '" + ritsuko::hdf5::get_name(handle) + "/" + name + "'; " + std::string(e.what())); } template -void parse_integer_like(const H5::DataSet& handle, Host* ptr, Function check, const Version& version) try { +void parse_integer_like(const H5::DataSet& handle, Host* ptr, Function check, const Version& version, hsize_t buffer_size) try { if (ritsuko::hdf5::exceeds_integer_limit(handle, 32, true)) { throw std::runtime_error("dataset cannot be represented by 32-bit signed integers"); } @@ -65,69 +66,57 @@ void parse_integer_like(const H5::DataSet& handle, Host* ptr, Function check, co const char* placeholder_name = "missing-value-placeholder"; has_missing = handle.attrExists(placeholder_name); if (has_missing) { - auto attr = ritsuko::hdf5::get_missing_placeholder_attribute(handle, placeholder_name, /* type_class_only = */ version.lt(1, 2)); + auto attr = handle.openAttribute(placeholder_name); + ritsuko::hdf5::check_missing_placeholder_attribute(handle, attr, /* type_class_only = */ version.lt(1, 2)); attr.read(H5::PredType::NATIVE_INT32, &missing_value); } } hsize_t full_length = ptr->size(); - auto block_size = ritsuko::hdf5::pick_1d_block_size(handle.getCreatePlist(), full_length, /* buffer_size = */ 10000); - std::vector buffer(block_size); - ritsuko::hdf5::iterate_1d_blocks( - full_length, - block_size, - [&](hsize_t counter, hsize_t limit, const H5::DataSpace& mspace, const H5::DataSpace& dspace) -> 
void { - handle.read(buffer.data(), H5::PredType::NATIVE_INT32, mspace, dspace); - for (hsize_t i = 0; i < limit; ++i) { - auto current = buffer[i]; - if (has_missing && current == missing_value) { - ptr->set_missing(counter + i); - } else { - check(current); - ptr->set(counter + i, current); - } - } + ritsuko::hdf5::Stream1dNumericDataset stream(&handle, full_length, buffer_size); + for (hsize_t i = 0; i < full_length; ++i, stream.next()) { + auto current = stream.get(); + if (has_missing && current == missing_value) { + ptr->set_missing(i); + } else { + check(current); + ptr->set(i, current); } - ); + } + } catch (std::exception& e) { throw std::runtime_error("failed to load integer dataset at '" + ritsuko::hdf5::get_name(handle) + "'; " + std::string(e.what())); } template -void parse_string_like(const H5::DataSet& handle, Host* ptr, Function check) try { +void parse_string_like(const H5::DataSet& handle, Host* ptr, Function check, hsize_t buffer_size) try { auto dtype = handle.getDataType(); if (dtype.getClass() != H5T_STRING) { throw std::runtime_error("expected a string dataset"); } - const char* placeholder_name = "missing-value-placeholder"; - bool has_missing = handle.attrExists(placeholder_name); - std::string missing_val; - if (has_missing) { - auto ahandle = ritsuko::hdf5::get_missing_placeholder_attribute(handle, placeholder_name, /* type_class_only = */ true); - missing_val = ritsuko::hdf5::load_scalar_string_attribute(ahandle); - } + auto missingness = ritsuko::hdf5::open_and_load_optional_string_missing_placeholder(handle, "missing-value-placeholder"); + bool has_missing = missingness.first; + std::string missing_val = missingness.second; - ritsuko::hdf5::load_1d_string_dataset( - handle, - ptr->size(), - /* buffer_size = */ 10000, - [&](size_t i, const char* str, size_t len) -> void { - std::string x(str, str + len); - if (has_missing && x == missing_val) { - ptr->set_missing(i); - } else { - check(x); - ptr->set(i, std::move(x)); - } + hsize_t 
full_length = ptr->size(); + ritsuko::hdf5::Stream1dStringDataset stream(&handle, full_length, buffer_size); + for (hsize_t i = 0; i < full_length; ++i, stream.next()) { + auto x = stream.steal(); + if (has_missing && x == missing_val) { + ptr->set_missing(i); + } else { + check(x); + ptr->set(i, std::move(x)); } - ); + } + } catch (std::exception& e) { throw std::runtime_error("failed to load string dataset at '" + ritsuko::hdf5::get_name(handle) + "'; " + std::string(e.what())); } template -void parse_numbers(const H5::DataSet& handle, Host* ptr, Function check, const Version& version) try { +void parse_numbers(const H5::DataSet& handle, Host* ptr, Function check, const Version& version, hsize_t buffer_size) try { if (version.lt(1, 3)) { if (handle.getTypeClass() != H5T_FLOAT) { throw std::runtime_error("expected a floating-point dataset"); @@ -148,7 +137,8 @@ void parse_numbers(const H5::DataSet& handle, Host* ptr, Function check, const V const char* placeholder_name = "missing-value-placeholder"; has_missing = handle.attrExists(placeholder_name); if (has_missing) { - auto attr = ritsuko::hdf5::get_missing_placeholder_attribute(handle, placeholder_name, /* type_class_only = */ version.lt(1, 2)); + auto attr = handle.openAttribute(placeholder_name); + ritsuko::hdf5::check_missing_placeholder_attribute(handle, attr, /* type_class_only = */ version.lt(1, 2)); attr.read(H5::PredType::NATIVE_DOUBLE, &missing_value); } } @@ -166,30 +156,23 @@ void parse_numbers(const H5::DataSet& handle, Host* ptr, Function check, const V }; hsize_t full_length = ptr->size(); - auto block_size = ritsuko::hdf5::pick_1d_block_size(handle.getCreatePlist(), full_length, /* buffer_size = */ 10000); - std::vector buffer(block_size); - ritsuko::hdf5::iterate_1d_blocks( - full_length, - block_size, - [&](hsize_t counter, hsize_t limit, const H5::DataSpace& mspace, const H5::DataSpace& dspace) -> void { - handle.read(buffer.data(), H5::PredType::NATIVE_DOUBLE, mspace, dspace); - for (hsize_t i 
= 0; i < limit; ++i) { - auto current = buffer[i]; - if (has_missing && is_missing_value(current)) { - ptr->set_missing(counter + i); - } else { - check(current); - ptr->set(counter + i, current); - } - } + ritsuko::hdf5::Stream1dNumericDataset stream(&handle, full_length, buffer_size); + for (hsize_t i = 0; i < full_length; ++i, stream.next()) { + auto current = stream.get(); + if (has_missing && is_missing_value(current)) { + ptr->set_missing(i); + } else { + check(current); + ptr->set(i, current); } - ); + } + } catch (std::exception& e) { throw std::runtime_error("failed to load floating-point dataset at '" + ritsuko::hdf5::get_name(handle) + "'; " + std::string(e.what())); } template -void extract_names(const H5::Group& handle, Host* ptr) try { +void extract_names(const H5::Group& handle, Host* ptr, hsize_t buffer_size) try { if (handle.childObjType("names") != H5O_TYPE_DATASET) { throw std::runtime_error("expected a dataset"); } @@ -206,52 +189,46 @@ void extract_names(const H5::Group& handle, Host* ptr) try { throw std::runtime_error("number of names should be equal to the object length"); } - ritsuko::hdf5::load_1d_string_dataset( - nhandle, - nlen, - /* buffer_size = */ 10000, - [&](size_t i, const char* val, size_t len) -> void { - ptr->set_name(i, std::string(val, val + len)); - } - ); + ritsuko::hdf5::Stream1dStringDataset stream(&nhandle, nlen, buffer_size); + for (size_t i = 0; i < nlen; ++i, stream.next()) { + ptr->set_name(i, stream.steal()); + } } catch (std::exception& e) { throw std::runtime_error("failed to load names at '" + ritsuko::hdf5::get_name(handle) + "'; " + std::string(e.what())); } template -std::shared_ptr parse_inner(const H5::Group& handle, Externals& ext, const Version& version) try { +std::shared_ptr parse_inner(const H5::Group& handle, Externals& ext, const Version& version, hsize_t buffer_size) try { // Deciding what type we're dealing with. 
- auto object_type = ritsuko::hdf5::load_scalar_string_attribute(handle, "uzuki_object"); + auto object_type = ritsuko::hdf5::open_and_load_scalar_string_attribute(handle, "uzuki_object"); std::shared_ptr output; if (object_type == "list") { - if (!handle.exists("data") || handle.childObjType("data") != H5O_TYPE_GROUP) { - throw std::runtime_error("expected a group at 'data'"); - } - auto dhandle = handle.openGroup("data"); + auto dhandle = ritsuko::hdf5::open_group(handle, "data"); size_t len = dhandle.getNumObjs(); bool named = handle.exists("names"); auto lptr = Provisioner::new_List(len, named); output.reset(lptr); - for (size_t i = 0; i < len; ++i) { - auto istr = std::to_string(i); - if (!dhandle.exists(istr) || dhandle.childObjType(istr) != H5O_TYPE_GROUP) { - throw std::runtime_error("expected a group at 'data/" + istr + "'"); + try { + for (size_t i = 0; i < len; ++i) { + auto istr = std::to_string(i); + auto lhandle = ritsuko::hdf5::open_group(dhandle, istr.c_str()); + lptr->set(i, parse_inner(lhandle, ext, version, buffer_size)); } - auto lhandle = dhandle.openGroup(istr); - lptr->set(i, parse_inner(lhandle, ext, version)); + } catch (std::exception& e) { + throw std::runtime_error("failed to parse list contents in 'data'; " + std::string(e.what())); } if (named) { - extract_names(handle, lptr); + extract_names(handle, lptr, buffer_size); } } else if (object_type == "vector") { - auto vector_type = ritsuko::hdf5::load_scalar_string_attribute(handle, "uzuki_type"); + auto vector_type = ritsuko::hdf5::open_and_load_scalar_string_attribute(handle, "uzuki_type"); - auto dhandle = ritsuko::hdf5::get_dataset(handle, "data"); + auto dhandle = ritsuko::hdf5::open_dataset(handle, "data"); size_t len = ritsuko::hdf5::get_1d_length(dhandle.getSpace(), true); bool is_scalar = (len == 0); if (is_scalar) { @@ -263,7 +240,7 @@ std::shared_ptr parse_inner(const H5::Group& handle, Externals& ext, const if (vector_type == "integer") { auto iptr = 
Provisioner::new_Integer(len, named, is_scalar); output.reset(iptr); - parse_integer_like(dhandle, iptr, [](int32_t) -> void {}, version); + parse_integer_like(dhandle, iptr, [](int32_t) -> void {}, version, buffer_size); } else if (vector_type == "boolean") { auto bptr = Provisioner::new_Boolean(len, named, is_scalar); @@ -272,10 +249,10 @@ std::shared_ptr parse_inner(const H5::Group& handle, Externals& ext, const if (x != 0 && x != 1) { throw std::runtime_error("boolean values should be 0 or 1"); } - }, version); + }, version, buffer_size); } else if (vector_type == "factor" || (version.equals(1, 0) && vector_type == "ordered")) { - auto levhandle = ritsuko::hdf5::get_dataset(handle, "levels"); + auto levhandle = ritsuko::hdf5::open_dataset(handle, "levels"); auto levtype = levhandle.getDataType(); if (levtype.getClass() != H5T_STRING) { throw std::runtime_error("expected a string dataset for the levels at 'levels'"); @@ -286,9 +263,12 @@ std::shared_ptr parse_inner(const H5::Group& handle, Externals& ext, const if (vector_type == "ordered") { ordered = true; } else if (handle.exists("ordered")) { - auto ohandle = get_scalar_dataset(handle, "ordered", H5T_INTEGER); - int tmp_ordered = 0; - ohandle.read(&tmp_ordered, H5::PredType::NATIVE_INT); + auto ohandle = check_scalar_dataset(handle, "ordered"); + if (ritsuko::hdf5::exceeds_integer_limit(ohandle, 32, true)) { + throw std::runtime_error("'ordered' value cannot be represented by a 32-bit integer"); + } + int32_t tmp_ordered = 0; + ohandle.read(&tmp_ordered, H5::PredType::NATIVE_INT32); ordered = tmp_ordered > 0; } @@ -298,22 +278,18 @@ std::shared_ptr parse_inner(const H5::Group& handle, Externals& ext, const if (x < 0 || x >= levlen) { throw std::runtime_error("factor codes should be non-negative and less than the number of levels"); } - }, version); + }, version, buffer_size); std::unordered_set present; - ritsuko::hdf5::load_1d_string_dataset( - levhandle, - levlen, - /* buffer_size = */ 10000, - [&](size_t 
i, const char* val, size_t len) -> void { - std::string x(val, val + len); - if (present.find(x) != present.end()) { - throw std::runtime_error("levels should be unique"); - } - fptr->set_level(i, x); - present.insert(std::move(x)); + ritsuko::hdf5::Stream1dStringDataset stream(&levhandle, levlen, buffer_size); + for (int32_t i = 0; i < levlen; ++i, stream.next()) { + auto x = stream.steal(); + if (present.find(x) != present.end()) { + throw std::runtime_error("levels should be unique"); } - ); + fptr->set_level(i, x); + present.insert(std::move(x)); + } } else if (vector_type == "string" || (version.equals(1, 0) && (vector_type == "date" || vector_type == "date-time"))) { StringVector::Format format = StringVector::NONE; @@ -323,49 +299,46 @@ std::shared_ptr parse_inner(const H5::Group& handle, Externals& ext, const } else if (vector_type == "date-time") { format = StringVector::DATETIME; } + } else if (handle.exists("format")) { - auto fhandle = get_scalar_dataset(handle, "format", H5T_STRING); - ritsuko::hdf5::load_1d_string_dataset( - fhandle, - 1, - /* buffer_size = */ 10000, - [&](size_t, const char* val, size_t len) -> void { - std::string x(val, val + len); - if (x == "date") { - format = StringVector::DATE; - } else if (x == "date-time") { - format = StringVector::DATETIME; - } else { - throw std::runtime_error("unsupported format '" + x + "'"); - } - } - ); + auto fhandle = check_scalar_dataset(handle, "format"); + if (fhandle.getTypeClass() != H5T_STRING) { + throw std::runtime_error("'format' dataset should have a string datatype class"); + } + auto x = ritsuko::hdf5::load_scalar_string_dataset(fhandle); + if (x == "date") { + format = StringVector::DATE; + } else if (x == "date-time") { + format = StringVector::DATETIME; + } else { + throw std::runtime_error("unsupported format '" + x + "'"); + } } auto sptr = Provisioner::new_String(len, named, is_scalar, format); output.reset(sptr); if (format == StringVector::NONE) { - parse_string_like(dhandle, 
sptr, [](const std::string&) -> void {}); + parse_string_like(dhandle, sptr, [](const std::string&) -> void {}, buffer_size); } else if (format == StringVector::DATE) { parse_string_like(dhandle, sptr, [&](const std::string& x) -> void { if (!ritsuko::is_date(x.c_str(), x.size())) { throw std::runtime_error("dates should follow YYYY-MM-DD formatting"); } - }); + }, buffer_size); } else if (format == StringVector::DATETIME) { parse_string_like(dhandle, sptr, [&](const std::string& x) -> void { if (!ritsuko::is_rfc3339(x.c_str(), x.size())) { throw std::runtime_error("date-times should follow the Internet Date/Time format"); } - }); + }, buffer_size); } } else if (vector_type == "number") { auto dptr = Provisioner::new_Number(len, named, is_scalar); output.reset(dptr); - parse_numbers(dhandle, dptr, [](double) -> void {}, version); + parse_numbers(dhandle, dptr, [](double) -> void {}, version, buffer_size); } else { throw std::runtime_error("unknown vector type '" + vector_type + "'"); @@ -373,14 +346,14 @@ std::shared_ptr parse_inner(const H5::Group& handle, Externals& ext, const if (named) { auto vptr = static_cast(output.get()); - extract_names(handle, vptr); + extract_names(handle, vptr, buffer_size); } } else if (object_type == "nothing") { output.reset(Provisioner::new_Nothing()); } else if (object_type == "external") { - auto ihandle = ritsuko::hdf5::get_dataset(handle, "index"); + auto ihandle = ritsuko::hdf5::open_dataset(handle, "index"); if (ritsuko::hdf5::exceeds_integer_limit(ihandle, 32, true)) { throw std::runtime_error("external index at 'index' cannot be represented by a 32-bit signed integer"); } @@ -411,12 +384,28 @@ std::shared_ptr parse_inner(const H5::Group& handle, Externals& ext, const * @endcond */ +/** + * @brief Options for HDF5 file parsing. + */ +struct Options { + /** + * Buffer size, in terms of the number of elements, to use for reading data from HDF5 datasets. 
+ */ + hsize_t buffer_size = 10000; + + /** + * Whether to throw an error if the top-level R object is not an R list. + */ + bool strict_list = true; +}; + /** * @tparam Provisioner A class namespace defining static methods for creating new `Base` objects. * @tparam Externals Class describing how to resolve external references for type `EXTERNAL`. * * @param handle Handle for a HDF5 group corresponding to the list. * @param ext Instance of an external reference resolver class. + * @param options Optional parameters. * * @return A `ParsedList` containing a pointer to the root `Base` object. * Depending on `Provisioner`, this may contain references to all nested objects. @@ -456,16 +445,23 @@ std::shared_ptr parse_inner(const H5::Group& handle, Externals& ext, const * - `size_t size()`, which returns the number of available external references. */ template -ParsedList parse(const H5::Group& handle, Externals ext) { +ParsedList parse(const H5::Group& handle, Externals ext, Options options = Options()) { Version version; if (handle.attrExists("uzuki_version")) { - auto ver_str = ritsuko::hdf5::load_scalar_string_attribute(handle, "uzuki_version"); - version = parse_version_string(ver_str); + auto ver_str = ritsuko::hdf5::open_and_load_scalar_string_attribute(handle, "uzuki_version"); + auto vraw = ritsuko::parse_version_string(ver_str.c_str(), ver_str.size(), /* skip_patch = */ true); + version.major = vraw.major; + version.minor = vraw.minor; } ExternalTracker etrack(std::move(ext)); - auto ptr = parse_inner(handle, etrack, version); + auto ptr = parse_inner(handle, etrack, version, options.buffer_size); + + if (options.strict_list && ptr->type() != LIST) { + throw std::runtime_error("top-level object should represent an R list"); + } etrack.validate(); + return ParsedList(std::move(ptr), std::move(version)); } @@ -476,6 +472,7 @@ ParsedList parse(const H5::Group& handle, Externals ext) { * @tparam Provisioner A class namespace defining static methods for creating new 
`Base` objects. * * @param handle Handle for a HDF5 group corresponding to the list. + * @param options Optional parameters. * * @return A `ParsedList` containing a pointer to the root `Base` object. * Depending on `Provisioner`, this may contain references to all nested objects. @@ -483,8 +480,8 @@ ParsedList parse(const H5::Group& handle, Externals ext) { * Any invalid representations in `contents` will cause an error to be thrown. */ template -ParsedList parse(const H5::Group& handle) { - return parse(handle, uzuki2::DummyExternals(0)); +ParsedList parse(const H5::Group& handle, Options options = Options()) { + return parse(handle, uzuki2::DummyExternals(0), std::move(options)); } /** @@ -496,6 +493,7 @@ ParsedList parse(const H5::Group& handle) { * @param file Path to a HDF5 file. * @param name Name of the HDF5 group containing the list in `file`. * @param ext Instance of an external reference resolver class. + * @param options Optional parameters. * * @return A `ParsedList` containing a pointer to the root `Base` object. * Depending on `Provisioner`, this may contain references to all nested objects. @@ -503,9 +501,9 @@ ParsedList parse(const H5::Group& handle) { * Any invalid representations in `contents` will cause an error to be thrown. */ template -ParsedList parse(const std::string& file, const std::string& name, Externals ext) { +ParsedList parse(const std::string& file, const std::string& name, Externals ext, Options options = Options()) { H5::H5File handle(file, H5F_ACC_RDONLY); - return parse(handle.openGroup(name), std::move(ext)); + return parse(ritsuko::hdf5::open_group(handle, name.c_str()), std::move(ext), std::move(options)); } /** @@ -516,6 +514,7 @@ ParsedList parse(const std::string& file, const std::string& name, Externals ext * * @param file Path to a HDF5 file. * @param name Name of the HDF5 group containing the list in `file`. + * @param options Optional parameters. 
* * @return A `ParsedList` containing a pointer to the root `Base` object. * Depending on `Provisioner`, this may contain references to all nested objects. @@ -523,9 +522,9 @@ ParsedList parse(const std::string& file, const std::string& name, Externals ext * Any invalid representations in `contents` will cause an error to be thrown. */ template -ParsedList parse(const std::string& file, const std::string& name) { +ParsedList parse(const std::string& file, const std::string& name, Options options = Options()) { H5::H5File handle(file, H5F_ACC_RDONLY); - return parse(handle.openGroup(name), uzuki2::DummyExternals(0)); + return parse(ritsuko::hdf5::open_group(handle, name.c_str()), uzuki2::DummyExternals(0), std::move(options)); } /** @@ -536,10 +535,11 @@ ParsedList parse(const std::string& file, const std::string& name) { * @param name Name of the HDF5 group corresponding to `handle`. * Only used for error messages. * @param num_external Expected number of external references. + * @param options Optional parameters. */ -inline void validate(const H5::Group& handle, int num_external = 0) { +inline void validate(const H5::Group& handle, int num_external = 0, Options options = Options()) { DummyExternals ext(num_external); - parse(handle, ext); + parse(handle, ext, std::move(options)); return; } @@ -550,10 +550,11 @@ inline void validate(const H5::Group& handle, int num_external = 0) { * @param file Path to a HDF5 file. * @param name Name of the HDF5 group containing the list in `file`. * @param num_external Expected number of external references. + * @param options Optional parameters. 
*/ -inline void validate(const std::string& file, const std::string& name, int num_external = 0) { +inline void validate(const std::string& file, const std::string& name, int num_external = 0, Options options = Options()) { DummyExternals ext(num_external); - parse(file, name, ext); + parse(file, name, ext, std::move(options)); return; } diff --git a/inst/include/uzuki2/parse_json.hpp b/inst/include/uzuki2/parse_json.hpp index b380c92..b7e2fbb 100644 --- a/inst/include/uzuki2/parse_json.hpp +++ b/inst/include/uzuki2/parse_json.hpp @@ -395,6 +395,11 @@ struct Options { * If true, an extra thread is used to avoid blocking I/O operations. */ bool parallel = false; + + /** + * Whether to throw an error if the top-level R object is not an R list. + */ + bool strict_list = true; }; /** @@ -434,14 +439,21 @@ ParsedList parse(byteme::Reader& reader, Externals ext, Options options = Option if (vIt->second->type() != millijson::STRING) { throw std::runtime_error("expected a string in 'version'"); } - auto vptr = static_cast(vIt->second.get()); - version = parse_version_string(vptr->value); + const auto& vstr = static_cast(vIt->second.get())->value; + auto vraw = ritsuko::parse_version_string(vstr.c_str(), vstr.size(), /* skip_patch = */ true); + version.major = vraw.major; + version.minor = vraw.minor; } } ExternalTracker etrack(std::move(ext)); auto output = parse_object(contents.get(), etrack, "", version); + + if (options.strict_list && output->type() != LIST) { + throw std::runtime_error("top-level object should represent an R list"); + } etrack.validate(); + return ParsedList(std::move(output), std::move(version)); } diff --git a/tests/testthat/test-DataFrame.R b/tests/testthat/test-DataFrame.R index b0b9aeb..03b69c8 100644 --- a/tests/testthat/test-DataFrame.R +++ b/tests/testthat/test-DataFrame.R @@ -295,7 +295,7 @@ test_that("handling of NAs works correctly", { fpath <- file.path(tmp2, "basic_columns.h5") attrs <- rhdf5::h5readAttributes(fpath, 
"data_frame/data/2/codes") - expect_identical(attrs[["missing-value-placeholder"]], -1L) + expect_identical(attrs[["missing-value-placeholder"]], 2L) attrs <- rhdf5::h5readAttributes(fpath, "data_frame/data/3/codes") expect_null(attrs[["missing-value-placeholder"]])