diff --git a/lib/genesis/utils/math/bitvector.hpp b/lib/genesis/utils/math/bitvector.hpp index 4a6e8251..10a746dd 100644 --- a/lib/genesis/utils/math/bitvector.hpp +++ b/lib/genesis/utils/math/bitvector.hpp @@ -43,6 +43,12 @@ namespace genesis { namespace utils { +// ================================================================================================= +// Forward Declarations +// ================================================================================================= + +class Deserializer; + // ================================================================================================= // Bitvector // ================================================================================================= @@ -85,7 +91,7 @@ class Bitvector } // reserve enough bits, and init them. - data_.resize( (size / IntSize) + (size % IntSize == 0 ? 0 : 1) ); + data_.resize( get_vector_size( size )); set_all(initial_value); } @@ -148,6 +154,8 @@ class Bitvector Bitvector& operator= (Bitvector const&) = default; Bitvector& operator= (Bitvector&&) = default; + friend Deserializer& operator>>( Deserializer& deserializer, Bitvector& bv ); + // --------------------------------------------------------- // Single Bit Functions // --------------------------------------------------------- @@ -398,6 +406,16 @@ class Bitvector return data_; } + /** + * @brief For a given number of bits, compute the size of the internally used vector. + * + * This is the number of bits divided by IntSize, rounded up. It is mostly meant as a helper for data operations such as serialization and deserialization. + */ + static size_t get_vector_size( size_t bit_size ) + { + return (bit_size / IntSize) + (bit_size % IntSize == 0 ? 0 : 1); + } + // --------------------------------------------------------- // Internal Members // --------------------------------------------------------- @@ -471,7 +489,7 @@ struct BitvectorHash }; /** - * @brief Helper structer that yields the x_hash of a given Bitvector. 
+ * @brief Helper structure that yields the x_hash of a given Bitvector. * * It is meant to be used in containers such as `unordered_set` or `unordered_map` * that can make use of custom hash functions for the key objects. Using this class instead diff --git a/lib/genesis/utils/math/bitvector/operators.cpp b/lib/genesis/utils/math/bitvector/operators.cpp index 27748014..a8d70a62 100644 --- a/lib/genesis/utils/math/bitvector/operators.cpp +++ b/lib/genesis/utils/math/bitvector/operators.cpp @@ -330,6 +330,46 @@ std::istream& operator >> ( std::istream& in, Bitvector& bv ) return in; } +Serializer& operator<<( Serializer& serializer, Bitvector const& bv ) +{ + // We write the size in number of bits first. + // Then, the data serialization will additionally store the size of the underlying vector + // that is used in the Bitvector, which is a bit of overhead, but we live with that for now. + serializer << bv.size(); + serializer << bv.data(); + return serializer; +} + +Deserializer& operator>>( Deserializer& deserializer, Bitvector& bv ) +{ + // This function is a friend of the Bitvector class, so that we can write to its data directly. + // Otherwise, we'd need special constructors etc, which is a bit cumbersome. NOTE(review): as a consequence, bv is already overwritten if one of the checks below throws. + deserializer >> bv.size_; + deserializer >> bv.data_; + + // Now that we have read the data, check that it is valid. + // First, we check the sizes, and then we check that the last bits are already unset, + // and do not contain any stray set bits that would indicate wrong usage or serialization. 
+ size_t const expected_size = Bitvector::get_vector_size( bv.size_ ); + if( bv.data_.size() != expected_size ) { + throw std::invalid_argument( + "Cannot deserialize Bitvector of expected vector size " + std::to_string( expected_size ) + + " with actual vector size " + std::to_string( bv.data_.size() ) + ); + } + if( bv.data_.size() > 0 ) { + auto const back = bv.data_.back(); + bv.unset_padding_(); + if( bv.data_.back() != back ) { + throw std::invalid_argument( + "Invalid (de)serialization of Bitvector where last bits after the actual size were set" + ); + } + } + + return deserializer; +} + std::vector make_bool_vector_from_indices( std::vector const& indices, size_t size ) { // Get the largest element of the vector. If it's empty, we return an all-false vector. diff --git a/lib/genesis/utils/math/bitvector/operators.hpp b/lib/genesis/utils/math/bitvector/operators.hpp index 5508873d..b75bca75 100644 --- a/lib/genesis/utils/math/bitvector/operators.hpp +++ b/lib/genesis/utils/math/bitvector/operators.hpp @@ -32,6 +32,8 @@ */ #include "genesis/utils/math/bitvector.hpp" +#include "genesis/utils/io/deserializer.hpp" +#include "genesis/utils/io/serializer.hpp" #include #include @@ -233,6 +235,36 @@ std::ostream& operator << ( std::ostream& out, Bitvector const& bv ); */ std::istream& operator >> ( std::istream& in, Bitvector& bv ); +/** + * @brief Serialize a Bitvector to a binary target. + */ +Serializer& operator<<( Serializer& serializer, Bitvector const& bv ); + +/** + * @brief Deserialize a Bitvector from a binary source. + */ +Deserializer& operator>>( Deserializer& deserializer, Bitvector& bv ); + +/** + * @brief Get the size in the binary output of a serialized Bitvector using Serializer, + * given the number of bits being serialized. + */ +inline size_t serialized_bitvector_size( size_t bit_size ) +{ + // We need its size in bits, its vector size, as well as the data itself. 
+ size_t total = 2 * sizeof( size_t ); + total += Bitvector::get_vector_size( bit_size ) * sizeof( Bitvector::IntType ); + return total; +} + +/** + * @brief Get the size in the binary output of a serialized Bitvector using Serializer. + */ +inline size_t serialized_bitvector_size( Bitvector const& bv ) +{ + return serialized_bitvector_size( bv.size() ); +} + /** * @brief Helper function to create a bool vector from a set of indices to be set to `true`. * diff --git a/test/src/utils/math/bitvector.cpp b/test/src/utils/math/bitvector.cpp index 2cc4b721..67770dd0 100644 --- a/test/src/utils/math/bitvector.cpp +++ b/test/src/utils/math/bitvector.cpp @@ -30,6 +30,8 @@ #include "src/common.hpp" +#include "genesis/utils/io/deserializer.hpp" +#include "genesis/utils/io/serializer.hpp" #include "genesis/utils/math/bitvector.hpp" #include "genesis/utils/math/bitvector/operators.hpp" #include "genesis/utils/math/random.hpp" @@ -629,3 +631,42 @@ TEST( Bitvector, FindFirstLastSet ) } } } + +TEST( Bitvector, Serialization ) +{ + std::srand(std::time(nullptr)); + + // We test that a container of bitvectors also works, and internally test + // different sizes that are either exact boundaries, or some arbitrary values. + std::vector bvs; + bvs.push_back( make_random_bitvector_( 42 )); + bvs.push_back( make_random_bitvector_( 0 )); + bvs.push_back( make_random_bitvector_( 512 )); + bvs.push_back( make_random_bitvector_( 710 )); + + // Serialize + std::ostringstream out; + Serializer serial( to_stream( out )); + serial << bvs; + auto const out_str = out.str(); + + // Test that the string has the correct size. + // This is a size_t for the outer std::vector, and then for each internal bitvector, + // we need its size in bits, its vector size, as well as the data itself. 
+ size_t total = sizeof( size_t ); + for( auto const& bv : bvs ) { + total += serialized_bitvector_size( bv ); + // total += 2 * sizeof( size_t ); + // total += Bitvector::get_vector_size( bv.size() ) * sizeof( Bitvector::IntType ); + } + EXPECT_EQ( out_str.size(), total ); + + // Deserialize again + std::istringstream in( out_str ); + Deserializer deser( from_stream( in )); + std::vector bvs_deser; + deser >> bvs_deser; + + // Finally, compare + EXPECT_EQ( bvs_deser, bvs ); +}