Skip to content

Commit

Permalink
Add Bitvector serialization functionality
Browse files Browse the repository at this point in the history
  • Loading branch information
lczech committed Nov 12, 2024
1 parent 2740d4c commit d049f22
Show file tree
Hide file tree
Showing 4 changed files with 133 additions and 2 deletions.
22 changes: 20 additions & 2 deletions lib/genesis/utils/math/bitvector.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,12 @@
namespace genesis {
namespace utils {

// =================================================================================================
// Forward Declarations
// =================================================================================================

class Deserializer;

// =================================================================================================
// Bitvector
// =================================================================================================
Expand Down Expand Up @@ -85,7 +91,7 @@ class Bitvector
}

// reserve enough bits, and init them.
data_.resize( (size / IntSize) + (size % IntSize == 0 ? 0 : 1) );
data_.resize( get_vector_size( size ));
set_all(initial_value);
}

Expand Down Expand Up @@ -148,6 +154,8 @@ class Bitvector
Bitvector& operator= (Bitvector const&) = default;
Bitvector& operator= (Bitvector&&) = default;

/**
 * @brief Deserialize a Bitvector from a binary source.
 *
 * Declared as a friend, so that the function can write to the internal members directly.
 */
friend Deserializer& operator>>( Deserializer& deserializer, Bitvector& bv );

// ---------------------------------------------------------
// Single Bit Functions
// ---------------------------------------------------------
Expand Down Expand Up @@ -398,6 +406,16 @@ class Bitvector
return data_;
}

/**
 * @brief For a given number of bits, compute the size of the internally used vector.
 *
 * The result is the number of bits divided by `IntSize`, rounded up. This is mostly meant
 * as a helper for data operations such as serialization and deserialization.
 */
static size_t get_vector_size( size_t bit_size )
{
    // Number of completely filled elements, plus one more if some bits are left over.
    size_t const full_words = bit_size / IntSize;
    bool const has_remainder = ( bit_size % IntSize != 0 );
    return full_words + ( has_remainder ? 1 : 0 );
}

// ---------------------------------------------------------
// Internal Members
// ---------------------------------------------------------
Expand Down Expand Up @@ -471,7 +489,7 @@ struct BitvectorHash
};

/**
* @brief Helper structer that yields the x_hash of a given Bitvector.
* @brief Helper structure that yields the x_hash of a given Bitvector.
*
* It is meant to be used in containers such as `unordered_set` or `unordered_map`
* that can make use of custom hash functions for the key objects. Using this class instead
Expand Down
40 changes: 40 additions & 0 deletions lib/genesis/utils/math/bitvector/operators.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,46 @@ std::istream& operator >> ( std::istream& in, Bitvector& bv )
return in;
}

Serializer& operator<<( Serializer& serializer, Bitvector const& bv )
{
    // The binary layout is the size in number of bits, followed by the underlying data vector.
    // Serializing that vector additionally stores its own size, which duplicates information
    // and hence is a bit of overhead, but that is accepted for now.
    auto const& words = bv.data();
    serializer << bv.size();
    serializer << words;
    return serializer;
}

Deserializer& operator>>( Deserializer& deserializer, Bitvector& bv )
{
    // This function is a friend of the Bitvector class, so that we can write to its data directly.
    // Otherwise, we'd need special constructors etc, which is a bit cumbersome.

    // Read into local variables first. If reading throws, or if the sizes turn out to be
    // inconsistent, the given Bitvector is left untouched, instead of ending up with
    // a number of bits that does not match its data.
    decltype( bv.size_ ) bit_size{};
    decltype( bv.data_ ) words;
    deserializer >> bit_size;
    deserializer >> words;

    // Now that we have read the data, check that it is valid.
    // First, we check that the vector size fits the number of bits.
    size_t const expected_size = Bitvector::get_vector_size( bit_size );
    if( words.size() != expected_size ) {
        throw std::invalid_argument(
            "Cannot deserialize Bitvector of expected vector size " + std::to_string( expected_size ) +
            " with actual vector size " + std::to_string( words.size() )
        );
    }

    // Put the new content into the Bitvector. After the swap, the local variables hold
    // the previous content, which we need for restoring it if the next check fails.
    auto const previous_size = bv.size_;
    bv.size_ = bit_size;
    bv.data_.swap( words );

    // Then we check that the last bits are already unset, and do not contain any stray
    // set bits that would indicate wrong usage or serialization.
    if( bv.data_.size() > 0 ) {
        auto const back = bv.data_.back();
        bv.unset_padding_();
        if( bv.data_.back() != back ) {
            // Roll back to the previous content before reporting the error.
            bv.size_ = previous_size;
            bv.data_.swap( words );
            throw std::invalid_argument(
                "Invalid (de)serialization of Bitvector where last bits after the actual size were set"
            );
        }
    }

    return deserializer;
}

std::vector<bool> make_bool_vector_from_indices( std::vector<size_t> const& indices, size_t size )
{
// Get the largest element of the vector. If it's empty, we return an all-false vector.
Expand Down
32 changes: 32 additions & 0 deletions lib/genesis/utils/math/bitvector/operators.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@
*/

#include "genesis/utils/math/bitvector.hpp"
#include "genesis/utils/io/deserializer.hpp"
#include "genesis/utils/io/serializer.hpp"

#include <cstddef>
#include <iosfwd>
Expand Down Expand Up @@ -233,6 +235,36 @@ std::ostream& operator << ( std::ostream& out, Bitvector const& bv );
*/
std::istream& operator >> ( std::istream& in, Bitvector& bv );

/**
 * @brief Serialize a Bitvector to a binary target.
 *
 * The size of the Bitvector in number of bits is written first, followed by its underlying data.
 */
Serializer& operator<<( Serializer& serializer, Bitvector const& bv );

/**
 * @brief Deserialize a Bitvector from a binary source.
 *
 * @throws std::invalid_argument if the size of the deserialized data vector does not match
 * the deserialized number of bits, or if bits after the actual size are set in the data.
 */
Deserializer& operator>>( Deserializer& deserializer, Bitvector& bv );

/**
 * @brief Get the number of bytes that a Bitvector with the given number of bits occupies
 * in the binary output when serialized using Serializer.
 */
inline size_t serialized_bitvector_size( size_t bit_size )
{
    // Two sizes are stored in front of the data: the size in bits, and the vector size.
    size_t const header_bytes = 2 * sizeof( size_t );

    // The data itself takes up one IntType per element of the underlying vector.
    size_t const data_bytes = Bitvector::get_vector_size( bit_size ) * sizeof( Bitvector::IntType );
    return header_bytes + data_bytes;
}

/**
 * @brief Get the number of bytes that the given Bitvector occupies in the binary output
 * when serialized using Serializer.
 */
inline size_t serialized_bitvector_size( Bitvector const& bv )
{
    // Only the number of bits matters here, so we forward to the overload taking that number.
    auto const bit_size = bv.size();
    return serialized_bitvector_size( bit_size );
}

/**
* @brief Helper function to create a bool vector from a set of indices to be set to `true`.
*
Expand Down
41 changes: 41 additions & 0 deletions test/src/utils/math/bitvector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@

#include "src/common.hpp"

#include "genesis/utils/io/deserializer.hpp"
#include "genesis/utils/io/serializer.hpp"
#include "genesis/utils/math/bitvector.hpp"
#include "genesis/utils/math/bitvector/operators.hpp"
#include "genesis/utils/math/random.hpp"
Expand Down Expand Up @@ -629,3 +631,42 @@ TEST( Bitvector, FindFirstLastSet )
}
}
}

TEST( Bitvector, Serialization )
{
    std::srand(std::time(nullptr));

    // Serialize a whole container of bitvectors at once, so that this is tested as well.
    // The sizes cover exact boundaries as well as some arbitrary values.
    std::vector<Bitvector> bvs;
    for( auto const bit_count : { 42, 0, 512, 710 } ) {
        bvs.push_back( make_random_bitvector_( bit_count ));
    }

    // Write everything into a string.
    std::ostringstream target;
    Serializer writer( to_stream( target ));
    writer << bvs;
    auto const out_str = target.str();

    // The string needs to have the correct size: A size_t for the outer std::vector,
    // and then for each bitvector its size in bits, its vector size, and the data itself.
    size_t total = sizeof( size_t );
    for( auto const& elem : bvs ) {
        total += serialized_bitvector_size( elem );
    }
    EXPECT_EQ( out_str.size(), total );

    // Read the bitvectors back from the string.
    std::istringstream source( out_str );
    Deserializer reader( from_stream( source ));
    std::vector<Bitvector> bvs_deser;
    reader >> bvs_deser;

    // The round trip has to yield the original bitvectors.
    EXPECT_EQ( bvs_deser, bvs );
}

0 comments on commit d049f22

Please sign in to comment.