Skip to content

Commit

Permalink
Refactor Bitvector to use more free functions
Browse files Browse the repository at this point in the history
  • Loading branch information
lczech committed Dec 8, 2024
1 parent 11a822e commit b52958b
Show file tree
Hide file tree
Showing 21 changed files with 1,300 additions and 1,010 deletions.
4 changes: 2 additions & 2 deletions lib/genesis/population/format/bed_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ GenomeLocusSet BedReader::read_as_genome_locus_set(
auto const chr_names = result.chromosome_names();
for( auto const& chr_name : chr_names ) {
auto& bv = result.chromosome_positions( chr_name );
auto const last_bit_idx = bv.find_last_set();
auto const last_bit_idx = find_last_set( bv );
if( last_bit_idx == Bitvector::npos ) {
bv = Bitvector( 1 );
} else {
Expand Down Expand Up @@ -113,7 +113,7 @@ GenomeLocusSet BedReader::read_as_genome_locus_set(

// Use the seq dict to resize the bitvector to the desired length.
auto& bv = result.chromosome_positions( chr_name );
auto const last_bit_idx = bv.find_last_set();
auto const last_bit_idx = find_last_set( bv );
if( last_bit_idx == Bitvector::npos ) {
// Empty chr in bed. Should not really be able to happen, as that means there was not
// an entry in the input to begin with, but let's catch it anyway.
Expand Down
2 changes: 1 addition & 1 deletion lib/genesis/population/function/genome_locus_set.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ GenomeLocusSet read_mask_fasta(
// If we invert, we do that here at the end. We could also switch in the set function
// above, but it's easier to do this in bulk. We need to unset the first bit then.
if( invert ) {
bv.negate();
negate(bv);
bv.unset(0);
}

Expand Down
5 changes: 4 additions & 1 deletion lib/genesis/population/function/window_average.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@
#include "genesis/population/variant.hpp"
#include "genesis/population/window/base_window.hpp"
#include "genesis/population/window/window_view.hpp"
#include "genesis/utils/math/bitvector.hpp"
#include "genesis/utils/math/bitvector/functions.hpp"
#include "genesis/utils/math/bitvector/operators.hpp"

#include <cassert>
#include <limits>
Expand Down Expand Up @@ -217,7 +220,7 @@ inline size_t get_window_provided_loci_count(

// Finally, we have checked everything. Our first and last position are both inclusive,
// while the bitvector count uses past-the-end, so we need to add one here for the last.
return bv.count( first, last + 1 );
return pop_count( bv, first, last + 1 );
};

// If the window is a WindowStream over a whole genome, we use all its chromosomes.
Expand Down
5 changes: 3 additions & 2 deletions lib/genesis/population/genome_locus_set.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@

#include "genesis/population/genome_locus_set.hpp"

#include "genesis/utils/math/bitvector/functions.hpp"
#include "genesis/utils/math/bitvector/operators.hpp"

#include <cassert>
Expand Down Expand Up @@ -90,7 +91,7 @@ void GenomeLocusSet::add(
assert( bv.size() >= end + 1 );

// Now set all bits in between the two positions, inclusive.
bv.set( start, end + 1 );
bv.set_range( start, end + 1 );
// for( size_t i = start; i <= end; ++i ) {
// bv.set( i );
// }
Expand Down Expand Up @@ -203,7 +204,7 @@ void GenomeLocusSet::set_intersect( GenomeLocusSet const& rhs )
// so remove it from the to-delete list. If all its bits are 0, we have eliminated
// all positions from the filter, so we might as well delete the whole vector; in that
// case, we simply keep it in the to-delete list and then it gets removed below.
if( lhs_bits.count() > 0 ) {
if( pop_count(lhs_bits) > 0 ) {
chrs_to_delete.erase( chr.first );
}
}
Expand Down
5 changes: 3 additions & 2 deletions lib/genesis/population/genome_locus_set.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
#include "genesis/population/genome_region_list.hpp"
#include "genesis/sequence/sequence_dict.hpp"
#include "genesis/utils/math/bitvector.hpp"
#include "genesis/utils/math/bitvector/functions.hpp"

namespace genesis {
namespace population {
Expand Down Expand Up @@ -386,7 +387,7 @@ class GenomeLocusSet

// We do not need to do an extra check for position 0 here.
// If it is true, then so will be the result.
return bv.any_set();
return any_set( bv );
}

// -------------------------------------------------------------------------
Expand Down Expand Up @@ -422,7 +423,7 @@ class GenomeLocusSet

// If the above is not the case, check the actual start_position.
// If the start_position is outside of the bitvector, it is not covered, obviously.
return bitvector.find_next_set( start_position );
return find_next_set( bitvector, start_position );
}

/**
Expand Down
7 changes: 5 additions & 2 deletions lib/genesis/sequence/functions/functions.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
Genesis - A toolkit for working with phylogenetic data.
Copyright (C) 2014-2019 Lucas Czech and HITS gGmbH
Copyright (C) 2014-2024 Lucas Czech
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
Expand Down Expand Up @@ -35,6 +35,9 @@
#include "genesis/sequence/printers/simple.hpp"

#include "genesis/utils/core/logging.hpp"
#include "genesis/utils/math/bitvector.hpp"
#include "genesis/utils/math/bitvector/functions.hpp"
#include "genesis/utils/math/bitvector/operators.hpp"
#include "genesis/utils/text/string.hpp"
#include "genesis/utils/text/style.hpp"

Expand Down Expand Up @@ -189,7 +192,7 @@ void remove_sites( Sequence& seq, utils::Bitvector sites )
);
}

auto const num_sites = sites.size() - sites.count();
auto const num_sites = sites.size() - pop_count(sites);
std::string result;
result.reserve( num_sites );

Expand Down
5 changes: 3 additions & 2 deletions lib/genesis/tree/bipartition/bipartition.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

/*
Genesis - A toolkit for working with phylogenetic data.
Copyright (C) 2014-2019 Lucas Czech and HITS gGmbH
Copyright (C) 2014-2024 Lucas Czech
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
Expand Down Expand Up @@ -34,6 +34,7 @@
#include "genesis/tree/tree.hpp"
#include "genesis/tree/tree/subtree.hpp"
#include "genesis/utils/math/bitvector.hpp"
#include "genesis/utils/math/bitvector/functions.hpp"

#include <stdexcept>

Expand Down Expand Up @@ -96,7 +97,7 @@ class Bipartition

void invert()
{
leaf_nodes_.negate();
negate(leaf_nodes_);
link_ = &link_->outer();
}

Expand Down
12 changes: 6 additions & 6 deletions lib/genesis/tree/bipartition/functions.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
Genesis - A toolkit for working with phylogenetic data.
Copyright (C) 2014-2020 Lucas Czech and HITS gGmbH
Copyright (C) 2014-2024 Lucas Czech
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
Expand Down Expand Up @@ -249,18 +249,18 @@ Bipartition find_smallest_subtree(

auto const inverted = ~(bip.leaf_nodes());
if( utils::is_subset( comp, bip.leaf_nodes() )) {
if( min_count == 0 || bip.leaf_nodes().count() < min_count ) {
if( min_count == 0 || pop_count(bip.leaf_nodes()) < min_count ) {
best_bip = bip;
min_count = best_bip.leaf_nodes().count();
min_count = pop_count(best_bip.leaf_nodes());
}
}
if( utils::is_subset( comp, inverted ) ) {
if( min_count == 0 || inverted.count() < min_count ) {
if( min_count == 0 || pop_count(inverted) < min_count ) {
best_bip = bip;
best_bip.invert();

assert( best_bip.leaf_nodes().count() == inverted.count() );
min_count = best_bip.leaf_nodes().count();
assert( pop_count( best_bip.leaf_nodes() ) == pop_count( inverted ) );
min_count = pop_count( best_bip.leaf_nodes() );
}
}
}
Expand Down
8 changes: 5 additions & 3 deletions lib/genesis/tree/bipartition/rf.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
Genesis - A toolkit for working with phylogenetic data.
Copyright (C) 2014-2019 Lucas Czech
Copyright (C) 2014-2024 Lucas Czech
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
Expand Down Expand Up @@ -34,6 +34,8 @@
#include "genesis/tree/function/functions.hpp"
#include "genesis/tree/iterator/postorder.hpp"
#include "genesis/utils/core/algorithm.hpp"
#include "genesis/utils/math/bitvector/functions.hpp"
#include "genesis/utils/math/bitvector/operators.hpp"

#include <cstdint>
#include <algorithm>
Expand Down Expand Up @@ -201,14 +203,14 @@ void rf_get_bitvectors_template(

// Call the bitvector processor functor now, as we just finished constructing a split.
// We normalize first to make sure that we always get comparable bitvectors in the end.
current.normalize();
normalize(current);
process_bitvector( current );
}
}

// We have traversed all node names now. If there is still an unset bit in the bitvector,
// that means that we did not find all names that are in the tree.
if( name_check.count() != names.size() ) {
if( pop_count(name_check) != names.size() ) {
throw std::runtime_error(
"Cannot calculate RF distance with trees that have different node names. "
"Some names are missing from one of the trees."
Expand Down
1 change: 1 addition & 0 deletions lib/genesis/utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@
#include "genesis/utils/io/string_output_target.hpp"
#include "genesis/utils/math/binomial.hpp"
#include "genesis/utils/math/bit.hpp"
#include "genesis/utils/math/bitvector/functions.hpp"
#include "genesis/utils/math/bitvector.hpp"
#include "genesis/utils/math/bitvector/operators.hpp"
#include "genesis/utils/math/common.hpp"
Expand Down
29 changes: 29 additions & 0 deletions lib/genesis/utils/math/bit.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,9 @@
#include "genesis/utils/core/std.hpp"

#include <array>
#include <climits>
#include <cstdint>
#include <string>
#include <type_traits>

#if GENESIS_CPP_STD >= GENESIS_CPP_STD_20
Expand All @@ -53,6 +55,33 @@
namespace genesis {
namespace utils {

// ================================================================================================
// Print
// ================================================================================================

// See genesis/utils/text/string.hpp
// /**
// * @brief Print the bits in an int as a string.
// */
// template <typename T>
// std::string to_bit_string( T value, bool with_byte_spaces = true )
// {
// static_assert(
// std::is_unsigned<T>::value, "Template parameter must be an unsigned integer type."
// );
// static_assert( CHAR_BIT == 8, "CHAR_BIT != 8" );
//
// std::string res = "";
// T const one = 1;
// for( size_t i = 0; i < sizeof(T) * 8; ++i ) {
// res += (( value & (one << i)) ? "1" : "0");
// if( with_byte_spaces && (i+1) % 8 == 0 ) {
// res += " ";
// }
// }
// return res;
// }

// ================================================================================================
// Pop Count
// ================================================================================================
Expand Down
Loading

0 comments on commit b52958b

Please sign in to comment.