Skip to content

Commit

Permalink
Fix alignment of larger sequences
Browse files Browse the repository at this point in the history
  • Loading branch information
rvaser committed Nov 3, 2020
1 parent 6324d80 commit 07d77bb
Show file tree
Hide file tree
Showing 9 changed files with 273 additions and 152 deletions.
6 changes: 4 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
cmake_minimum_required(VERSION 3.9)

project(spoa VERSION 4.0.4
project(spoa VERSION 4.0.5
LANGUAGES CXX
DESCRIPTION "Spoa is a c++ library (and tool) for SIMD vectorized partial order alignment.")

Expand Down Expand Up @@ -61,7 +61,7 @@ target_include_directories(${PROJECT_NAME} PUBLIC
target_link_libraries(${PROJECT_NAME}
cereal)
if (BUILD_SHARED_LIBS)
set_property(TARGET ${PROJECT_NAME} PROPERTY SOVERSION = "6.0.0")
set_property(TARGET ${PROJECT_NAME} PROPERTY SOVERSION = "7.0.0")
endif ()

if (spoa_generate_dispatch)
Expand All @@ -77,6 +77,8 @@ if (spoa_generate_dispatch)
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/vendor/simde>
$<INSTALL_INTERFACE:include>)
target_link_libraries(${PROJECT_NAME}_${arch}
cereal)
set_target_properties(${PROJECT_NAME}_${arch} PROPERTIES
COMPILE_FLAGS "-m${arch}")
if (BUILD_SHARED_LIBS)
Expand Down
8 changes: 6 additions & 2 deletions include/spoa/alignment_engine.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ class AlignmentEngine {

virtual void Prealloc(
std::uint32_t max_sequence_len,
std::uint32_t alphabet_size) = 0;
std::uint8_t alphabet_size) = 0;

Alignment Align(
const std::string& sequence,
Expand All @@ -66,7 +66,7 @@ class AlignmentEngine {
virtual Alignment Align(
const char* sequence, std::uint32_t sequence_len,
const Graph& graph,
std::int32_t* score = nullptr) noexcept = 0;
std::int32_t* score = nullptr) = 0;

protected:
AlignmentEngine(
Expand All @@ -79,6 +79,10 @@ class AlignmentEngine {
std::int8_t q,
std::int8_t c);

std::int64_t WorstCaseAlignmentScore(
std::int64_t sequence_len,
std::int64_t graph_len) const;

AlignmentType type_;
AlignmentSubtype subtype_;
std::int8_t m_;
Expand Down
11 changes: 11 additions & 0 deletions src/alignment_engine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,4 +98,15 @@ Alignment AlignmentEngine::Align(
return Align(sequence.c_str(), sequence.size(), graph, score);
}

std::int64_t AlignmentEngine::WorstCaseAlignmentScore(
std::int64_t i,
std::int64_t j) const {
auto gap_score = [&] (std::int64_t len) -> std::int64_t {
return len == 0 ? 0 : std::min(g_ + (len - 1) * e_, q_ + (len - 1) * c_);
};
return std::min(
-1 * (m_ * std::min(i, j) + gap_score(std::abs(i - j))),
gap_score(i) + gap_score(j));
}

} // namespace spoa
24 changes: 20 additions & 4 deletions src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ std::unique_ptr<bioparser::Parser<biosoup::Sequence>> CreateParser(
str.compare(str.size() - suff.size(), suff.size(), suff) == 0;
};


if (is_suffix(path, ".fasta") || is_suffix(path, ".fasta.gz") ||
is_suffix(path, ".fna") || is_suffix(path, ".fna.gz") ||
is_suffix(path, ".faa") || is_suffix(path, ".faa.gz") ||
Expand Down Expand Up @@ -263,18 +262,35 @@ int main(int argc, char** argv) {
for (const auto& it : sequences) {
max_sequence_len = std::max(max_sequence_len, it->data.size());
}
alignment_engine->Prealloc(max_sequence_len, 4);
try {
alignment_engine->Prealloc(max_sequence_len, 4);
} catch (std::invalid_argument& exception) {
std::cerr << exception.what() << std::endl;
return 1;
}

spoa::Graph graph{};
std::vector<bool> is_reversed;
for (const auto& it : sequences) {
std::int32_t score = 0;
auto alignment = alignment_engine->Align(it->data, graph, &score);
spoa::Alignment alignment;
try {
alignment = alignment_engine->Align(it->data, graph, &score);
} catch (std::invalid_argument& exception) {
std::cerr << exception.what() << std::endl;
return 1;
}

if (is_strand_ambiguous) {
it->ReverseAndComplement();
std::int32_t score_rev = 0;
auto alignment_rev = alignment_engine->Align(it->data, graph, &score_rev);
spoa::Alignment alignment_rev;
try {
alignment_rev = alignment_engine->Align(it->data, graph, &score_rev);
} catch (std::invalid_argument& exception) {
std::cerr << exception.what() << std::endl;
return 1;
}
if (score >= score_rev) {
it->ReverseAndComplement();
is_reversed.push_back(false);
Expand Down
22 changes: 11 additions & 11 deletions src/simd_alignment_engine.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,12 +48,12 @@ class SimdAlignmentEngine: public AlignmentEngine {

void Prealloc(
std::uint32_t max_sequence_len,
std::uint32_t alphabet_size) override;
std::uint8_t alphabet_size) override;

Alignment Align(
const char* sequence, std::uint32_t sequence_len,
const Graph& graph,
std::int32_t* score) noexcept override;
std::int32_t* score) override;

friend std::unique_ptr<AlignmentEngine> CreateSimdAlignmentEngine(
AlignmentType type,
Expand All @@ -78,34 +78,34 @@ class SimdAlignmentEngine: public AlignmentEngine {

template<typename T>
Alignment Linear(
const char* sequence, std::uint32_t sequence_len,
std::uint32_t sequence_len,
const Graph& graph,
std::int32_t* score) noexcept;

template<typename T>
Alignment Affine(
const char* sequence, std::uint32_t sequence_len,
std::uint32_t sequence_len,
const Graph& graph,
std::int32_t* score) noexcept;

template<typename T>
Alignment Convex(
const char* sequence, std::uint32_t sequence_len,
std::uint32_t sequence_len,
const Graph& graph,
std::int32_t* score) noexcept;

void Realloc(
std::uint32_t matrix_width,
std::uint32_t matrix_height,
std::uint32_t num_codes);
std::uint64_t matrix_width,
std::uint64_t matrix_height,
std::uint8_t num_codes);

template<typename T>
void Initialize(
const char* sequence,
const Graph& graph,
std::uint32_t normal_matrix_width,
std::uint32_t matrix_width,
std::uint32_t matrix_height) noexcept;
std::uint64_t normal_matrix_width,
std::uint64_t matrix_width,
std::uint64_t matrix_height) noexcept;

struct Implementation;
std::unique_ptr<Implementation> pimpl_;
Expand Down
Loading

0 comments on commit 07d77bb

Please sign in to comment.