forked from ekg/smithwaterman
-
Notifications
You must be signed in to change notification settings - Fork 0
/
BandedSmithWaterman.h
111 lines (101 loc) · 4.44 KB
/
BandedSmithWaterman.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
#pragma once
#include <iostream>
#include <algorithm>
#include <memory>
//#include "Alignment.h"
#include "Mosaik.h"
//#include "HashRegion.h"
#include <string.h>
#include <stdio.h>
#include <sstream>
#include <string>
using namespace std;
#define MOSAIK_NUM_NUCLEOTIDES 26
#define GAP '-'
typedef unsigned char DirectionType;
typedef unsigned char PositionType;
struct ElementInfo {
unsigned int Direction : 2;
unsigned int mSizeOfVerticalGaps : 15;
unsigned int mSizeOfHorizontalGaps : 15;
};
class CBandedSmithWaterman {
public:
// constructor
CBandedSmithWaterman(float matchScore, float mismatchScore, float gapOpenPenalty, float gapExtendPenalty, unsigned int bandWidth);
// destructor
~CBandedSmithWaterman(void);
// aligns the query sequence to the anchor using the Smith Waterman Gotoh algorithm
void Align(unsigned int& referenceAl, string& stringAl, const string& s1, const string& s2, pair< pair<unsigned int, unsigned int>, pair<unsigned int, unsigned int> >& hr);
// enables homo-polymer scoring
void EnableHomoPolymerGapPenalty(float hpGapOpenPenalty);
private:
// calculates the score during the forward algorithm
float CalculateScore(const string& s1, const string& s2, const unsigned int rowNum, const unsigned int columnNum, float& currentQueryGapScore, const unsigned int rowOffset, const unsigned int columnOffset);
// creates a simple scoring matrix to align the nucleotides and the ambiguity code N
void CreateScoringMatrix(void);
// corrects the homopolymer gap order for forward alignments
void CorrectHomopolymerGapOrder(const unsigned int numBases, const unsigned int numMismatches);
// returns the maximum floating point number
static inline float MaxFloats(const float& a, const float& b, const float& c);
// reinitializes the matrices
void ReinitializeMatrices(const PositionType& positionType, const unsigned int& s1Length, const unsigned int& s2Length, const pair< pair<unsigned int, unsigned int>, pair<unsigned int, unsigned int> > hr);
// performs the backtrace algorithm
void Traceback(unsigned int& referenceAl, string& stringAl, const string& s1, const string& s2, unsigned int bestRow, unsigned int bestColumn, const unsigned int rowOffset, const unsigned int columnOffset);
// updates the best score during the forward algorithm
inline void UpdateBestScore(unsigned int& bestRow, unsigned int& bestColumn, float& bestScore, const unsigned int rowNum, const unsigned int columnNum, const float score);
// our simple scoring matrix
float mScoringMatrix[MOSAIK_NUM_NUCLEOTIDES][MOSAIK_NUM_NUCLEOTIDES];
// keep track of maximum initialized sizes
unsigned int mCurrentMatrixSize;
unsigned int mCurrentAnchorSize;
unsigned int mCurrentAQSumSize;
unsigned int mBandwidth;
// define our backtrace directions
const static DirectionType Directions_STOP;
const static DirectionType Directions_LEFT;
const static DirectionType Directions_DIAGONAL;
const static DirectionType Directions_UP;
// store the backtrace pointers
ElementInfo* mPointers;
// define our position types
const static PositionType Position_REF_AND_QUERY_ZERO;
const static PositionType Position_REF_ZERO;
const static PositionType Position_QUERY_ZERO;
const static PositionType Position_REF_AND_QUERO_NONZERO;
// define scoring constants
const float mMatchScore;
const float mMismatchScore;
const float mGapOpenPenalty;
const float mGapExtendPenalty;
// score if xi aligns to a gap after yi
float* mAnchorGapScores;
// best score of alignment x1...xi to y1...yi
float* mBestScores;
// our reversed alignment
char* mReversedAnchor;
char* mReversedQuery;
// define static constants
static const float FLOAT_NEGATIVE_INFINITY;
// toggles the use of the homo-polymer gap open penalty
bool mUseHomoPolymerGapOpenPenalty;
float mHomoPolymerGapOpenPenalty;
};
// returns the maximum floating point number
inline float CBandedSmithWaterman::MaxFloats(const float& a, const float& b, const float& c) {
float max = 0.0f;
if(a > max) max = a;
if(b > max) max = b;
if(c > max) max = c;
return max;
}
// updates the best score during the forward algorithm
inline void CBandedSmithWaterman::UpdateBestScore(unsigned int& bestRow, unsigned int& bestColumn, float& bestScore, const unsigned int rowNum, const unsigned int columnNum, const float score) {
//const unsigned int row = rowNum + rowOffset;
//const unsigned int column = columnOffset - rowNum + columnNum;
if(score > bestScore) {
bestRow = rowNum;
bestColumn = columnNum;
bestScore = score;
}
}