-
Notifications
You must be signed in to change notification settings - Fork 0
/
ScoringMatrix.hpp
211 lines (166 loc) · 6.26 KB
/
ScoringMatrix.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
/*
* Copyright (C) 2006 Cold Spring Harbor Laboratory
* Authors: Andrew D. Smith, Pavel Sumazin and Michael Q. Zhang
*
* This file is part of CREAD.
*
* CREAD is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* CREAD is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with CREAD; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef SCORINGMATRIX_HPP
#define SCORINGMATRIX_HPP
/**
\file ScoringMatrix.hpp
\brief This header file contains class definitions for ScoringMatrix
and the related StadenPValue class, along with associated non-member
function declarations.
*/
#include <algorithm>
#include <cstddef>
#include <iosfwd>
#include <limits>
#include <string>
#include <vector>

#include "Matrix.hpp"
/**
  \brief Class to represent log-ratio scoring matrices for motifs.
  \class ScoringMatrix

  The matrix is held as a heap-allocated array of \c width column
  pointers (\c float**). The destructor releases every column with
  \c delete[] and then the column array itself, which is why the copy
  constructor and the assignment operator are user-declared.
*/
class ScoringMatrix {
public:
  /**
    \brief Default constructor, width-zero scoring matrix
  */
  ScoringMatrix() : matrix(0), width(0) {}

  /**
    \brief Construct from a raw array of column pointers
    \param m array of \c w column pointers
    \param w number of columns
    \note NOTE(review): whether \c m is deep-copied or adopted is decided
          by the out-of-line definition and is not visible in this
          header; confirm before passing memory the caller also frees.
  */
  explicit ScoringMatrix(float **m, const size_t w);

  /**
    \brief Minimal constructor with non-empty initializations
    \param mat Matrix object to convert into the ScoringMatrix
    \param base_comp float vector of base probabilities
    \param correction float to add to matrix entries to prevent log of 0
  */
  ScoringMatrix(const Matrix& mat,
                const std::vector<float> &base_comp,
                float correction = std::numeric_limits<float>::min());

  /**
    \brief Minimal constructor with non-empty initializations
    \param mat Matrix object to convert into the ScoringMatrix
    \param base_comp float array of base probabilities
    \param correction float to add to matrix entries to prevent log of 0
  */
  ScoringMatrix(const Matrix& mat,
                const float base_comp[],
                float correction = std::numeric_limits<float>::min());

  /**
    \brief copy constructor; needed because of dynamic allocation
    \param other the other scoring matrix to copy
  */
  ScoringMatrix(const ScoringMatrix &other);

  /**
    \brief assignment operator; needed because of dynamic allocation
    \param rhs the scoring matrix being assigned
    \return reference to a scoring matrix
  */
  ScoringMatrix& operator=(const ScoringMatrix &rhs);

  /**
    \brief destructor; needed because of dynamic allocation

    Frees each of the \c width columns and then the column array.
  */
  ~ScoringMatrix() {delete_matrix(matrix, width);}

  /**
    \brief swap method; defined for efficiency
    \param sm the other scoring matrix with which to swap internals
  */
  void swap(ScoringMatrix &sm);

  // Container-style typedefs: an "element" of the scoring matrix is one
  // column, i.e. a pointer to that column's floats.
  typedef float** pointer;
  typedef const float*const* const_pointer;
  typedef float*& reference;
  typedef const float*const& const_reference;
  typedef pointer iterator;
  typedef const_pointer const_iterator;

  /// Iterator to start of scoring matrix columns.
  iterator begin() {return matrix;}
  /// Iterator to end of scoring matrix columns.
  iterator end() {return matrix + width;}
  /// Constant iterator to start of scoring matrix columns.
  const_iterator begin() const {return matrix;}
  /// Constant iterator to end of scoring matrix columns.
  const_iterator end() const {return matrix + width;}

  /// Get the n-th column of the scoring matrix (no bounds check)
  reference operator[](size_t n) {return *(begin() + n);}
  /// Get the n-th column of the scoring matrix (as const. ref.; no bounds check)
  const_reference operator[](size_t n) const {return *(begin() + n);}

  /// Get string representation of the scoring matrix
  std::string tostring() const;

  // accessors
  /// Get the number of columns in the scoring matrix
  size_t size() const {return width;}
  /// Get the number of columns in the scoring matrix (same value as size())
  size_t get_width() const {return width;}

  /// Get the reverse complement of the scoring matrix
  ScoringMatrix revcomp() const;

  /**
    \brief convert a score to a functional depth
    \param score the score to convert
    \return the corresponding functional depth
  */
  float functional_depth(const float score) const;

  /**
    \brief convert a functional depth to a score
    \param fd the functional depth to convert
    \return the corresponding score
  */
  float functional_depth_to_score(const float fd) const;

  /**
    \brief Named factory taking the base composition as a float array
    \param mat Matrix object to convert
    \param base_comp float array of base probabilities
    \note NOTE(review): the exact scoring scheme is defined out of line;
          confirm against the implementation.
  */
  static ScoringMatrix StormoScoringMatrix(const Matrix& mat,
                                           const float *base_comp);

  /**
    \brief Named factory taking the base composition as a float vector
    \param mat Matrix object to convert
    \param basecomp float vector of base probabilities
    \note NOTE(review): the exact scoring scheme is defined out of line;
          confirm against the implementation.
  */
  static ScoringMatrix StormoScoringMatrix(const Matrix& mat,
                                           const std::vector<float> &basecomp);

private:
  float **matrix;  ///< array of \c width column pointers (0 when empty)
  size_t width;    ///< number of columns

  // Functions to delete matrices (and columns)

  /// Release a single column with delete[].
  template <class T> static void delete_column(T* p) {delete[] p;}

  /**
    \brief Release the first \c n columns of \c p, then \c p itself.
    \param p array of column pointers; may be null when \c n is 0
             (the default-constructed state)
    \param n number of columns; declared as \c size_t to match \c width,
             so the destructor's call involves no unsigned-to-signed
             narrowing
  */
  template <class T> static void delete_matrix(T **p, size_t n) {
    std::for_each(p, p + n, delete_column<T>);
    delete[] p;
  }

  /// Constant 0.005; its use is in the out-of-line implementation.
  static float epsilon() {return 0.005;}
  /// Constant 1e-10; its use is in the out-of-line implementation.
  static float default_correction() {return 0.0000000001;}
};
/**
  \brief Stream-insertion operator for ScoringMatrix (defined out of line).
  \param s output stream
  \param sm scoring matrix to insert into the stream
  \return reference to an output stream
          (NOTE(review): presumably \c s itself, to allow chaining —
          confirm against the definition)
*/
std::ostream& operator<<(std::ostream& s, const ScoringMatrix& sm);
/**
  \brief Helper class to organize and speed calculation of Staden
  p-values for scoring matrix matches.
  \class StadenPValue

  This class is used to implement the calculation of p-values for
  scoring matrix matches as described originally by Staden
  (NOTE(review): presumably Staden, R. (1989), Comput. Appl. Biosci.
  5(2):89-96 — confirm the citation).

  The four work vectors are sized once, at construction, to
  \c default_max_score + 1 entries and are declared \c mutable, so the
  \c const query methods are able to write to them.
  NOTE(review): because of that, do not assume concurrent calls on one
  instance are safe without checking the implementation.
*/
class StadenPValue {
public:
  /**
    \brief Store the base composition and scale; allocate work buffers
    \param bc base composition, copied into \c base_comp
    \param sc value stored in \c scale
              (NOTE(review): its exact role is defined out of line)

    All members are set in the initializer list, in declaration order,
    so each work vector is allocated exactly once.
  */
  StadenPValue(const std::vector<float>& bc, const float sc) :
    used(default_max_score + 1),
    prev_used(default_max_score + 1),
    counts(default_max_score + 1),
    prev_counts(default_max_score + 1),
    base_comp(bc),
    scale(sc) {}

  // NOTE(review): every query below takes the ScoringMatrix by value;
  // normalize_matrix() takes a non-const reference, so presumably the
  // local copy is what gets normalized — confirm in the implementation.

  /// Get the p-value for one score against scoring matrix \c sm
  float get_pvalue(ScoringMatrix sm, const float score) const;

  /// Get the score for one p-value against scoring matrix \c sm
  float get_score(ScoringMatrix sm, const float pvalue) const;

  /// Batch form of get_pvalue: \c scores is input, \c pvalues is output
  void get_pvalues(ScoringMatrix sm,
                   const std::vector<float> &scores,
                   std::vector<float> &pvalues) const;

  /// Batch form of get_score: \c pvalue is input, \c scores is output
  void get_scores(ScoringMatrix sm,
                  const std::vector<float> &pvalue,
                  std::vector<float> &scores) const;

private:
  /// The work vectors hold default_max_score + 1 entries each.
  static const size_t default_max_score = 10000;

  // Work buffers; mutable so that const member functions may modify them.
  mutable std::vector<size_t> used;
  mutable std::vector<size_t> prev_used;
  mutable std::vector<double> counts;
  mutable std::vector<double> prev_counts;

  std::vector<float> base_comp;  ///< copy of the constructor's \c bc
  float scale;                   ///< copy of the constructor's \c sc

  /// Modifies \c sm in place and returns a float
  /// (NOTE(review): meaning of the return value is defined out of line).
  float normalize_matrix(ScoringMatrix& sm) const;
};
#endif