-
Notifications
You must be signed in to change notification settings - Fork 0
/
Matrix.hpp
177 lines (137 loc) · 4.95 KB
/
Matrix.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
/*
* Copyright (C) 2006 Cold Spring Harbor Laboratory
* Authors: Andrew D. Smith, Pavel Sumazin and Michael Q. Zhang
*
* This file is part of CREAD.
*
* CREAD is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* CREAD is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with CREAD; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef MATRIX_HPP
#define MATRIX_HPP
#include <smithlab_utils.hpp>
/**
\file Matrix.hpp
\brief This header file contains the class definitions for Matrix and
MatrixException, along with associated non-member function declarations.
*/
/**
  \brief Represents position-weight matrices for DNA sequence motifs.
  \class Matrix

  The data is held as an array of \c width column pointers (\c float**).
  Per the constructor documentation, each column is a float array with
  one entry per alphabet symbol (4 for DNA). Only the small accessors are
  defined inline in this header; every other member is merely declared
  here.
*/
class Matrix {
public:
  /**
    \brief Basic constructor
    \param mat array of columns, each a float array of size
    equal to alphabet_size (4 for DNA).
    \param w the number of columns in the array.

    Both parameters default to zero, so this also serves as the default
    constructor.
    NOTE(review): whether \p mat is deep-copied or adopted is decided by
    the out-of-line definition -- confirm before relying on either.
  */
  Matrix(float **mat = 0, size_t w = 0);

  /*!
    \brief Constructor that parses string set representation.
    \param lines_from_file an ordered set of lines, one for each
    column of the matrix
  */
  explicit Matrix(std::vector<std::string>& lines_from_file);

  /// Copy constructor
  Matrix(const Matrix& original);

  /// Assignment operator; \p rhs is the matrix being assigned from.
  Matrix& operator=(const Matrix& rhs);

  /// Destructor
  ~Matrix();

  /// Swap method: exchanges state with \p other.
  void swap(Matrix &other);

  // Container-style type names. An "element" of the container is one
  // column, i.e. a pointer to that column's float array.
  typedef float** pointer;
  typedef const float *const * const_pointer;
  typedef float*& reference;
  typedef const float *const & const_reference;
  typedef pointer iterator;
  typedef const_pointer const_iterator;

  /// Iterator to start of matrix columns.
  iterator begin() {return matrix;}

  /// Iterator to end of matrix columns (one past the last column).
  iterator end() {return matrix + width;}

  /// Constant iterator to start of matrix columns.
  const_iterator begin() const {return matrix;}

  /// Constant iterator to end of matrix columns (one past the last column).
  const_iterator end() const {return matrix + width;}

  /// Get the n-th column of the matrix. No bounds check is performed.
  reference operator[](size_t n) {return begin()[n];}

  /// Get the n-th column of the matrix (const. ref.). No bounds check is
  /// performed.
  // Deliberately indexes through the const begin(): that way the returned
  // reference binds to the stored column pointer itself rather than to a
  // converted temporary.
  const_reference operator[](size_t n) const {return begin()[n];}

  /// Get string representation of the matrix
  std::string tostring() const;

  /// Get the number of columns in the matrix
  size_t get_width() const {return width;}

  // mutators

  /*!
    \brief Add a scaled pseudocount to each matrix entry based on
    specified base composition.

    The pseudocount at an entry will be determined by the base
    composition (specific to each base), and the weight given to
    pseudocounts in general relative to real counts.

    \param base_comp frequencies of bases
    \param weight value multiplied with the base frequency to obtain
    the pseudocount
  */
  void base_comp_pseudocount(const std::vector<float> &base_comp,
                             float weight = 1.0);

  /*!
    \brief Add a unit value to each entry of the matrix
  */
  void Laplace_pseudocount();

  /*!
    \brief Pointer to the n-th slot of the internal column-pointer array.
    \param n column index; no bounds check is performed.
    \return \c matrix + n, viewed as <tt>const float **</tt>.

    \warning The original author flagged this accessor as "very bad
    stuff". <tt>float**</tt> does not convert implicitly to
    <tt>const float**</tt> because that conversion is not const-safe, so
    an explicit cast is required; a named \c const_cast is used so the
    cast is easy to find. The result points into this object's private
    storage.
  */
  // TODO: replace with a const-correct accessor (see const operator[]).
  const float **at(size_t n) const {
    return const_cast<const float **>(matrix + n);
  }

  /*!
    \brief Copy of matrix, with columns normalized to have unit sum.
    \return Copy of matrix, with columns normalized to have unit sum.
  */
  Matrix freqmat() const;

  /*!
    \brief Copy of matrix, with columns normalized to have unit sum,
    and a base composition pseudocount applied.
    \return Pseudocount-corrected and normalized copy of the matrix.

    NOTE(review): the unnamed array argument is presumably the base
    composition (one value per base) -- confirm against the definition.
  */
  Matrix corrected_freqmat(float []) const;

  /// Get the reverse complement of the matrix
  Matrix revcomp() const;

  /// test if the matrix entries are counts or frequencies
  /// NOTE(review): presumably returns true for counts -- confirm.
  bool is_count_mat() const;

  // Four overloads are declared for the information content; they differ
  // only in how the float argument is passed (raw array vs. vector, const
  // vs. non-const).
  // NOTE(review): the argument is presumably the background base
  // composition -- confirm against the definitions.

  /// Total information content of the matrix (raw array argument).
  float GetInformationContent(float []) const;

  /// Total information content of the matrix (vector argument, taken by
  /// non-const reference).
  float GetInformationContent(std::vector<float> &f) const;

  /// Total information content of the matrix (const vector argument).
  float info(const std::vector<float> &) const;

  /// Total information content of the matrix (const raw array argument).
  float info(const float []) const;

  /*!
    \brief Build a matrix from two input matrices.
    \param a first matrix
    \param b second matrix
    \param max_overhang defaults to 0

    NOTE(review): undocumented in the original. The name suggests the two
    matrices are merged with at most \p max_overhang non-overlapping
    columns -- confirm against the definition.
  */
  static Matrix combine(const Matrix& a, const Matrix& b,
                        int max_overhang = 0);

private:
  float **matrix;  ///< array of \c width column pointers
  size_t width;    ///< number of columns

  // MatCompMethods is granted direct access to the private members above.
  friend class MatCompMethods;
};
/// Stream-insertion operator for Matrix objects; only the declaration
/// appears here.
/// \param s the output stream to write to
/// \param mat the matrix to write
/// \return a reference to an output stream -- presumably \p s itself, so
/// that insertions can be chained (confirm against the definition).
/// NOTE(review): the emitted text presumably matches Matrix::tostring()
/// -- confirm against the definition.
std::ostream&
operator<<(std::ostream& s, const Matrix& mat);
/*!
  \exception MatrixException
  \brief Used for reporting errors when handling matrices.

  A thin subclass of SMITHLABException that adds no state of its own; it
  exists so matrix-related failures can be caught by type.
*/
class MatrixException : public SMITHLABException {
public:
  /*!
    \brief Construct with an optional message.
    \param m message text, forwarded to the SMITHLABException base
    (defaults to the empty string).
  */
  // No exception specification on purpose: copying the std::string
  // argument may throw std::bad_alloc, so the former `throw()` promise
  // could not be kept (and `throw()` is removed from the language in
  // C++20).
  MatrixException(std::string m = "") : SMITHLABException(m) {}
};
#endif