-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfasta_splitter.cpp
216 lines (188 loc) · 5.74 KB
/
fasta_splitter.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
#include "fasta_splitter.h"
#include "functions.cuh"
fasta_splitter::fasta_splitter(string fasta_File, string output_Path, string fasta_Name)
{
    /**
     * * Constructor Function
     * Stores the passed arguments in the class's private variables.
     *
     * @param fasta_File  path of the merged (raw) FASTA file to read.
     * @param output_Path folder the split FASTA file(s) are written into.
     * @param fasta_Name  sequence ID to extract, or "all" (any letter case) to extract every sequence.
     *                    A single leading '>' is dropped so the ID is stored without it.
     **/
    cout << "Starting up FASTA SPLITTER" << endl
         << endl;

    this->fasta_File = fasta_File;
    cout << "Raw FASTA file\t : " << this->fasta_File << endl;

    this->output_Folder = output_Path;

    /**
     * A lowercase copy of the ID is compared against "all" so that "ALL", "All", etc. are accepted.
     * The character is passed through unsigned char because ::tolower has undefined behaviour
     * for negative char values (bytes outside the ASCII range).
     **/
    string check = fasta_Name;
    transform(check.begin(), check.end(), check.begin(),
              [](unsigned char character)
              { return static_cast<char>(::tolower(character)); });

    if (check == "all")
    {
        this->fasta_Name = check;
    }
    else if (!fasta_Name.empty() && fasta_Name.front() == '>')
    {
        /**
         * The emptiness test keeps an empty ID from throwing std::out_of_range here;
         * an empty ID is simply stored as is by the branch below.
         **/
        this->fasta_Name = fasta_Name.substr(1);
    }
    else
    {
        this->fasta_Name = fasta_Name;
    }

    cout << endl;
}
void fasta_splitter::ingress()
{
    /**
     * Execution function.
     * Checks that the merged FASTA file is present and then hands over to the
     * split routine matching the user's request.
     **/

    /**
     * ! Nothing can be split if the merged FASTA file is missing, so report it and stop.
     **/
    if (!filesystem::exists(this->fasta_File))
    {
        cout << "INVALID FASTA FILE." << endl
             << "FILE \"" << this->fasta_File << "\" WAS NOT FOUND AT THE LOCATION." << endl;
        return;
    }

    /**
     * "all" extracts every sequence; any other value is treated as a single sequence ID.
     **/
    if (fasta_Name == "all")
    {
        split_all();
        return;
    }

    split_select();
}
void fasta_splitter::split_select()
{
/**
* This function is used to extract a specific sequence from the merged file.
**/
/**
* Call the "functions" class. Bespoke functions commonly used by CATE.
**/
functions function = functions();
fstream fasta_File;
/**
* @param split_Fasta defines the FASTA file names of the output files.
**/
string split_Fasta;
/**
* @param check defines the sequence ID that needs to be extracted.
**/
string check = ">" + fasta_Name;
fasta_File.open(this->fasta_File, ios::in);
if (fasta_File.is_open())
{
/**
* @param found acts as a boolean variable.
* Once the query sequence is found it is used to trigger the writing of the sequence to the output file.
**/
int found = 0;
cout << "Reading FASTA file and extracting sequence: " << fasta_Name << endl
<< endl;
split_Fasta = this->output_Folder + "/" + fasta_Name + ".fasta";
function.createFile(split_Fasta, check);
fstream output;
output.open(split_Fasta, ios::app);
string line;
while (getline(fasta_File, line))
{
if (line.at(0) == '>')
{
/**
* If found is 1 and another line starts with ">" it means that the query sequence is complete.
* Therefore the file read loop will be broken and the program will end
**/
if (found == 1)
{
break;
}
if (line == check)
{
found = 1;
cout << "WRITING sequence: " << line.substr(1, line.length()) << endl;
}
else
{
cout << "Skipping sequence: " << line.substr(1, line.length()) << endl;
}
}
else
{
/**
* All lines will be written till the next sequence.
**/
if (found == 1)
{
output << line << "\n";
}
}
}
output.close();
fasta_File.close();
}
}
void fasta_splitter::split_all()
{
/**
* This function is used to extract all the sequences from the merged file.
**/
functions function = functions();
fstream fasta_File;
string split_Fasta;
fasta_File.open(this->fasta_File, ios::in);
if (fasta_File.is_open())
{
cout << "Reading and splitting entire FASTA file" << endl
<< endl;
string line;
fstream output;
/**
* @param first acts as a boolean variable.
* Ensures close is not triggered on the first sequence itself.
**/
int first = 0;
while (getline(fasta_File, line))
{
if (line.at(0) == '>')
{
if (first == 1)
{
/**
* Ensures close is not triggered on the first sequence itself.
* Ensures currently written sequence is completed and the next sequence has begun.
* The current sequence's file will be closed.
**/
output.close();
}
first = 1;
cout << "Writing sequence: " << line.substr(1, line.length()) << endl;
split_Fasta = this->output_Folder + "/" + line.substr(1, line.length()) + ".fasta";
function.createFile(split_Fasta, line);
output.open(split_Fasta, ios::app);
}
else
{
// fstream output;
// output.open(split_Fasta, ios::app);
output << line << "\n";
// output.close();
}
}
output.close();
fasta_File.close();
}
}