-
Notifications
You must be signed in to change notification settings - Fork 0
/
psl2gtf.c
74 lines (67 loc) · 1.83 KB
/
psl2gtf.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
/**
**
** Author: Nadia Davidson, [email protected]
** Modified: 26 October 2016
**/
#include <iostream>
#include <fstream>
#include <istream>
#include <string>
#include <sstream>
#include <map>
#include <stdlib.h>
#include <vector>
#include <algorithm>
#include "misc.h"
using namespace std;
/**
 * Take a psl file from blat and report each alignment block as a gtf exon line
 **/
// the real stuff starts here.
int main(int argc, char **argv){
if(argc!=2){
cout << "Wrong number of arguments" << endl;
cout << endl;
cout << "Usage: psl3gtf <psl>" << endl;
exit(1);
}
ifstream file;
//Open the exons file
file.open(argv[1]);
if(!(file.good())){
cout << "Unable to open file " << argv[1] << endl;
exit(1);
}
// reading the blat table
string line;
string column;
string gene_id;
string trans_id;
string strand;
for(int line_skip=5; line_skip!=0 && getline(file,line) ; line_skip--); //skip the first line
while(getline(file,line) ){
istringstream line_stream(line);
for(int col_skip=8; col_skip!=0 && (line_stream >> column); col_skip--);
line_stream >> strand;
line_stream >> trans_id;
for(int col_skip=3; col_skip!=0 && (line_stream >> column); col_skip--);
line_stream >> gene_id;
for(int col_skip=4; col_skip!=0 && (line_stream >> column); col_skip--);
line_stream >> column;
vector<int> junc_size = get_vector_from_list(column);
line_stream >> column;
line_stream >> column;
vector<int> junc_start = get_vector_from_list(column);
for(int i=0; i < junc_size.size() ; i++){
cout << gene_id << "\t"
<< "psl2gtf\texon\t"
<< junc_start.at(i)+1 << "\t"
<< junc_start.at(i)+junc_size.at(i) << "\t"
<< ".\t" << strand << "\t.\t"
<< "gene_id \"" << gene_id << "\"; "
<< "transcript_id \"" << trans_id << "\";"
<< endl;
}
}
file.close();
}