-
Notifications
You must be signed in to change notification settings - Fork 1
/
paf.hpp
100 lines (93 loc) · 2.31 KB
/
paf.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
#include <stdlib.h>
#include <string>
#include <vector>
#include <zlib.h>
#include "kseq.h"
// Instantiate the kseq.h stream template for zlib handles: the stream wraps a
// gzFile and pulls bytes with gzread. NOTE(review): presumably this is what
// provides kstream_t, ks_init, ks_getuntil and ks_destroy used below -- confirm
// against kseq.h.
KSEQ_INIT(gzFile, gzread)
// Handle for a PAF file being read; created by paf_open() and released by
// paf_close().
typedef struct
{
// Type-erased kstream_t*: paf_open() stores the stream here and
// paf_read()/paf_close() cast it back before use.
void *fp;
// Buffer holding the most recently read line; paf_read() fills it and
// paf_close() frees its storage.
kstring_t buf;
} paf_file_t;
// One alignment record, filled in column by column by paf_parse().
typedef struct
{
    const char *qn; // query name; points into the parsed line, NOT allocated
    const char *tn; // target name; points into the parsed line, NOT allocated
    uint32_t ql;    // query length
    uint32_t qs;    // query start
    uint32_t qe;    // query end
    uint32_t tl;    // target length
    uint32_t ts;    // target start
    uint32_t te;    // target end
    uint32_t ml;    // number of residue matches
    uint32_t rev;   // strand: 1 when the strand column begins with '-', else 0
    uint32_t bl;    // alignment block length
} paf_rec_t;
/**
 * Open a PAF file for reading.
 *
 * The file is opened with zlib's gzopen() in read mode and wrapped in a kseq
 * stream; the stream pointer is stored type-erased in the returned handle.
 *
 * @param fn  path passed unchanged to gzopen()
 * @return a heap-allocated handle that the caller must release with
 *         paf_close(), or NULL when the file cannot be opened or an
 *         allocation fails (nothing is leaked in that case)
 */
paf_file_t *paf_open(const char *fn)
{
    gzFile fp = gzopen(fn, "r");
    if (fp == NULL)
        return NULL;

    kstream_t *ks = ks_init(fp);
    if (ks == NULL) {
        // Defensive: do not keep the gz handle open if no stream wraps it.
        gzclose(fp);
        return NULL;
    }

    paf_file_t *pf = (paf_file_t *)calloc(1, sizeof(paf_file_t));
    if (pf == NULL) {
        // Unwind in reverse order of acquisition instead of dereferencing NULL.
        ks_destroy(ks);
        gzclose(fp);
        return NULL;
    }

    // calloc zeroed pf->buf, so the line buffer starts out empty.
    pf->fp = ks;
    return pf;
}
/**
 * Release a handle obtained from paf_open().
 *
 * Frees the line buffer, closes the underlying gz handle, destroys the kseq
 * stream and finally frees the handle itself. A null handle is accepted and
 * ignored.
 *
 * @param pf  handle to release; may be 0
 * @return always 0 (the result of gzclose() is not reported)
 */
int paf_close(paf_file_t *pf)
{
    if (pf != 0) {
        kstream_t *stream = (kstream_t *)pf->fp;
        free(pf->buf.s);
        // The gz handle lives inside the stream, so close it before the
        // stream object is destroyed.
        gzclose(stream->f);
        ks_destroy(stream);
        free(pf);
    }
    return 0;
}
/**
 * Split one tab-separated PAF line in place and fill in a record.
 *
 * Every tab in s[0..l) is overwritten with NUL so that the name fields of
 * the record can point straight into the caller's buffer; nothing is
 * allocated. Columns 0-10 are consumed (query name/len/start/end, strand,
 * target name/len/start/end, residue matches, block length); any further
 * columns are tokenised but ignored.
 *
 * @param l   length of the line in bytes, not counting the terminator
 * @param s   the line; must be NUL-terminated and writable
 * @param pr  record to fill; pr->qn and pr->tn stay valid only as long as
 *            the buffer behind s is neither freed nor overwritten
 * @return 0 on success, -1 when the line has fewer than the 11 columns this
 *         parser fills in. (The file's convention reserves >0 for
 *         "filtered"; this function never returns a positive value.)
 */
int paf_parse(int l, char *s, paf_rec_t *pr)
{
    // Number of leading columns that are stored into paf_rec_t (0..10).
    enum { PAF_PARSED_COLUMNS = 11 };

    char *field = s; // start of the column currently being scanned
    int n_fields = 0; // columns completed so far

    for (int i = 0; i <= l; ++i) {
        // A column ends at a tab or at the end of the line (i == l).
        if (i < l && s[i] != '\t')
            continue;
        s[i] = 0;

        // strtoul rather than strtol: the destinations are uint32_t, and
        // strtol would clamp at LONG_MAX on platforms with a 32-bit long.
        switch (n_fields) {
        case 0:  pr->qn = field; break;
        case 1:  pr->ql = (uint32_t)strtoul(field, NULL, 10); break;
        case 2:  pr->qs = (uint32_t)strtoul(field, NULL, 10); break;
        case 3:  pr->qe = (uint32_t)strtoul(field, NULL, 10); break;
        case 4:  pr->rev = (*field == '-'); break;
        case 5:  pr->tn = field; break;
        case 6:  pr->tl = (uint32_t)strtoul(field, NULL, 10); break;
        case 7:  pr->ts = (uint32_t)strtoul(field, NULL, 10); break;
        case 8:  pr->te = (uint32_t)strtoul(field, NULL, 10); break;
        case 9:  pr->ml = (uint32_t)strtoul(field, NULL, 10); break;
        case 10: pr->bl = (uint32_t)strtoul(field, NULL, 10); break;
        default: break; // trailing columns are not stored
        }

        ++n_fields;
        field = i < l ? &s[i + 1] : NULL;
    }

    // Require every column that is stored above. The previous "< 10" check
    // accepted 10-column lines and returned success with pr->bl never
    // written.
    if (n_fields < PAF_PARSED_COLUMNS)
        return -1;
    return 0;
}
/**
 * Read the next parsable record from an open PAF file.
 *
 * Lines are fetched one at a time into pf->buf and handed to paf_parse();
 * lines that paf_parse() rejects (negative return) are skipped silently and
 * the next line is tried.
 *
 * @param pf  handle returned by paf_open()
 * @param r   record to fill; its name pointers refer into pf->buf and are
 *            therefore overwritten by the next call
 * @return the non-negative result of paf_parse() for the first accepted
 *         line, or the negative value from ks_getuntil() once no further
 *         line can be read
 */
int paf_read(paf_file_t *pf, paf_rec_t *r)
{
    kstream_t *stream = (kstream_t *)pf->fp;

    for (;;) {
        int status = ks_getuntil(stream, KS_SEP_LINE, &pf->buf, 0);
        if (status < 0)
            return status;

        status = paf_parse(pf->buf.l, pf->buf.s, r);
        if (status >= 0)
            return status;
        // Negative: unparsable line, fall through and fetch the next one.
    }
}