-
Notifications
You must be signed in to change notification settings - Fork 1
/
splitfastq.py
executable file
·47 lines (35 loc) · 1.06 KB
/
splitfastq.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
#!/usr/bin/python
'''
split a fastq file into chunks by counting records,
i.e. without needing to seek within the file
this version outputs only one chunk per run, i.e. only every nth record,
starting from a given record offset
'''
import os,sys
if os.uname()[1] != 'mocedades':
sys.path.append('/ibers/ernie/home/rov/python_lib')
from rjv.fastq import *
usage=\
'''
usage: splitfastq.py <fastqfile[.gz]> <chunks> <offset> > <outputfile>
eg splitfastq.py myreads.fq 10 0
gives every tenth record from the file starting with the first record
eg splitfastq.py myreads.fq 20 1
gives every 20th record from the file starting with the second record
'''

newline = '\n'


def select_every_nth(records, chunks, offset):
    '''
    return a lazy iterator over every chunks-th item of records,
    starting with the item at zero-based position offset

    records: any iterable; it is consumed in a single forward pass
    chunks:  step between selected items, must be >= 1
    offset:  position of the first selected item, 0 <= offset < chunks

    raises ValueError straight away (before anything is consumed) if chunks
    or offset is out of range; previously chunks == 0 caused a
    ZeroDivisionError and an out-of-range offset silently selected nothing
    '''
    if chunks < 1:
        raise ValueError('chunks must be a positive integer, got %d' % chunks)
    if not 0 <= offset < chunks:
        raise ValueError('offset must be in the range 0..%d, got %d'
                         % (chunks - 1, offset))
    return (rec for ct, rec in enumerate(records) if ct % chunks == offset)


def format_record(fq):
    '''
    return the four-line fastq text for one record

    fq: mapping providing 'header', 'seq' and 'qual' strings, i.e. the keys
        this script reads from the records yielded by next_fastq
        (presumably the header is stored without its leading '@', since one
        is added here - confirm against rjv.fastq)

    the '+' separator line is emitted bare, the header is not repeated on it
    '''
    return ''.join(['@', fq['header'], newline,
                    fq['seq'], newline,
                    '+', newline,
                    fq['qual'], newline])


def _usage_error(message):
    '''
    report a command line problem and return the exit status to use

    goes to stderr because stdout carries the fastq output and is normally
    redirected into the output file
    '''
    if message:
        sys.stderr.write('error: %s\n' % message)
    sys.stderr.write(usage)
    return 1


def main(argv=None):
    '''
    command line entry point: write every nth record of a fastq file to stdout

    argv: full argument vector including the program name,
          defaults to sys.argv
    returns 0 on success, 1 if the arguments are unusable
    '''
    if argv is None:
        argv = sys.argv

    if len(argv) != 4:
        return _usage_error('')

    inpname = argv[1]
    try:
        chunks = int(argv[2])
        offset = int(argv[3])
    except ValueError:
        return _usage_error('<chunks> and <offset> must be integers')

    # next_fastq comes from rjv.fastq (imported at the top of the file)
    records = next_fastq(inpname)
    try:
        selected = select_every_nth(records, chunks, offset)
    except ValueError as err:
        return _usage_error(err)

    fout = sys.stdout
    for fq in selected:
        fout.write(format_record(fq))
    return 0


if __name__ == '__main__':
    sys.exit(main())