forked from birdumbrella/nano-ID
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathread.extraction.R
65 lines (30 loc) · 1.31 KB
/
read.extraction.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
### prewd ###
# Root directory from which all other paths in this script are derived.
# The "..." path components are placeholders to be filled in with real directory names.
prewd = file.path(getwd(),"...")
### libraries ###
# rhdf5 provides h5read(), used later in this script to read the fast5 (HDF5) files.
library(rhdf5)
### setwd ###
# The relative paths handed to save() and dir.create() later in this script
# are resolved against this working directory.
setwd(file.path(prewd,"..."))
### number of cores ###
# detectCores() is defined in the 'parallel' package, which this script does not attach;
# the call is namespace-qualified so it cannot fail with "could not find function".
mc.cores = parallel::detectCores()
### basic objects ###
# Names that must survive the final clean-up: everything defined so far plus the
# explicitly listed ones. The rm() call at the end of the script removes every
# object whose name is not in this vector.
basic.objects = c(ls(),"basic.objects","fast5.file","fast5.files")
### create fast5.files vector ###
# Recursively collect every file below the data directory whose name ends in ".fast5".
# 'pattern' is a regular expression, so the dot is escaped to match a literal "."
# (an unescaped dot would also accept names such as "readXfast5").
fast5.files = list.files(file.path(prewd,"..."),pattern = "\\.fast5$",full.names = TRUE,recursive = TRUE)
# Strip the leading "<prewd>/" so the stored paths are relative to prewd.
fast5.files = substr(fast5.files,nchar(prewd)+2,nchar(fast5.files))
save(fast5.files,file=file.path("...","fast5.files.RData"))
### extract read sequences and names ###
# Create the output directory only when it is missing, so that re-running the
# script does not raise an "already exists" warning.
if (!dir.exists(file.path("..."))){
  dir.create(file.path("..."))
}
# read.to.fast5.name: named vector, read identifier -> fast5 path relative to prewd.
# read.sequence.list: named list, read identifier -> basecalled sequence with "U" replaced by "T".
read.to.fast5.name = c()
read.sequence.list = list()
# Error messages of the files that could not be processed, named by file path.
read.failures = c()
for (fast5.file in fast5.files){
  # Extraction stays best-effort: a file that cannot be read is skipped,
  # but the reason is recorded instead of being discarded silently.
  failure = tryCatch({
    fastq = h5read(file.path(prewd,fast5.file),"/Analyses/Basecall_1D_000/BaseCalled_template/Fastq")
    # Split the FASTQ record once: line 1 is the header, line 2 the sequence.
    fastq.lines = strsplit(fastq,split = "\n")[[1]]
    # First whitespace-delimited token of the header, without its leading character.
    read.name = strsplit(fastq.lines[1],split = " ")[[1]][1]
    read.id = paste0(substring(read.name,2,nchar(read.name)),"...")
    read.sequence = gsub("U","T",fastq.lines[2])
    # Both containers are updated only after every step above has succeeded.
    read.to.fast5.name[read.id] = fast5.file
    read.sequence.list[[read.id]] = read.sequence
    NULL
  },error = function(e) conditionMessage(e))
  if (!is.null(failure)){
    read.failures[fast5.file] = failure
  }
}
if (length(read.failures) > 0){
  warning(length(read.failures)," of ",length(fast5.files)," fast5 files were skipped; first failure: ",
          names(read.failures)[1],": ",read.failures[[1]],call. = FALSE)
}
save(read.to.fast5.name,file=file.path("...","read.to.fast5.name.RData"))
save(read.sequence.list,file=file.path("...","read.sequence.list.RData"))
# Remove every object created by this section; only names listed in basic.objects are kept.
rm(list = setdiff(ls(),basic.objects))
gc()