-
Notifications
You must be signed in to change notification settings - Fork 0
/
fix.R
73 lines (71 loc) · 3.3 KB
/
fix.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
junction.support = function (reads, junctions = NULL, bwa = NULL, ref = NULL, pad = 500,
pad.ref = pad * 20, realign = TRUE, walks = NULL, verbose = TRUE,
...)
{
if (!inherits(reads, "GRanges") || is.null(reads$qname) ||
is.null(reads$cigar) || is.null(reads$seq) || is.null(reads$flag))
stop("read input must be GRanges with fields $qname, $cigar, $seq, $flag and optionally $AS")
if (!is.null(junctions))
walks = jJ(junctions$grl)$gw(pad = pad)
if (is.null(walks))
stop("Either walks or junctions must be provided")
if (!realign) {
if (is.null(junctions))
junctions = walks$edges$junctions
reads = gr.flipstrand(reads)
reads$R1 = bamUtils::bamflag(reads$flag)[, "isFirstMateRead"] >
0
r1 = reads %Q% (R1 == TRUE) %>% as.data.table
r2 = reads %Q% (R1 == FALSE) %>% as.data.table
ov = merge(r1, r2, by = "qname")
sl = seqlengths(reads)
grl = grl.pivot(GRangesList(dt2gr(ov[, .(seqnames = seqnames.x,
start = start.x, end = end.x, strand = strand.x)],
seqlengths = sl), dt2gr(ov[, .(seqnames = seqnames.y,
start = start.y, end = end.y, strand = strand.y)],
seqlengths = sl)))
values(grl)$qname = ov$qname
jn = merge(jJ(grl), junctions, cartesian = TRUE, pad = pad)
if (!length(jn))
return(reads[c()])
## out = merge(as.data.table(reads), unique(jn$dt[, .(qname,
## junction.id = subject.id)]), by = "qname") %>% dt2gr(seqlengths = sl)
out = dt2gr(setDT(dplyr::inner_join(as.data.frame(reads),
as.data.frame(unique(jn$dt[, .(qname, junction.id = subject.id)])), by = "qname")), seqlengths = sl)
return(out)
}
if (inherits(bwa, "character") && file.exists(bwa)) {
if (verbose)
message("Loading BWA index")
bwa = BWA(bwa)
}
if (!inherits(ref, "DNAStringSet")) {
if (verbose)
message("Loading genome reference as DNAStringSet")
ref = rtracklayer::import(bwa@reference)
}
names(ref) = strsplit(names(ref), "\\s+") %>% sapply("[",
1)
if (length(setdiff(seqnames(walks$nodes$gr), seqlevels(ref))))
stop("seqlevels mismatch between junctions / walks and reference, please check reference (e.g. chr issues)")
if (length(setdiff(seqnames(walks$nodes$gr), seqlevels(bwa))))
stop("seqlevels mismatch between junctions / walks and BWA reference, please check reference (e.g. chr issues)")
if (verbose)
message("Building and mapping derivative contigs")
contig = bwa[ref[gr.fix(walks$grl, ref, drop = TRUE)]]
if (verbose)
message("Building reference contigs flanking junctions")
contigref = ref[gr.fix(walks$edges$junctions$footprint +
pad.ref, ref, drop = TRUE)]
if (verbose)
message("Making gChain mapping contigs to reference")
cg.contig = gChain::cgChain(contig)
if (verbose)
message("Running contig support")
reads = contig.support(reads, contig, ref = contigref, cg.contig = cg.contig,
...)
reads$junction.id = reads$contig.id
return(reads)
}
overwritefun("junction.support", "junction.support", "skitools")
junction.support = skitools::junction.support