forked from esherm/intSiteCaller
-
Notifications
You must be signed in to change notification settings - Fork 5
/
read_psl_files.R
25 lines (22 loc) · 1 KB
/
read_psl_files.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
#' Read gzip-compressed, header-less PSL alignment files into one data.frame
#'
#' Each file is decompressed with the external `zcat` program and parsed by
#' `data.table::fread()`. The per-file tables are stacked in the order the
#' files are given and labelled with the 21 PSL column names.
#'
#' @param pslFile character vector of file name(s) of gzip-compressed PSL
#'   files that have no column header
#' @param toNull character vector of column names to drop from the result
#'   (column positions are accepted as well); `NULL` keeps every column
#' @return a plain data.frame with one row per alignment. When nothing is
#'   read (no files given, or no file produced any rows) a zero-row
#'   data.frame with the same columns is returned.
#' @examples
#' \dontrun{
#' psl <- readpsl(c("a.psl.gz", "b.psl.gz"), toNull = "blockSizes")
#' }
readpsl <- function(pslFile, toNull = NULL) {
  cols <- c("matches", "misMatches", "repMatches", "nCount", "qNumInsert",
            "qBaseInsert", "tNumInsert", "tBaseInsert", "strand", "qName",
            "qSize", "qStart", "qEnd", "tName", "tSize", "tStart", "tEnd",
            "blockCount", "blockSizes", "qStarts", "tStarts")
  # Text columns. They are forced to character on read because type
  # detection would turn all-digit read or sequence names into integers.
  # The remaining (numeric) columns are left to fread's own detection.
  is_char <- cols %in% c("strand", "qName", "tName",
                         "blockSizes", "qStarts", "tStarts")

  pslFile <- as.character(pslFile)
  psl <- NULL
  if (length(pslFile) > 0L) {
    # Fail early with a readable message instead of a downstream parse error.
    absent <- pslFile[!file.exists(pslFile)]
    if (length(absent) > 0L) {
      stop("PSL file(s) not found: ", paste(absent, collapse = ", "),
           call. = FALSE)
    }
    # library() errors clearly when the package is missing and, like the
    # previous require() call, leaves data.table attached for the caller.
    library("data.table")
    parts <- lapply(pslFile, function(f) {
      message("Reading ", f)
      # shQuote() keeps file names with spaces or shell metacharacters intact.
      data.table::fread(cmd = paste("zcat", shQuote(f)), sep = "\t",
                        colClasses = list(character = which(is_char)))
    })
    psl <- data.table::rbindlist(parts)
  }

  if (is.null(psl) || ncol(psl) == 0L) {
    # No rows at all: build an empty frame carrying the PSL columns, since a
    # zero-column table cannot be given the 21 names.
    out <- lapply(is_char, function(ch) if (ch) character(0) else integer(0))
    names(out) <- cols
    out <- as.data.frame(out, stringsAsFactors = FALSE)
  } else {
    if (ncol(psl) != length(cols)) {
      stop("Expected ", length(cols), " PSL columns but read ", ncol(psl),
           call. = FALSE)
    }
    out <- as.data.frame(psl)
    names(out) <- cols
  }

  if (length(toNull) > 0L) {
    if (is.character(toNull)) {
      unknown <- setdiff(toNull, names(out))
      if (length(unknown) > 0L) {
        warning("Ignoring unknown column(s) in toNull: ",
                paste(unknown, collapse = ", "), call. = FALSE)
      }
      drop <- names(out) %in% toNull
    } else {
      drop <- seq_along(out) %in% toNull
    }
    out <- out[, !drop, drop = FALSE]
  }
  out
}