forked from danny-wilson/kmer_pipeline
-
Notifications
You must be signed in to change notification settings - Fork 0
/
createfullkmerlist.Rscript
executable file
·131 lines (106 loc) · 5.25 KB
/
createfullkmerlist.Rscript
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
#!/usr/bin/env Rscript
############################################
# Error handling function #
############################################
# Old function -----------
# options(error = quote({
# dump.frames(to.file=TRUE, dumpto="Rcoredump")
# load("Rcoredump.rda")
# print(Rcoredump)
# q()
# }))
# New function ------------
# Courtesy of https://renkun.me/2020/03/31/a-simple-way-to-show-stack-trace-on-error-in-r/
if(!interactive()) options(error = function() {
calls <- sys.calls()
if (length(calls) >= 2L) {
sink(stderr())
on.exit(sink(NULL))
cat("Backtrace:\n")
calls <- rev(calls[-length(calls)])
for (i in seq_along(calls)) {
cat(i, ": ", deparse(calls[[i]], nlines = 1L), "\n", sep = "")
}
}
coredump = paste0("coredump.",Sys.getpid(),".RData")
if(coredump!="") save.image(coredump)
if (!interactive()) {
q(status = 1)
}
})
help = c("createfullkmerlist.Rscript merge kmers into a list of unique kmers present across the dataset",
"Usage: createfullkmerlist.Rscript task_id n p output_prefix analysis_dir id_file kmer_type kmer_length software_file")
## Read the command line arguments
args = commandArgs(trailingOnly = TRUE)
if(length(args!=0)){
if(args[1]=="-help" | args[1]=="-h"){
cat(help,sep="\n")
q("no")
}
}
if(length(args)!=9) {
cat(help,sep="\n")
cat("Received arguments: ",args,"\n")
stop("\nIncorrect usage\n")
}
###################################################################################################
## Functions and software paths
###################################################################################################
###################################################################################################
start.time = proc.time()[3]
# Initialize variables
process = as.integer(args[1])
n = as.integer(args[2])
p = as.integer(args[3])
output_prefix = as.character(args[4]) # SGE added
output_dir = as.character(args[5]) # SGE added
id_file = as.character(args[6]) # SGE added
kmer_type = tolower(as.character(args[7]))
kmer_length = as.integer(args[8]) # SGE added
software_file = as.character(args[9]) # SGE added
# Check input arguments
if(!file.exists(output_dir)) stop("Error: output directory doesn't exist","\n") # SGE added
if(unlist(strsplit(output_dir,""))[length(unlist(strsplit(output_dir,"")))]!="/") output_dir = paste0(output_dir, "/") # SGE added
if(!file.exists(id_file)) stop("Error: sample ID file doesn't exist","\n") # SGE added
if(kmer_type!="protein" & kmer_type!="nucleotide") stop("Error: kmer type must be either 'protein' or 'nucleotide'","\n")
if(is.na(kmer_length)) stop("Error: kmer length must be an integer","\n")
# Get directory containing the kmers
input_dir = file.path(output_dir, paste0(kmer_type, "kmer",kmer_length), "/")
if(!file.exists(input_dir)) stop(paste0("Error: input directory ", input_dir, " doesn't exist"),"\n") # SGE added
# Read in software file
software_paths = read.table(software_file, h = T, sep = "\t", quote = "")
required_software = c("R","scriptpath")
if(any(is.na(match(required_software, as.character(software_paths$name))))) stop(paste0("Error: missing required software path in the software file - requires ",paste(required_software, collapse = ", ")),"\n")
Rpath = as.character(software_paths$path)[which(toupper(as.character(software_paths$name))=="R")]
Rscriptpath = paste0(Rpath, "script")
if(!file.exists(Rscriptpath)) stop("Error: Rscript path",Rscriptpath,"doesn't exist","\n") # SGE added
script_location = as.character(software_paths$path)[which(tolower(as.character(software_paths$name))=="scriptpath")]
if(!dir.exists(script_location)) stop("Error: script location directory specified in the software paths file doesn't exist","\n")
proteinkmermergescript = file.path(script_location, "proteinkmermerge.Rscript")
if(!file.exists(proteinkmermergescript)) stop("Error: proteinkmermerge.R path doesn't exist - check pipeline script location in the software file","\n")
nucleotidekmermergescript = file.path(script_location, "nucleotidekmermerge.Rscript")
if(!file.exists(nucleotidekmermergescript)) stop("Error: nucleotidekmermerge.R path doesn't exist - check pipeline script location in the software file","\n")
# Report variables
cat("#############################################", "\n")
cat("Running on host: ",system("hostname", intern=TRUE),"\n")
cat("Command line arguments","\n")
cat(args, "\n\n")
cat("Parameters:","\n")
cat("task_id:", process, "\n")
cat("n:", n,"\n")
cat("p:", p,"\n")
cat("Output prefix:", output_prefix,"\n")
cat("Analysis directory:", output_dir,"\n")
cat("ID file path:", id_file,"\n")
cat("Kmer type:", kmer_type,"\n")
cat("Kmer length", kmer_length, "\n")
cat("Software file:", software_file, "\n")
cat("Script location:", script_location, "\n")
cat("Rscript path:", Rscriptpath,"\n")
cat("#############################################", "\n\n")
if(kmer_type =="protein"){
stopifnot(system(paste0(Rscriptpath, " ", proteinkmermergescript, " ", n, " ", p, " ", output_prefix, " ", input_dir, " ", output_dir, " ", id_file, " ", kmer_length, " ", software_file, " ", process))==0)
}
if(kmer_type =="nucleotide"){
stopifnot(system(paste0(Rscriptpath, " ", nucleotidekmermergescript, " ", n, " ", p, " ", output_prefix, " ", input_dir, " ", output_dir, " ", id_file, " ", kmer_length, " ", process))==0)
}