-
Notifications
You must be signed in to change notification settings - Fork 1
/
manifest
263 lines (263 loc) · 11.5 KB
/
manifest
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
#Salmon.Alevin.Quant
#Tue Aug 24 20:03:55 UTC 2021
JVMLevel=
LSID=urn\:lsid\:genepattern.org\:module.analysis\:00418\:0.7
author=Anthony S. Castanza;UCSD-MesirovLab
categories=rna-seq;single-cell
commandLine=<libdir>salmon_alevin_quant.sh -b <Barcodes> -q <Reads> -i <Transcriptome.Index> -c <Chemistry> -l <Library.Type> -t <TgMap> -z <Output.Basename> -u <GTF.Gene.ID.Type> -w <Whitelist> -m <mtGenes> -n <mtGenes.From.GTF> -r <rRNA> -s <rRNA.From.GTF> -x <dumpMTX> -e <expectCells> -f <forceCells> -d <dumpFeatures> -a <numCellBootstraps> -j <job.cpuCount>
cpuType=any
description=Quantification and analysis of 3' tagged-end single-cell sequencing data using Salmon's Alevin (v1.8.0) function.<br>\n\nSee\: <a href\="https\://salmon.readthedocs.io/en/latest/alevin.html" target\="_blank">The Alevin Documentation</a>\nfor detailed usage guidelines.
documentationUrl=
fileFormat=.gz;.tar.gz;gz
job.docker.image=genepattern/salmon-alevin-quant\:beta
job.memory=16Gb
language=any
name=Salmon.Alevin.Quant
os=any
p10_MODE=IN
p10_TYPE=FILE
p10_default_value=
p10_description=The list of mitochondrial genes which are to be used as a feature for cellular barcode whitelisting naive Bayes classification.\n<br>\nNote\: \n\nIt is generally advisable to not use nuclear mitochondrial genes in this as they can be both up and/or down regulated which might cancel out the usefulness of this feature. Please check issue \#367 in salmon repo to know more about it.
p10_fileFormat=
p10_flag=-m
p10_name=mtGenes
p10_numValues=0..1
p10_optional=on
p10_prefix=
p10_prefix_when_specified=
p10_type=java.io.File
p10_value=
p11_MODE=
p11_TYPE=TEXT
p11_default_value=FALSE
p11_description=If supplying a GTF file to automatically construct the transcript to gene map (TgMap parameter), automatically construct the mtGenes list from the GTF directly.\n<br>\nNote\: if set to true, this will override the list supplied to the "mtGenes" parameter.\n<br>\n<b>This requires a GTF file to be supplied to "TgMap"</b>
p11_fileFormat=
p11_flag=-n
p11_name=mtGenes.From.GTF
p11_numValues=1..1
p11_optional=
p11_prefix=
p11_prefix_when_specified=
p11_type=java.lang.String
p11_value=TRUE\=true;FALSE\=false
p12_MODE=IN
p12_TYPE=FILE
p12_default_value=
p12_description=The list of ribosomal genes which are to be used as a feature for cellular barcode whitelisting naive Bayes classification.\n<br>
p12_fileFormat=
p12_flag=-r
p12_name=rRNA
p12_numValues=0..1
p12_optional=on
p12_prefix=
p12_prefix_when_specified=
p12_type=java.io.File
p12_value=
p13_MODE=
p13_TYPE=TEXT
p13_default_value=FALSE
p13_description=If supplying a GTF file to automatically construct the transcript to gene map (TgMap parameter), automatically construct the rRNA list from the GTF directly.\n<br>\nNote\: if set to true, this will override the list supplied to the "rRNA" parameter.\n<br>\n<b>This requires a GTF file to be supplied to "TgMap"</b>
p13_fileFormat=
p13_flag=-s
p13_name=rRNA.From.GTF
p13_numValues=1..1
p13_optional=
p13_prefix=
p13_prefix_when_specified=
p13_type=java.lang.String
p13_value=TRUE\=true;FALSE\=false
p14_MODE=
p14_TYPE=TEXT
p14_default_value=TRUE
p14_description=Convert the default binary format of alevin for gene-count matrix into a human readable mtx (matrix market exchange) sparse format. Generates a new output called quants_mat.mtx in the compressed output folder.
p14_fileFormat=
p14_flag=-x
p14_name=dumpMTX
p14_numValues=1..1
p14_optional=
p14_prefix=
p14_prefix_when_specified=
p14_type=java.lang.String
p14_value=TRUE\=true;FALSE\=false
p15_MODE=
p15_TYPE=Integer
p15_default_value=0
p15_description=This command line flag uses the cellranger type white-listing procedure. As specified in their algorithm overview page, "All barcodes whose total UMI counts exceed m/10 are called as cells", where m is the frequency of the top 1% cells as specified by the parameter of this command line flag.\n<br>\nNote\: Setting this parameter to "0" disables this option and, unless forceCells is specified, instead uses the default Alevin knee calculation heuristics.
p15_fileFormat=
p15_flag=-e
p15_name=expectCells
p15_numValues=1..1
p15_optional=
p15_prefix=
p15_prefix_when_specified=
p15_range=0+
p15_type=java.lang.Integer
p15_value=
p16_MODE=
p16_TYPE=Integer
p16_default_value=0
p16_description=By default, Alevin performs a heuristic based initial cellular barcode white-listing by finding the knee in the distribution of the barcode frequency. With this flag, by looking at the barcode frequency distribution, a user can explicitly specify the number of CB to consider for initial white-listing.\n<br>\nNote\: Setting this parameter to "0" disables this option and, unless expectCells is specified, instead uses the default Alevin knee calculation heuristics.
p16_fileFormat=
p16_flag=-f
p16_name=forceCells
p16_numValues=1..1
p16_optional=
p16_prefix=
p16_prefix_when_specified=
p16_range=0+
p16_type=java.lang.Integer
p16_value=
p17_MODE=
p17_TYPE=TEXT
p17_default_value=TRUE
p17_description=If activated, alevin dumps all the features used by the CB classification and their counts at each cell level. Generally used in pair with other flags.
p17_fileFormat=
p17_flag=-d
p17_name=dumpFeatures
p17_numValues=1..1
p17_optional=
p17_prefix=
p17_prefix_when_specified=
p17_type=java.lang.String
p17_value=TRUE\=true;FALSE\=false
p18_MODE=
p18_TYPE=Integer
p18_default_value=0
p18_description=Estimate the inferential uncertainty in the estimation of per cell level gene count matrix by performing bootstrapping of the reads in per-cell level equivalence classes, and generate the mean and variance of the count matrix. This option generates three additional files, namely, quants_mean_mat.gz, quants_var_mat.gz and quants_boot_rows.txt. The format of the files stays the same as quants_mat.gz while the row order is saved in quants_boot_rows.txt and the column order stays the same as in file quants_mat_cols.txt.<br>\nSet the number of bootstrap estimates to perform (0 \= flag will not be passed to alevin). dumpFeatures must be set to "true".
p18_fileFormat=
p18_flag=-a
p18_name=numCellBootstraps
p18_numValues=0..1
p18_optional=
p18_prefix=
p18_prefix_when_specified=
p18_range=0+
p18_type=java.lang.Integer
p18_value=
p1_MODE=IN
p1_TYPE=FILE
p1_default_value=
p1_description=Cellular barcodes+UMI file(s), the FASTQ file containing CB+UMI raw sequences. Alevin also supports parsing of data from multiple files as long as the order is the same as in the "Reads" parameter.
p1_fileFormat=.fastq.gz;.fq.gz;.gz;fastq;fq.gz
p1_flag=-b
p1_name=Barcodes
p1_numValues=1+
p1_optional=
p1_prefix=
p1_prefix_when_specified=
p1_type=java.io.File
p1_value=
p2_MODE=IN
p2_TYPE=FILE
p2_default_value=
p2_description=The FASTQ file containing raw read-sequences. Alevin also supports parsing of data from multiple files as long as the order is the same as in the "Barcodes" parameter.
p2_fileFormat=.fastq.gz;.fq.gz;.gz;fastq.gz;fq.gz
p2_flag=-q
p2_name=Reads
p2_numValues=1+
p2_optional=
p2_prefix=
p2_prefix_when_specified=
p2_type=java.io.File
p2_value=
p3_MODE=IN
p3_TYPE=FILE
p3_choices=https\://datasets-genepattern-org.s3.amazonaws.com/data/test_data/Salmon/gencode.v37.annotation.k31.salmon_full_decoy_index.tar.gz\=Human_Gencode_v37_Kmer31_Full_Decoy_Index;https\://datasets-genepattern-org.s3.amazonaws.com/data/test_data/Salmon/gencode.vM26.annotation.k31.salmon_full_decoy_index.tar.gz\=Mouse_Gencode_vM26_Kmer31_Full_Decoy_Index
p3_default_value=
p3_description=An Indexed transcriptome created with `Salmon Indexer`.<br>\nMust be in .tar.gz format.
p3_fileFormat=.tar.gz;tar.gz
p3_flag=-i
p3_name=Transcriptome.Index
p3_numValues=1..1
p3_optional=
p3_prefix=
p3_prefix_when_specified=
p3_type=java.io.File
p3_value=
p4_MODE=
p4_TYPE=TEXT
p4_default_value=dropseq
p4_description=The chemistry used by the single-cell sequencing platform.<br>\nDrop-Seq, 10x Chromium v2, 10x Chromium v3, CITE-Seq, CEL-Seq, CEL-Seq2, Quartz-Seq2, sci-RNA-seq3, inDrop v2, splitSeqV1, and splitSeqV2 are currently supported. BD Rhapsody support is experimental.
p4_fileFormat=
p4_flag=-c
p4_name=Chemistry
p4_numValues=1..1
p4_optional=
p4_prefix=
p4_prefix_when_specified=
p4_type=java.lang.String
p4_value=dropseq\=Drop-Seq;chromium\=10x Chromium v2;chromiumV3\=10x Chromium v3;citeseq\=CITE-Seq;celseq\=CEL-Seq;celseq2\=CEL-Seq2;quartzseq2\=Quartz-Seq2;sciseq3\=sci-RNA-seq3;indropV2\=inDrop v2;splitSeqV1\=splitSeqV1;splitSeqV2\=splitSeqV2;rhapsody\=BD Rhapsody (Experimental)
p5_MODE=
p5_TYPE=TEXT
p5_default_value=ISR
p5_description=The library type of the RNA-seq. This describes the relative orientation of paired end reads. \n<br>\nThe Alevin authors recommend using ISR (default) for both Drop-seq and Chromium chemistry.
p5_fileFormat=
p5_flag=-l
p5_name=Library.Type
p5_numValues=1..1
p5_optional=
p5_prefix=
p5_prefix_when_specified=
p5_type=java.lang.String
p5_value=ISF\=ISF (Inward, read 1 from the forward strand);ISR\=ISR (Inward, read 1 from the reverse strand);IU\=IU (Inward, Unstranded);OSF\=OSF (Outward, read 1 from the forward strand);OSR\=OSR (Outward, read 1 from the reverse strand);OU\=OU (Outward, Unstranded)
p6_MODE=IN
p6_TYPE=FILE
p6_choices=http\://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_mouse/release_M26/gencode.vM26.annotation.gtf.gz\=Mouse_Gencode_vM26;http\://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_37/gencode.v37.annotation.gtf.gz\=Human_Gencode_v37
p6_default_value=
p6_description=A two column tsv (tab-separated) file with no header containing the transcript to gene map file. The first column lists each transcript present in the reference, the second column lists the corresponding gene.\n<br>\nAlternatively, a .GTF file can be supplied to automatically create a transcript to gene map.\n<br>
p6_fileFormat=.gtf.gz;gtf;gtf.gz;tsv;txt
p6_flag=-t
p6_name=TgMap
p6_numValues=1..1
p6_optional=
p6_prefix=
p6_prefix_when_specified=
p6_type=java.io.File
p6_value=
p7_MODE=
p7_TYPE=TEXT
p7_default_value=alevin.output
p7_description=Base name to use for outputting the alevin quantification results. Should be made unique for downstream processing.
p7_fileFormat=
p7_flag=-z
p7_name=Output.Basename
p7_numValues=1..1
p7_optional=
p7_prefix=
p7_prefix_when_specified=
p7_type=java.lang.String
p7_value=
p8_MODE=
p8_TYPE=TEXT
p8_default_value=gene_id
p8_description=If supplying a GTF file to automatically construct the transcript to gene map (TgMap parameter), map the transcripts to\:<br>\nthe Gene ID (GTF.Gene.ID.Type \= gene_id) eg\: ENSG00000141510<br>\nthe Gene Symbol (GTF.Gene.ID.Type \= gene_symbol) eg\: TP53<br>\n<b>This requires a GTF file to be supplied to "TgMap"</b>
p8_fileFormat=
p8_flag=-u
p8_name=GTF.Gene.ID.Type
p8_numValues=1..1
p8_optional=
p8_prefix=
p8_prefix_when_specified=
p8_type=java.lang.String
p8_value=gene_id\=gene_id;gene_id_noversion\=gene_id (Drop decimal versions);gene_symbol\=gene_symbol
p9_MODE=IN
p9_TYPE=FILE
p9_default_value=
p9_description=Optionally explicitly specify the whitelist cellular barcodes to use for cell detection and cellular barcode sequence correction. If not given, alevin generates its own set of putative cellular barcodes.\n<br>\nNote\: Not 10x 724k whitelist\n<br>\nThis flag does not use the biologically known whitelist provided by 10x, instead it's the per experiment level whitelist file e.g. the file generated by cellranger with the name barcodes.tsv.
p9_fileFormat=
p9_flag=-w
p9_name=Whitelist
p9_numValues=0..1
p9_optional=on
p9_prefix=
p9_prefix_when_specified=
p9_type=java.io.File
p9_value=
privacy=public
publicationDate=08/20/2021 16\:26
quality=preproduction
src.repo=https\://github.com/genepattern/Salmon.Alevin.Quant/tree/v0.7
taskDoc=Salmon.Alevin.Quant.pdf
taskType=rna-seq
version=Alevin 1.8.0 - updated to latest version, added support for splitSeq