Skip to content

Commit

Permalink
use tfbool definition only for bbduk, not fastp, fix requirements
Browse files Browse the repository at this point in the history
  • Loading branch information
tedil committed Mar 27, 2024
1 parent 69f652c commit e3134db
Showing 1 changed file with 70 additions and 62 deletions.
132 changes: 70 additions & 62 deletions snappy_pipeline/workflows/adapter_trimming/config.schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,16 @@ $schema: "https://json-schema.org/draft/2020-12/schema"

description: configuration file for adapter_trimming step

type: object

definitions:
tfbool:
type: string
enum: ["t", "f"]
enum: [ "t", "f" ]

type: object
properties:
adapter_trimming:
type: object
required:
- tools
additionalProperties: false

properties:
path_link_in:
Expand Down Expand Up @@ -44,9 +42,9 @@ properties:
interleaved:
description: "(int) t/f overrides interleaved autodetection."
oneOf:
- $ref: #/definitions/tfbool
- $ref: "#/definitions/tfbool"
- type: string
enum: [ "auto"]
enum: [ "auto" ]
default: "auto"
qin:
description: "Input quality offset: 33 (Sanger), 64, or auto."
Expand All @@ -58,11 +56,11 @@ properties:
(cu) Process non-AGCT IUPAC reference bases by making all
possible unambiguous copies. Intended for short motifs
or adapter barcodes, as time/memory use is exponential.
$ref: #/definitions/tfbool
$ref: "#/definitions/tfbool"
default: f
nzo:
description: "Only write statistics about ref sequences with nonzero hits."
$ref: #/definitions/tfbool
$ref: "#/definitions/tfbool"
default: t
qout:
description: "Output quality offset: 33 (Sanger), 64, or auto."
Expand All @@ -76,19 +74,19 @@ properties:
default: 3
rename:
description: "Rename reads to indicate which sequences they matched."
$ref: #/definitions/tfbool
$ref: "#/definitions/tfbool"
default: f
refnames:
description: "Use names of reference files rather than scaffold IDs."
$ref: #/definitions/tfbool
$ref: "#/definitions/tfbool"
default: f
trd:
description: "Truncate read and ref names at the first whitespace."
$ref: #/definitions/tfbool
$ref: "#/definitions/tfbool"
default: f
ordered:
description: "Set to true to output reads in same order as input."
$ref: #/definitions/tfbool
$ref: "#/definitions/tfbool"
default: f

# Histogram output parameters:
Expand All @@ -113,7 +111,7 @@ properties:
# Histograms for mapped sam/bam files only:
histbefore:
description: "Calculate histograms from reads before processing."
$ref: #/definitions/tfbool
$ref: "#/definitions/tfbool"
default: t
idbins:
description: "Number idhist bins. Set to 'auto' to use read length."
Expand All @@ -133,15 +131,15 @@ properties:
default: 21
rcomp:
description: "Look for reverse-complements of kmers in addition to forward kmers."
$ref: #/definitions/tfbool
$ref: "#/definitions/tfbool"
default: t

# Processing parameters:
maskmiddle:
description: >
(mm) Treat the middle base of a kmer as a wildcard, to
increase sensitivity in the presence of errors.
$ref: #/definitions/tfbool
$ref: "#/definitions/tfbool"
default: t
minkmerhits:
description: >
Expand Down Expand Up @@ -197,40 +195,40 @@ properties:
(fn) Forbids matching of read kmers containing N.
By default, these will match a reference 'A' if
hdist>0 or edist>0, to increase sensitivity.
$ref: #/definitions/tfbool
$ref: "#/definitions/tfbool"
default: f
removeifeitherbad:
description: >
(rieb) Paired reads get sent to 'outmatch' if either is
match (or either is trimmed shorter than minlen).
Set to false to require both.
$ref: #/definitions/tfbool
$ref: "#/definitions/tfbool"
default: t
trimfailures:
description: >
Instead of discarding failed reads, trim them to 1bp.
This makes the statistics a bit odd.
$ref: #/definitions/tfbool
$ref: "#/definitions/tfbool"
default: f
findbestmatch:
description: >
(fbm) If multiple matches, associate read with sequence
sharing most kmers. Reduces speed.
$ref: #/definitions/tfbool
$ref: "#/definitions/tfbool"
default: f
skipr1:
description: "Don't do kmer-based operations on read 1."
$ref: #/definitions/tfbool
$ref: "#/definitions/tfbool"
default: f
skipr2:
description: "Don't do kmer-based operations on read 2."
$ref: #/definitions/tfbool
$ref: "#/definitions/tfbool"
default: f
ecco:
description: >
For overlapping paired reads only. Performs error-
correction with BBMerge prior to kmer operations.
$ref: #/definitions/tfbool
$ref: "#/definitions/tfbool"
default: f
# Trimming/Filtering/Masking parameters:
# Note - if ktrim, kmask, and ksplit are unset, the default behavior is kfilter.
Expand All @@ -256,7 +254,7 @@ properties:
default: ""
maskfullycovered:
description: "(mfc) Only mask bases that are fully covered by kmers."
$ref: #/definitions/tfbool
$ref: "#/definitions/tfbool"
default: f
ksplit:
description: >
Expand All @@ -265,7 +263,7 @@ properties:
read, it will be trimmed instead. Singletons will go to
out, and pairs will go to outm. Do not use ksplit with
other operations such as quality-trimming or filtering.
$ref: #/definitions/tfbool
$ref: "#/definitions/tfbool"
default: f
mink:
description: >
Expand Down Expand Up @@ -339,7 +337,7 @@ properties:
description: >
(outputtrimmedtomatch) Output reads trimmed to shorter
than minlength to outm rather than discarding.
$ref: #/definitions/tfbool
$ref: "#/definitions/tfbool"
default: f
tp:
description: >
Expand All @@ -349,12 +347,12 @@ properties:
tbo:
description: >
(trimbyoverlap) Trim adapters based on where paired reads overlap.
$ref: #/definitions/tfbool
$ref: "#/definitions/tfbool"
default: f
strictoverlap:
description: >
Adjust sensitivity for trimbyoverlap mode.
$ref: #/definitions/tfbool
$ref: "#/definitions/tfbool"
default: t
minoverlap:
description: >
Expand All @@ -371,7 +369,7 @@ properties:
description: >
(trimpairsevenly) When kmer right-trimming, trim both
reads to the minimum length of either.
$ref: #/definitions/tfbool
$ref: "#/definitions/tfbool"
default: f
forcetrimleft:
description: >
Expand Down Expand Up @@ -422,24 +420,24 @@ properties:
description: >
Use average GC of paired reads. Deprecated option?
Also affects gchist.
$ref: #/definitions/tfbool
$ref: "#/definitions/tfbool"
default: t
tossjunk:
description: >
Discard reads with invalid characters as bases.
$ref: #/definitions/tfbool
$ref: "#/definitions/tfbool"
default: f
swift:
description: >
Trim Swift sequences: Trailing C/T/N R1, leading G/A/N R2.
$ref: #/definitions/tfbool
$ref: "#/definitions/tfbool"
default: f

# Header-parsing parameters - these require Illumina headers:
chastityfilter:
description: >
(cf) Discard reads with id containing ' 1:Y:' or ' 2:Y:'.
$ref: #/definitions/tfbool
$ref: "#/definitions/tfbool"
default: f
barcodefilter:
description: |
Expand Down Expand Up @@ -559,18 +557,18 @@ properties:
of 0-41 and is reported as quality scores, so the output
should be fastq or fasta+qual.
NOTE: If set, entropytrim overrides entropymask.
$ref: #/definitions/tfbool
$ref: "#/definitions/tfbool"
default: f
# Cardinality estimation:
cardinality:
description: >
(loglog) Count unique kmers using the LogLog algorithm.
$ref: #/definitions/tfbool
$ref: "#/definitions/tfbool"
default: f
cardinalityout:
description: >
(loglogout) Count unique kmers in output reads.
$ref: #/definitions/tfbool
$ref: "#/definitions/tfbool"
default: f
loglogk:
description: >
Expand Down Expand Up @@ -625,8 +623,8 @@ properties:
dedup:
description: >
enable deduplication to drop the duplicated reads/pairs
$ref: #/definitions/tfbool
default: f
type: boolean
default: false
dup_calc_accuracy:
description: >
accuracy level to calculate duplication (1~6), higher level uses more memory (1G, 2G, 4G, 8G, 16G, 24G). Default 1 for no-dedup mode, and 3 for dedup mode. (int [=0])
Expand All @@ -637,13 +635,13 @@ properties:
dont_eval_duplication:
description: >
don't evaluate duplication rate to save time and use less memory.
$ref: #/definitions/tfbool
default: t
type: boolean
default: true
trim_poly_g:
description: >
force polyG tail trimming, by default trimming is automatically enabled for Illumina NextSeq/NovaSeq data
$ref: #/definitions/tfbool
default: t
type: boolean
default: true
poly_g_min_len:
description: >
the minimum length to detect polyG in the read tail. 10 by default. (int [=10])
Expand All @@ -652,8 +650,8 @@ properties:
trim_poly_x:
description: >
enable polyX trimming in 3' ends.
$ref: #/definitions/tfbool
default: f
type: boolean
default: false
poly_x_min_len:
description: >
the minimum length to detect polyX in the read tail. 10 by default. (int [=10])
Expand All @@ -662,18 +660,18 @@ properties:
cut_front:
description: >
move a sliding window from front (5') to tail, drop the bases in the window if its mean quality < threshold, stop otherwise.
$ref: #/definitions/tfbool
default: f
type: boolean
default: false
cut_tail:
description: >
move a sliding window from tail (3') to front, drop the bases in the window if its mean quality < threshold, stop otherwise.
$ref: #/definitions/tfbool
default: f
type: boolean
default: false
cut_right:
description: >
move a sliding window from front to tail; if a window with mean quality < threshold is encountered, drop the bases in the window and the right part, and then stop.
$ref: #/definitions/tfbool
default: f
type: boolean
default: false
cut_front_window_size:
description: >
the window size option of cut_front, default to cut_window_size if not specified (int [=4])
Expand Down Expand Up @@ -707,8 +705,8 @@ properties:
disable_quality_filtering:
description: >
quality filtering is enabled by default. If this option is specified, quality filtering is disabled
$ref: #/definitions/tfbool
default: f
type: boolean
default: false
qualified_quality_phred:
description: >
the quality value that a base is qualified. Default 15 means phred quality >=Q15 is qualified. (int [=15])
Expand All @@ -732,8 +730,8 @@ properties:
disable_length_filtering:
description: >
length filtering is enabled by default. If this option is specified, length filtering is disabled
$ref: #/definitions/tfbool
default: f
type: boolean
default: false
length_required:
description: >
reads shorter than length_required will be discarded, default is 15. (int [=15])
Expand All @@ -747,8 +745,8 @@ properties:
low_complexity_filter:
description: >
enable low complexity filter. The complexity is defined as the percentage of bases that are different from their next base (base[i] != base[i+1]).
$ref: #/definitions/tfbool
default: f
type: boolean
default: false
complexity_threshold:
description: >
the threshold for low complexity filter (0~100). Default is 30, which means 30% complexity is required. (int [=30])
Expand All @@ -772,8 +770,8 @@ properties:
correction:
description: >
enable base correction in overlapped regions (only for PE data), default is disabled
$ref: #/definitions/tfbool
default: f
type: boolean
default: false
overlap_len_require:
description: >
the minimum length to detect overlapped region of PE reads. This will affect overlap analysis based PE merge, adapter trimming and correction. 30 by default. (int [=30])
Expand All @@ -792,8 +790,8 @@ properties:
umi:
description: >
enable unique molecular identifier (UMI) preprocessing
$ref: #/definitions/tfbool
default: f
type: boolean
default: false
umi_loc:
description: >
specify the location of UMI, can be (index1/index2/read1/read2/per_index/per_read), default is none (string [=])
Expand All @@ -820,8 +818,18 @@ properties:
overrepresentation_analysis:
description: >
enable overrepresented sequence analysis.
$ref: #/definitions/tfbool
default: f
type: boolean
default: false

additionalProperties: false

required:
- tools
anyOf:
- required:
- "bbduk"
- required:
- "fastp"

required:
- adapter_trimming
Expand Down

0 comments on commit e3134db

Please sign in to comment.