use tfbool definition only for bbduk, not fastp, fix requirements

bihealth · Mar 27, 2024 · e3134db · e3134db
1 parent 69f652c
commit e3134db
Showing 1 changed file with 70 additions and 62 deletions.
diff --git a/snappy_pipeline/workflows/adapter_trimming/config.schema.yaml b/snappy_pipeline/workflows/adapter_trimming/config.schema.yaml
@@ -2,18 +2,16 @@ $schema: "https://json-schema.org/draft/2020-12/schema"
 
 description: configuration file for adapter_trimming step
 
+type: object
+
 definitions:
   tfbool:
     type: string
-    enum: ["t", "f"]
+    enum: [ "t", "f" ]
 
-type: object
 properties:
   adapter_trimming:
     type: object
-    required:
-      - tools
-    additionalProperties: false
 
     properties:
       path_link_in:
@@ -44,9 +42,9 @@ properties:
           interleaved:
             description: "(int) t/f overrides interleaved autodetection."
             oneOf:
-              - $ref: #/definitions/tfbool
+              - $ref: "#/definitions/tfbool"
               - type: string
-                enum: [ "auto"]
+                enum: [ "auto" ]
             default: "auto"
           qin:
             description: "Input quality offset: 33 (Sanger), 64, or auto."
@@ -58,11 +56,11 @@ properties:
               (cu) Process non-AGCT IUPAC reference bases by making all
               possible unambiguous copies.  Intended for short motifs
               or adapter barcodes, as time/memory use is exponential.
-            $ref: #/definitions/tfbool
+            $ref: "#/definitions/tfbool"
             default: f
           nzo:
             description: "Only write statistics about ref sequences with nonzero hits."
-            $ref: #/definitions/tfbool
+            $ref: "#/definitions/tfbool"
             default: t
           qout:
             description: "Output quality offset: 33 (Sanger), 64, or auto."
@@ -76,19 +74,19 @@ properties:
             default: 3
           rename:
             description: "Rename reads to indicate which sequences they matched."
-            $ref: #/definitions/tfbool
+            $ref: "#/definitions/tfbool"
             default: f
           refnames:
             description: "Use names of reference files rather than scaffold IDs."
-            $ref: #/definitions/tfbool
+            $ref: "#/definitions/tfbool"
             default: f
           trd:
             description: "Truncate read and ref names at the first whitespace."
-            $ref: #/definitions/tfbool
+            $ref: "#/definitions/tfbool"
             default: f
           ordered:
             description: "Set to true to output reads in same order as input."
-            $ref: #/definitions/tfbool
+            $ref: "#/definitions/tfbool"
             default: f
 
           # Histogram output parameters:
@@ -113,7 +111,7 @@ properties:
           # Histograms for mapped sam/bam files only:
           histbefore:
             description: "Calculate histograms from reads before processing."
-            $ref: #/definitions/tfbool
+            $ref: "#/definitions/tfbool"
             default: t
           idbins:
             description: "Number idhist bins.  Set to 'auto' to use read length."
@@ -133,15 +131,15 @@ properties:
             default: 21
           rcomp:
             description: "Look for reverse-complements of kmers in addition to forward kmers."
-            $ref: #/definitions/tfbool
+            $ref: "#/definitions/tfbool"
             default: t
 
           # Processing parameters:
           maskmiddle:
             description: >
               (mm) Treat the middle base of a kmer as a wildcard, to
               increase sensitivity in the presence of errors.
-            $ref: #/definitions/tfbool
+            $ref: "#/definitions/tfbool"
             default: t
           minkmerhits:
             description: >
@@ -197,40 +195,40 @@ properties:
               (fn) Forbids matching of read kmers containing N.
               By default, these will match a reference 'A' if
               hdist>0 or edist>0, to increase sensitivity.
-            $ref: #/definitions/tfbool
+            $ref: "#/definitions/tfbool"
             default: f
           removeifeitherbad:
             description: >
               (rieb) Paired reads get sent to 'outmatch' if either is
               match (or either is trimmed shorter than minlen).
               Set to false to require both.
-            $ref: #/definitions/tfbool
+            $ref: "#/definitions/tfbool"
             default: t
           trimfailures:
             description: >
               Instead of discarding failed reads, trim them to 1bp.
               This makes the statistics a bit odd.
-            $ref: #/definitions/tfbool
+            $ref: "#/definitions/tfbool"
             default: f
           findbestmatch:
             description: >
               (fbm) If multiple matches, associate read with sequence
               sharing most kmers.  Reduces speed.
-            $ref: #/definitions/tfbool
+            $ref: "#/definitions/tfbool"
             default: f
           skipr1:
             description: "Don't do kmer-based operations on read 1."
-            $ref: #/definitions/tfbool
+            $ref: "#/definitions/tfbool"
             default: f
           skipr2:
             description: "Don't do kmer-based operations on read 2."
-            $ref: #/definitions/tfbool
+            $ref: "#/definitions/tfbool"
             default: f
           ecco:
             description: >
               For overlapping paired reads only.  Performs error-
               correction with BBMerge prior to kmer operations.
-            $ref: #/definitions/tfbool
+            $ref: "#/definitions/tfbool"
             default: f
           # Trimming/Filtering/Masking parameters:
           # Note - if ktrim, kmask, and ksplit are unset, the default behavior is kfilter.
@@ -256,7 +254,7 @@ properties:
             default: ""
           maskfullycovered:
             description: "(mfc) Only mask bases that are fully covered by kmers."
-            $ref: #/definitions/tfbool
+            $ref: "#/definitions/tfbool"
             default: f
           ksplit:
             description: >
@@ -265,7 +263,7 @@ properties:
               read, it will be trimmed instead.  Singletons will go to
               out, and pairs will go to outm.  Do not use ksplit with
               other operations such as quality-trimming or filtering.
-            $ref: #/definitions/tfbool
+            $ref: "#/definitions/tfbool"
             default: f
           mink:
             description: >
@@ -339,7 +337,7 @@ properties:
             description: >
               (outputtrimmedtomatch) Output reads trimmed to shorter
               than minlength to outm rather than discarding.
-            $ref: #/definitions/tfbool
+            $ref: "#/definitions/tfbool"
             default: f
           tp:
             description: >
@@ -349,12 +347,12 @@ properties:
           tbo:
             description: >
               (trimbyoverlap) Trim adapters based on where paired reads overlap.
-            $ref: #/definitions/tfbool
+            $ref: "#/definitions/tfbool"
             default: f
           strictoverlap:
             description: >
               Adjust sensitivity for trimbyoverlap mode.
-            $ref: #/definitions/tfbool
+            $ref: "#/definitions/tfbool"
             default: t
           minoverlap:
             description: >
@@ -371,7 +369,7 @@ properties:
             description: >
               (trimpairsevenly) When kmer right-trimming, trim both
               reads to the minimum length of either.
-            $ref: #/definitions/tfbool
+            $ref: "#/definitions/tfbool"
             default: f
           forcetrimleft:
             description: >
@@ -422,24 +420,24 @@ properties:
             description: >
               Use average GC of paired reads.    Deprecated option?
               Also affects gchist.
-            $ref: #/definitions/tfbool
+            $ref: "#/definitions/tfbool"
             default: t
           tossjunk:
             description: >
               Discard reads with invalid characters as bases.
-            $ref: #/definitions/tfbool
+            $ref: "#/definitions/tfbool"
             default: f
           swift:
             description: >
               Trim Swift sequences: Trailing C/T/N R1, leading G/A/N R2.
-            $ref: #/definitions/tfbool
+            $ref: "#/definitions/tfbool"
             default: f
 
           # Header-parsing parameters - these require Illumina headers:
           chastityfilter:
             description: >
               (cf) Discard reads with id containing ' 1:Y:' or ' 2:Y:'.
-            $ref: #/definitions/tfbool
+            $ref: "#/definitions/tfbool"
             default: f
           barcodefilter:
             description: |
@@ -559,18 +557,18 @@ properties:
               of 0-41 and is reported as quality scores, so the output
               should be fastq or fasta+qual.
               NOTE: If set, entropytrim overrides entropymask.
-            $ref: #/definitions/tfbool
+            $ref: "#/definitions/tfbool"
             default: f
           # Cardinality estimation:
           cardinality:
             description: >
               (loglog) Count unique kmers using the LogLog algorithm.
-            $ref: #/definitions/tfbool
+            $ref: "#/definitions/tfbool"
             default: f
           cardinalityout:
             description: >
               (loglogout) Count unique kmers in output reads.
-            $ref: #/definitions/tfbool
+            $ref: "#/definitions/tfbool"
             default: f
           loglogk:
             description: >
@@ -625,8 +623,8 @@ properties:
           dedup:
             description: >
               enable deduplication to drop the duplicated reads/pairs
-            $ref: #/definitions/tfbool
-            default: f
+            type: boolean
+            default: false
           dup_calc_accuracy:
             description: >
               accuracy level to calculate duplication (1~6), higher level uses more memory (1G, 2G, 4G, 8G, 16G, 24G). Default 1 for no-dedup mode, and 3 for dedup mode. (int [=0])
@@ -637,13 +635,13 @@ properties:
           dont_eval_duplication:
             description: >
               don't evaluate duplication rate to save time and use less memory.
-            $ref: #/definitions/tfbool
-            default: t
+            type: boolean
+            default: true
           trim_poly_g:
             description: >
               force polyG tail trimming, by default trimming is automatically enabled for Illumina NextSeq/NovaSeq data
-            $ref: #/definitions/tfbool
-            default: t
+            type: boolean
+            default: true
           poly_g_min_len:
             description: >
               the minimum length to detect polyG in the read tail. 10 by default. (int [=10])
@@ -652,8 +650,8 @@ properties:
           trim_poly_x:
             description: >
               enable polyX trimming in 3' ends.
-            $ref: #/definitions/tfbool
-            default: f
+            type: boolean
+            default: false
           poly_x_min_len:
             description: >
               the minimum length to detect polyX in the read tail. 10 by default. (int [=10])
@@ -662,18 +660,18 @@ properties:
           cut_front:
             description: >
               move a sliding window from front (5') to tail, drop the bases in the window if its mean quality < threshold, stop otherwise.
-            $ref: #/definitions/tfbool
-            default: f
+            type: boolean
+            default: false
           cut_tail:
             description: >
               move a sliding window from tail (3') to front, drop the bases in the window if its mean quality < threshold, stop otherwise.
-            $ref: #/definitions/tfbool
-            default: f
+            type: boolean
+            default: false
           cut_right:
             description: >
               move a sliding window from front to tail, if meet one window with mean quality < threshold, drop the bases in the window and the right part, and then stop.
-            $ref: #/definitions/tfbool
-            default: f
+            type: boolean
+            default: false
           cut_front_window_size:
             description: >
               the window size option of cut_front, default to cut_window_size if not specified (int [=4])
@@ -707,8 +705,8 @@ properties:
           disable_quality_filtering:
             description: >
               quality filtering is enabled by default. If this option is specified, quality filtering is disabled
-            $ref: #/definitions/tfbool
-            default: f
+            type: boolean
+            default: false
           qualified_quality_phred:
             description: >
               the quality value that a base is qualified. Default 15 means phred quality >=Q15 is qualified. (int [=15])
@@ -732,8 +730,8 @@ properties:
           disable_length_filtering:
             description: >
               length filtering is enabled by default. If this option is specified, length filtering is disabled
-            $ref: #/definitions/tfbool
-            default: f
+            type: boolean
+            default: false
           length_required:
             description: >
               reads shorter than length_required will be discarded, default is 15. (int [=15])
@@ -747,8 +745,8 @@ properties:
           low_complexity_filter:
             description: >
               enable low complexity filter. The complexity is defined as the percentage of base that is different from its next base (base[i] != base[i+1]).
-            $ref: #/definitions/tfbool
-            default: f
+            type: boolean
+            default: false
           complexity_threshold:
             description: >
               the threshold for low complexity filter (0~100). Default is 30, which means 30% complexity is required. (int [=30])
@@ -772,8 +770,8 @@ properties:
           correction:
             description: >
               enable base correction in overlapped regions (only for PE data), default is disabled
-            $ref: #/definitions/tfbool
-            default: f
+            type: boolean
+            default: false
           overlap_len_require:
             description: >
               the minimum length to detect overlapped region of PE reads. This will affect overlap analysis based PE merge, adapter trimming and correction. 30 by default. (int [=30])
@@ -792,8 +790,8 @@ properties:
           umi:
             description: >
               enable unique molecular identifier (UMI) preprocessing
-            $ref: #/definitions/tfbool
-            default: f
+            type: boolean
+            default: false
           umi_loc:
             description: >
               specify the location of UMI, can be (index1/index2/read1/read2/per_index/per_read, default is none (string [=])
@@ -820,8 +818,18 @@ properties:
           overrepresentation_analysis:
             description: >
               enable overrepresented sequence analysis.
-            $ref: #/definitions/tfbool
-            default: f
+            type: boolean
+            default: false
+
+    additionalProperties: false
+
+    required:  # keyword of the adapter_trimming object schema, not a property name
+      - tools
+    anyOf:  # at least one of the per-tool settings blocks must be present
+      - required:
+          - "bbduk"
+      - required:
+          - "fastp"
 
 required:
   - adapter_trimming