Feature/bruker data #275

Merged: 52 commits, merged Sep 29, 2023

Commits (52)
c4b17e4  added tdf2mzml (jspaezp, Aug 3, 2023)
0b55503  added path to conversion (jspaezp, Aug 3, 2023)
4e9a931  changed comment character (jspaezp, Aug 4, 2023)
51a6e66  added tuple of meta to tdf2mzml outs (jspaezp, Aug 4, 2023)
9cc93fa  added debug prints to diann conversion (jspaezp, Aug 4, 2023)
e8752f6  added renaming of dotd files after extraction (jspaezp, Aug 4, 2023)
b559b21  yet more debug printing info (jspaezp, Aug 4, 2023)
0793038  added not to branching (jspaezp, Aug 4, 2023)
224e340  refactoring of diann convert (jspaezp, Aug 6, 2023)
9e4c872  fixed bug where mzml AND raw files were passed (jspaezp, Aug 6, 2023)
1a86393  added speclib to schema (jspaezp, Aug 6, 2023)
92ee452  returned report in abstracted diannconvert (jspaezp, Aug 6, 2023)
33b3579  refactor and speedup of diann summary (jspaezp, Aug 6, 2023)
6ae3565  added debug info to versions (jspaezp, Aug 6, 2023)
4b58fba  moved tar version in the workflow from tracking to logging (jspaezp, Aug 6, 2023)
288415a  fixed dumb error (jspaezp, Aug 6, 2023)
ffff4c9  experimental change of the experimental design to make multiqc pass (jspaezp, Aug 6, 2023)
d9feb35  changed debug listing of contents in multiqc from tree to ls (jspaezp, Aug 6, 2023)
5e8fa66  stuff (jspaezp, Aug 7, 2023)
bf084e2  major speedup (jspaezp, Aug 9, 2023)
786798f  speed and logging improvement (jspaezp, Aug 9, 2023)
ec37001  improved error messaging when calculating coverages (jspaezp, Aug 9, 2023)
882b968  further optimization (jspaezp, Aug 10, 2023)
69ae560  changed paths to vals (jspaezp, Aug 11, 2023)
266c121  typo fix (jspaezp, Aug 11, 2023)
4d94097  even more optimization in diann conversion (jspaezp, Aug 12, 2023)
41f76c2  added a bit of debug logging (jspaezp, Aug 12, 2023)
7ce33c4  change to path in the empirical lib step and yet even more optimization (jspaezp, Aug 12, 2023)
454b911  Further speedup of diann conversion, prevent staging of un-needed fil… (jspaezp, Aug 12, 2023)
85f3060  Experimental/bruker report (#2) (jspaezp, Aug 17, 2023)
21ee38b  Merge branch 'dev' into feature/bruker_data (jspaezp, Aug 17, 2023)
f4d8cbe  incorporated code review notes (jspaezp, Aug 17, 2023)
062e6ba  minor fix on nf-core linting (jspaezp, Aug 17, 2023)
6c933e4  whitespace related linting (jspaezp, Sep 1, 2023)
282b9aa  prettier autofix of quotes (jspaezp, Sep 1, 2023)
aa32722  Experimental/bruker agg metrics (#3) (jspaezp, Sep 3, 2023)
0da9965  Updating to upstream dev branch (#4) (jspaezp, Sep 3, 2023)
f755690  updated example of cli run (jspaezp, Sep 3, 2023)
83904d9  Merge branch 'dev' into feature/bruker_data (jspaezp, Sep 3, 2023)
88ef3f3  fixed linting on decompress dotd nf file (jspaezp, Sep 10, 2023)
484a19c  Merge branch 'feature/bruker_data' of github.com:TalusBio/quantms int… (jspaezp, Sep 10, 2023)
bbd8f63  fixed inverted rows error (code review) (jspaezp, Sep 10, 2023)
50b1205  Merge branch 'dev' into feature/bruker_data (jspaezp, Sep 10, 2023)
0b11023  changed lookup key for the PRH best score (jspaezp, Sep 10, 2023)
bcc5295  fixed error that arose from fixing merge conflicts (jspaezp, Sep 10, 2023)
07a6ed2  updated pmultiqc version (jspaezp, Sep 15, 2023)
b72e000  Maintainance/finish integration with bigbio (#5) (jspaezp, Sep 19, 2023)
54b685b  Merge branch 'dev' into feature/bruker_data (jspaezp, Sep 19, 2023)
f1b0bf7  review suggestions (jspaezp, Sep 28, 2023)
503538d  Update modules/local/dotd_to_mqc/main.nf (jspaezp, Sep 28, 2023)
ce2670e  Update modules/local/decompress_dotd/main.nf (jspaezp, Sep 28, 2023)
b20d91c  Merge branch 'dev' into feature/bruker_data (jspaezp, Sep 28, 2023)
574 changes: 415 additions & 159 deletions bin/diann_convert.py

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion modules/local/diann_preliminary_analysis/main.nf
@@ -7,7 +7,7 @@ process DIANN_PRELIMINARY_ANALYSIS {
     'biocontainers/diann:v1.8.1_cv1' }"

     input:
-    tuple val(meta), file(mzML), file(predict_tsv)
+    tuple val(meta), path(mzML), path(predict_tsv)

     output:
     path "*.quant", emit: diann_quant
1 change: 1 addition & 0 deletions modules/local/diannconvert/main.nf
@@ -36,6 +36,7 @@ process DIANNCONVERT {
"""
diann_convert.py convert \\
--folder ./ \\
--exp_design ${exp_design} \\
--diann_version ./version/versions.yml \\
--dia_params "${dia_params}" \\
--charge $params.max_precursor_charge \\
10 changes: 8 additions & 2 deletions modules/local/sdrfparsing/main.nf
@@ -23,12 +23,18 @@ process SDRFPARSING {
"""
## -t2 since the one-table format parser is broken in OpenMS2.5
## -l for legacy behavior to always add sample columns
## TODO Update the sdrf-pipelines to dynamic print versions

parse_sdrf convert-openms -t2 -l -s ${sdrf} 2>&1 | tee ${sdrf.baseName}_parsing.log
## JSPP 2023-Aug -- Adding --raw for now, this will allow the development of the
# bypass diann pipelie but break every other aspect of it. Make sure
# this flag is gone when PRing

parse_sdrf convert-openms --raw -t2 -l -s ${sdrf} 2>&1 | tee ${sdrf.baseName}_parsing.log
mv openms.tsv ${sdrf.baseName}_config.tsv
mv experimental_design.tsv ${sdrf.baseName}_openms_design.tsv

## TODO Update the sdrf-pipelines to dynamic print versions
# Version reporting can now be programmatic, since:
# https://github.com/bigbio/sdrf-pipelines/pull/134
cat <<-END_VERSIONS > versions.yml
"${task.process}":
sdrf-pipelines: \$(echo "0.0.22")
63 changes: 63 additions & 0 deletions modules/local/tdf2mzml/main.nf
@@ -0,0 +1,63 @@

process TDF2MZML {
tag "$meta.mzml_id"
label 'process_low'
label 'process_single'
label 'error_retry'

// for rawfileparser this is conda "conda-forge::mono bioconda::thermorawfileparser=1.3.4"
// conda is not enabled for DIA so ... disabling anyway

// container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
// 'https://depot.galaxyproject.org/singularity/thermorawfileparser:1.3.4--ha8f3691_0' :
// 'quay.io/biocontainers/thermorawfileparser:1.3.4--ha8f3691_0' }"
container 'mfreitas/tdf2mzml:latest' // I don't know which stable tag to use...

stageInMode {
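    // Retry-aware staging: attempt 1 uses the cheapest mode for the executor
    // (symlink on AWS Batch, hard link elsewhere); attempt 2 steps up
    // (copy on AWS Batch, symlink elsewhere); any later attempt falls back
    // to a plain copy.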
if (task.attempt == 1) {
if (executor == "awsbatch") {
'symlink'
} else {
'link'
}
} else if (task.attempt == 2) {
if (executor == "awsbatch") {
'copy'
} else {
'symlink'
}
} else {
'copy'
}
}

input:
tuple val(meta), path(rawfile)

output:
tuple val(meta), path("*.mzML"), emit: mzmls_converted
tuple val(meta), path("*.d"), emit: dotd_files
path "versions.yml", emit: version
path "*.log", emit: log

script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.mzml_id}"

"""
tar --version
echo "Unpacking..." | tee --append ${rawfile.baseName}_conversion.log
tar -xvf ${rawfile} 2>&1 | tee --append ${rawfile.baseName}_conversion.log
echo "Converting..." | tee --append ${rawfile.baseName}_conversion.log
tdf2mzml.py -i *.d 2>&1 | tee --append ${rawfile.baseName}_conversion.log
echo "Compressing..." | tee --append ${rawfile.baseName}_conversion.log
mv *.mzml ${file(rawfile.baseName).baseName}.mzML
mv *.d ${file(rawfile.baseName).baseName}.d
# gzip ${file(rawfile.baseName).baseName}.mzML

cat <<-END_VERSIONS > versions.yml
"${task.process}":
tdf2mzml.py: \$(tdf2mzml.py --version)
END_VERSIONS
"""
}
42 changes: 42 additions & 0 deletions modules/local/tdf2mzml/meta.yml
@@ -0,0 +1,42 @@
name: tdf2mzml
description: convert raw Bruker files to mzML files
keywords:
- raw
- mzML
- .d
tools:
- tdf2mzml:
description: |
It takes a Bruker .d raw file as input and outputs indexed mzML
homepage: https://github.com/mafreitas/tdf2mzml
documentation: https://github.com/mafreitas/tdf2mzml
input:
- meta:
type: map
description: |
Groovy Map containing sample information
- rawfile:
type: file
description: |
Bruker Raw file archived using tar
pattern: "*.d.tar"
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'MD5', enzyme:trypsin ]
- mzml:
type: file
description: indexed mzML
pattern: "*.mzML"
- log:
type: file
description: log file
pattern: "*.log"
- version:
type: file
description: File containing software version
pattern: "versions.yml"
authors:
- "@jspaezp"

Some generated files are not rendered by default.

3 changes: 2 additions & 1 deletion nextflow.config
@@ -157,7 +157,8 @@ params {
     mass_acc_automatic           = true
     pg_level                     = 2
     species_genes                = false
-    diann_normalize              = true
+    diann_normalize              = true
+    diann_speclib                = ''

     // MSstats general options
     msstats_remove_one_feat_prot = true
7 changes: 7 additions & 0 deletions nextflow_schema.json
@@ -891,6 +891,13 @@
"fa_icon": "far fa-check-square",
"default": false
},
"diann_speclib": {
"type": "string",
"description": "The spectral library to use for DIA-NN",
"fa_icon": "fas fa-file",
"help_text": "If passed, will use that spectral library to carry out the DIA-NN search, instead of predicting one from the fasta file.",
"hidden": false
},
"diann_debug": {
"type": "integer",
"description": "Debug level",
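
For illustration, a run using this new parameter might look like the following (hypothetical command; the pipeline name and paths are placeholders, not taken from this PR):

    nextflow run bigbio/quantms -profile docker --input design.sdrf.tsv --diann_speclib /path/to/library.predicted.speclib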
24 changes: 19 additions & 5 deletions subworkflows/local/file_preparation.nf
@@ -3,13 +3,14 @@
 //

 include { THERMORAWFILEPARSER } from '../../modules/local/thermorawfileparser/main'
+include { TDF2MZML } from '../../modules/local/tdf2mzml/main'
 include { MZMLINDEXING } from '../../modules/local/openms/mzmlindexing/main'
 include { MZMLSTATISTICS } from '../../modules/local/mzmlstatistics/main'
 include { OPENMSPEAKPICKER } from '../../modules/local/openms/openmspeakpicker/main'

 workflow FILE_PREPARATION {
     take:
-    ch_mzmls // channel: [ val(meta), raw/mzml ]
+    ch_mzmls // channel: [ val(meta), raw/mzml/d.tar ]

     main:
     ch_versions = Channel.empty()
@@ -23,6 +24,7 @@ workflow FILE_PREPARATION {
         .branch {
             raw: WorkflowQuantms.hasExtension(it[1], 'raw')
             mzML: WorkflowQuantms.hasExtension(it[1], 'mzML')
+            dotD: WorkflowQuantms.hasExtension(it[1], '.d.tar')

[Review thread on the dotD branch line above]

jspaezp (Contributor, author):
TODO add branch here with a plain .d and mix them (and add the exception to TDF2MZML)

Member:
@jspaezp wouldn't it be more interesting to have .d gzipped rather than tarred? I'm asking because I have seen most of the compressed files using gzip instead of tar.

jspaezp (Contributor, author):
I could implement either. We tested locally, and since most of the data inside the file is already compressed, gzip offered very little benefit (<10% compression) whilst dramatically increasing the time it took to generate (4x longer, if I recall correctly). So, in our use case at least, it was not worth having the compression.

I could definitely add something like this to the extraction step:
https://gist.github.com/hightemp/5071909#file-bash-aliases-L32-L60

and correspondingly I would just have the branch be something like

        raw: WorkflowQuantms.hasExtension(it[1], 'raw')
        mzML: WorkflowQuantms.hasExtension(it[1], 'mzML')
        dotD: WorkflowQuantms.hasExtension(it[1], '.d.{tar,tar.gz,tar.bz....}')

I will add this to the options (I think we should enforce having the .d though; it would be very hard to track file properties if we attempt to allow "myfile.tar.gz").

Will add this in the next commit

Member:
@jspaezp interesting information about why you use .tar. It would be great to support this approach for those using other formats.

 raw: WorkflowQuantms.hasExtension(it[1], 'raw')
 mzML: WorkflowQuantms.hasExtension(it[1], 'mzML')
 dotD: WorkflowQuantms.hasExtension(it[1], '.d.{tar,tar.gz,tar.bz....}')

jspaezp (Contributor, author), Aug 15, 2023:
Note to self:

public static boolean hasExtension(file, extension) {
return file.toString().toLowerCase().endsWith(extension.toLowerCase())
}

since the extension is checked here, a more verbose branching is needed. I think that supporting tar/tar.gz/tar.bz/zip should be enough for now.
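
A minimal sketch of what that more verbose branching could build on (hypothetical "hasAnyExtension" helper, not part of WorkflowQuantms; only the single-suffix hasExtension above exists):

    // Hypothetical helper, shown only to illustrate endsWith-based matching
    // over several archive suffixes; not in the codebase.
    public static boolean hasAnyExtension(file, List<String> extensions) {
        def name = file.toString().toLowerCase()
        return extensions.any { ext -> name.endsWith(ext.toLowerCase()) }
    }

    // e.g. in the branch block:
    // dotD: hasAnyExtension(it[1], ['.d.tar', '.d.tar.gz', '.d.tar.bz2', '.d.zip'])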

jspaezp (Contributor, author):
added here: 85f3060

jspaezp (Contributor, author):
@ypriverol
On a semi-related note: do you have any suggestions for public data? If not, I think we could upload something.
I was thinking of maybe using https://www.ebi.ac.uk/pride/archive/projects/PXD034128 but it's phospho, so it might take A WHILE to search. And it seems like most people are uploading the data as a single .zip with all the files (and I am not sure if we want to support that, or if there is a way to stage the files more efficiently).

LMK what you think

}
.set { ch_branched_input }

@@ -46,22 +48,34 @@
     ch_results = ch_results.mix(ch_branched_input_mzMLs.inputIndexedMzML)

     THERMORAWFILEPARSER( ch_branched_input.raw )
+    // Output is
+    // {'mzmls_converted': Tuple[val(meta), path(mzml)],
+    //  'version': Path(versions.yml),
+    //  'log': Path(*.txt)}
+
+    // Where meta is the same as the input meta
     ch_versions = ch_versions.mix(THERMORAWFILEPARSER.out.version)
     ch_results = ch_results.mix(THERMORAWFILEPARSER.out.mzmls_converted)

     MZMLINDEXING( ch_branched_input_mzMLs.nonIndexedMzML )
     ch_versions = ch_versions.mix(MZMLINDEXING.out.version)
     ch_results = ch_results.mix(MZMLINDEXING.out.mzmls_indexed)

-    ch_results.map{ it -> [it[0], it[1]] }.set{ ch_mzml }
+    ch_results.map{ it -> [it[0], it[1]] }.set{ indexed_mzml_bundle }

+    TDF2MZML( ch_branched_input.dotD )
+    ch_versions = ch_versions.mix(TDF2MZML.out.version)
+    ch_results = indexed_mzml_bundle.mix(TDF2MZML.out.dotd_files)
+    indexed_mzml_bundle = indexed_mzml_bundle.mix(TDF2MZML.out.mzmls_converted)
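     // Note: ch_results carries the extracted .d directories downstream (mixed
     // with the mzMLs), while indexed_mzml_bundle keeps only mzMLs for the
     // statistics and optional peak-picking steps below.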

-    MZMLSTATISTICS( ch_mzml )
+    MZMLSTATISTICS( indexed_mzml_bundle )
     ch_statistics = ch_statistics.mix(MZMLSTATISTICS.out.mzml_statistics.collect())
     ch_versions = ch_versions.mix(MZMLSTATISTICS.out.version)

     if (params.openms_peakpicking){
+        // If the peak picker is enabled, it will over-write, not bypass, the .d files
         OPENMSPEAKPICKER (
-            ch_results
+            indexed_mzml_bundle
         )

         ch_versions = ch_versions.mix(OPENMSPEAKPICKER.out.version)
@@ -70,7 +84,7 @@


emit:
-    results = ch_results // channel: [val(mzml_id), indexedmzml]
+    results = ch_results // channel: [val(mzml_id), indexedmzml|.d.tar]
statistics = ch_statistics // channel: [ *_mzml_info.tsv ]
version = ch_versions // channel: [ *.version.txt ]
}
12 changes: 9 additions & 3 deletions workflows/dia.nf
@@ -55,12 +55,18 @@ workflow DIA {
     //
     // MODULE: SILICOLIBRARYGENERATION
     //
-    SILICOLIBRARYGENERATION(ch_searchdb, DIANNCFG.out.diann_cfg)
+    if (!params.diann_speclib) {
+        SILICOLIBRARYGENERATION(ch_searchdb, DIANNCFG.out.diann_cfg)
+        speclib = SILICOLIBRARYGENERATION.out.predict_speclib
+    } else {
+        speclib = Channel.fromPath(params.diann_speclib)
+    }

     //
     // MODULE: DIANN_PRELIMINARY_ANALYSIS
     //
-    DIANN_PRELIMINARY_ANALYSIS(ch_file_preparation_results.combine(SILICOLIBRARYGENERATION.out.predict_speclib))
+    DIANN_PRELIMINARY_ANALYSIS(ch_file_preparation_results.combine(speclib))
     ch_software_versions = ch_software_versions.mix(DIANN_PRELIMINARY_ANALYSIS.out.version.ifEmpty(null))

     //
@@ -69,7 +75,7 @@
     ASSEMBLE_EMPIRICAL_LIBRARY(ch_result.mzml.collect(),
         meta,
         DIANN_PRELIMINARY_ANALYSIS.out.diann_quant.collect(),
-        SILICOLIBRARYGENERATION.out.predict_speclib
+        speclib
     )
     ch_software_versions = ch_software_versions.mix(ASSEMBLE_EMPIRICAL_LIBRARY.out.version.ifEmpty(null))

Expand Down