Skip to content

Commit

Permalink
Updates to pipeline
Browse files Browse the repository at this point in the history
  • Loading branch information
DLBPointon committed Nov 29, 2024
1 parent 3d24a42 commit 3b8d923
Show file tree
Hide file tree
Showing 7 changed files with 198 additions and 79 deletions.
16 changes: 6 additions & 10 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -119,10 +119,6 @@ workflow {
}
.set { branched_assemblies }

// branched_assemblies.organellar_genome.view {"ORGANELLAR: $it"}
// branched_assemblies.sample_genome.view {"GENOMIC: $it"}
// branched_assemblies.error.view {"ERROR CHANNELS: $it"}


//
// MODULE: ENSURE THAT THE TAXID FOR THE INPUT GENOME IS INDEED IN THE TAXDUMP
Expand Down Expand Up @@ -163,12 +159,12 @@ workflow {
//
// WORKFLOW: Run main workflow for ORGANELLAR samples
//
SANGERTOL_ASCC_ORGANELLAR (
branched_assemblies.organellar_genome,
VALIDATE_TAXID.out.versions,
organellar_include,
organellar_exclude
)
//SANGERTOL_ASCC_ORGANELLAR (
// branched_assemblies.organellar_genome,
// VALIDATE_TAXID.out.versions,
// organellar_include,
// organellar_exclude
//)


//
Expand Down
98 changes: 73 additions & 25 deletions modules.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,109 +8,151 @@
"blast/blastn": {
"branch": "master",
"git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"blast/makeblastdb": {
"branch": "master",
"git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
"installed_by": ["modules"],
"installed_by": [
"modules"
],
"patch": "modules/nf-core/blast/makeblastdb/blast-makeblastdb.diff"
},
"custom/getchromsizes": {
"branch": "master",
"git_sha": "49f4e50534fe4b64101e62ea41d5dc43b1324358",
"installed_by": ["modules"],
"installed_by": [
"modules"
],
"patch": "modules/nf-core/custom/getchromsizes/custom-getchromsizes.diff"
},
"diamond/blastx": {
"branch": "master",
"git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"fastqc": {
"branch": "master",
"git_sha": "21f230b8cca43755bf73470e6fd0290832a98aef",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"fcs/fcsadaptor": {
"branch": "master",
"git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"fcs/fcsgx": {
"branch": "master",
"git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"gnu/sort": {
"branch": "master",
"git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"kraken2/kraken2": {
"branch": "master",
"git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"minimap2/align": {
"branch": "master",
"git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"minimap2/index": {
"branch": "master",
"git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"multiqc": {
"branch": "master",
"git_sha": "cf17ca47590cc578dfb47db1c2a44ef86f89976d",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"ncbitools/vecscreen": {
"branch": "master",
"git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"samtools/depth": {
"branch": "master",
"git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"samtools/faidx": {
"branch": "master",
"git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"samtools/index": {
"branch": "master",
"git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"samtools/merge": {
"branch": "master",
"git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"samtools/sort": {
"branch": "master",
"git_sha": "b7800db9b069ed505db3f9d91b8c72faea9be17b",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"samtools/view": {
"branch": "master",
"git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"seqkit/sliding": {
"branch": "master",
"git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"tiara/tiara": {
"branch": "master",
"git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
}
}
},
Expand All @@ -119,20 +161,26 @@
"utils_nextflow_pipeline": {
"branch": "master",
"git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa",
"installed_by": ["subworkflows"]
"installed_by": [
"subworkflows"
]
},
"utils_nfcore_pipeline": {
"branch": "master",
"git_sha": "92de218a329bfc9a9033116eb5f65fd270e72ba3",
"installed_by": ["subworkflows"]
"installed_by": [
"subworkflows"
]
},
"utils_nfvalidation_plugin": {
"branch": "master",
"git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa",
"installed_by": ["subworkflows"]
"installed_by": [
"subworkflows"
]
}
}
}
}
}
}
}
4 changes: 2 additions & 2 deletions modules/local/generate_samplesheet.nf
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ process GENERATE_SAMPLESHEET {
tuple val(meta), path(pacbio_path)

output:
tuple val(meta), path("*csv"), path(pacbio_path), emit: csv
path "versions.yml", emit: versions
tuple val(meta), path("*csv"), emit: csv
path "versions.yml", emit: versions

script:
def prefix = task.ext.prefix ?: "${meta.id}"
Expand Down
40 changes: 30 additions & 10 deletions modules/local/run/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ process NEXTFLOW_RUN {
input:
val pipeline_name // String
val nextflow_opts // String
val params_file // pipeline params-file
val samplesheet // pipeline samplesheet
val nextflow_files // Map [ params-file: params.yml , c: configs/multiqc.config ]
val pipeline_files // Map [ input: samplesheet.csv ]
val additional_config // custom configs

when:
Expand All @@ -22,17 +22,37 @@ process NEXTFLOW_RUN {
'nextflow run',
pipeline_name,
nextflow_opts,
params_file ? "-params-file $params_file" : '',
additional_config ? "-c $additional_config" : '',
samplesheet ? "--input $samplesheet" : '',
nextflow_files ? nextflow_files.collect{ key, value -> "-$key $value" }.join(' ') : '',
pipeline_files ? pipeline_files.collect{ key, value -> "--$key $value" }.join(' ') : '',
"--outdir $task.workDir/results",
]
def builder = new ProcessBuilder(nxf_cmd.join(" ").tokenize(" "))

ProcessBuilder builder = new ProcessBuilder(nxf_cmd.join(" ").tokenize(" "))
builder.directory(cache_dir.toFile())
process = builder.start()
assert process.waitFor() == 0: process.text
def process = builder.start()

// Read stdout and stderr concurrently
def output_data = new StringBuilder()
def error = new StringBuilder()

def stdoutThread = Thread.start {
process.inputStream.eachLine { line -> output_data.append(line).append("\n") }
}
def stderrThread = Thread.start {
process.errorStream.eachLine { line -> error.append(line).append("\n") }
}

// Wait for the process to complete and join threads
def exitCode = process.waitFor()

stdoutThread.join()
stderrThread.join()

// Check the exit code
assert exitCode == 0 : "Pipeline failed with exit code ${exitCode}\nError: ${error}\nOutput: ${output_data}"

// Emit results
output:
path "results" , emit: output
val process.text, emit: log
path "results", emit: output
//val output_data, emit: log // <-- This needs investigating: why is output_data not assigned at this point, when it is in the original version by mahesh?
}
14 changes: 3 additions & 11 deletions modules/local/sanger_tol_btk.nf
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,15 @@ process SANGER_TOL_BTK {

input:
tuple val(meta), path(reference)
tuple val(meta3), path(samplesheet_csv), path(bam_data) // this is a merged channel to ensure that both come from the right place.
tuple val(meta3), path(samplesheet_csv) // this is a merged channel to ensure that both come from the right place.
path blastp, stageAs: "blastp.dmnd"
path blastn
path blastx
path btk_config_file
path tax_dump
path btk_yaml, stageAs: "BTK.yaml"
val busco_lineages_folder
val busco_lineages
val taxon
val gca_accession

output:
tuple val(meta), path("${meta.id}_btk_out/blobtoolkit/${meta.id}*"), emit: dataset
Expand All @@ -28,13 +26,9 @@ process SANGER_TOL_BTK {
script:
def prefix = task.ext.prefix ?: "${meta.id}"
def args = task.ext.args ?: ""
def executor = task.ext.executor ?: ""
def profiles = task.ext.profiles ?: ""
def get_version = task.ext.version_data ?: "UNKNOWN - SETTING NOT SET"
def btk_config = btk_config_file ? "-c $btk_config_file" : ""
def pipeline_version = task.ext.version ?: "0.6.0"
// YAML used to avoid the use of GCA accession number
// https://github.com/sanger-tol/blobtoolkit/issues/77

// Seems to be an issue where a nested pipeline can't see the files in the same directory
// Running realpath gets around this but the files copied into the folder are
Expand All @@ -47,7 +41,7 @@ process SANGER_TOL_BTK {


"""
$executor 'nextflow run sanger-tol/blobtoolkit \\
nextflow run sanger-tol/blobtoolkit \\
-r $pipeline_version \\
-profile $profiles \\
--input "\$(realpath $samplesheet_csv)" \\
Expand All @@ -60,9 +54,7 @@ process SANGER_TOL_BTK {
--blastp "\$(realpath blastp.dmnd)" \\
--blastn "\$(realpath $blastn)" \\
--blastx "\$(realpath $blastx)" \\
$btk_config \\
--blastx_outext "txt" \\
$args'
$args
mv ${prefix}_btk_out/pipeline_info blobtoolkit_pipeline_info
Expand Down
Loading

0 comments on commit 3b8d923

Please sign in to comment.