diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 00000000..b290e090 --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,20 @@ +{ + "name": "nfcore", + "image": "nfcore/gitpod:latest", + "remoteUser": "gitpod", + "runArgs": ["--privileged"], + + // Configure tool-specific properties. + "customizations": { + // Configure properties specific to VS Code. + "vscode": { + // Set *default* container specific settings.json values on container create. + "settings": { + "python.defaultInterpreterPath": "/opt/conda/bin/python" + }, + + // Add the IDs of extensions you want installed when the container is created. + "extensions": ["ms-python.python", "ms-python.vscode-pylance", "nf-core.nf-core-extensionpack"] + } + } +} diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 00000000..72dda289 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,33 @@ +root = true + +[*] +charset = utf-8 +end_of_line = lf +insert_final_newline = true +trim_trailing_whitespace = true +indent_size = 4 +indent_style = space + +[*.{md,yml,yaml,html,css,scss,js}] +indent_size = 2 + +# These files are edited and tested upstream in nf-core/modules +[/modules/nf-core/**] +charset = unset +end_of_line = unset +insert_final_newline = unset +trim_trailing_whitespace = unset +indent_style = unset +[/subworkflows/nf-core/**] +charset = unset +end_of_line = unset +insert_final_newline = unset +trim_trailing_whitespace = unset +indent_style = unset + +[/assets/email*] +indent_size = unset + +# ignore python and markdown +[*.{py,md}] +indent_style = unset diff --git a/.gitattributes b/.gitattributes index 7fe55006..7a2dabc2 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1 +1,4 @@ *.config linguist-language=nextflow +*.nf.test linguist-language=nextflow +modules/nf-core/** linguist-generated +subworkflows/nf-core/** linguist-generated diff --git a/.github/.dockstore.yml b/.github/.dockstore.yml new file mode 100644 index 
00000000..191fabd2 --- /dev/null +++ b/.github/.dockstore.yml @@ -0,0 +1,6 @@ +# Dockstore config version, not pipeline version +version: 1.2 +workflows: + - subclass: nfl + primaryDescriptorPath: /nextflow.config + publish: True diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 6a5ef862..22c08721 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -1,47 +1,125 @@ # nf-core/rnafusion: Contributing Guidelines -Hi there! Many thanks for taking an interest in improving nf-core/rnafusion. +Hi there! +Many thanks for taking an interest in improving nf-core/rnafusion. -We try to manage the required tasks for nf-core/rnafusion using GitHub issues, you probably came to this page when creating one. Please use the pre-filled template to save time. - -However, don't be put off by this template - other more general issues and suggestions are welcome! Contributions to the code are even more welcome ;) - -> If you need help using or modifying nf-core/rnafusion then the best place to ask is on the pipeline channel on [Slack](https://nf-core-invite.herokuapp.com/). +We try to manage the required tasks for nf-core/rnafusion using GitHub issues, you probably came to this page when creating one. +Please use the pre-filled template to save time. +However, don't be put off by this template - other more general issues and suggestions are welcome! +Contributions to the code are even more welcome ;) +> [!NOTE] +> If you need help using or modifying nf-core/rnafusion then the best place to ask is on the nf-core Slack [#rnafusion](https://nfcore.slack.com/channels/rnafusion) channel ([join our Slack here](https://nf-co.re/join/slack)). ## Contribution workflow -If you'd like to write some code for nf-core/rnafusion, the standard workflow -is as follows: -1. Check that there isn't already an issue about your idea in the - [nf-core/rnafusion issues](https://github.com/nf-core/rnafusion/issues) to avoid - duplicating work. 
- * If there isn't one already, please create one so that others know you're working on this -2. Fork the [nf-core/rnafusion repository](https://github.com/nf-core/rnafusion) to your GitHub account -3. Make the necessary changes / additions within your forked repository -4. Submit a Pull Request against the `dev` branch and wait for the code to be reviewed and merged. +If you'd like to write some code for nf-core/rnafusion, the standard workflow is as follows: -If you're not used to this workflow with git, you can start with some [basic docs from GitHub](https://help.github.com/articles/fork-a-repo/) or even their [excellent interactive tutorial](https://try.github.io/). +1. Check that there isn't already an issue about your idea in the [nf-core/rnafusion issues](https://github.com/nf-core/rnafusion/issues) to avoid duplicating work. If there isn't one already, please create one so that others know you're working on this +2. [Fork](https://help.github.com/en/github/getting-started-with-github/fork-a-repo) the [nf-core/rnafusion repository](https://github.com/nf-core/rnafusion) to your GitHub account +3. Make the necessary changes / additions within your forked repository following [Pipeline conventions](#pipeline-contribution-conventions) +4. Use `nf-core pipelines schema build` and add any new parameters to the pipeline JSON schema (requires [nf-core tools](https://github.com/nf-core/tools) >= 1.10). +5. Submit a Pull Request against the `dev` branch and wait for the code to be reviewed and merged +If you're not used to this workflow with git, you can start with some [docs from GitHub](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests) or even their [excellent `git` resources](https://try.github.io/). ## Tests -When you create a pull request with changes, [Travis CI](https://travis-ci.org/) will run automatic tests. + +You have the option to test your changes locally by running the pipeline. 
For receiving warnings about process selectors and other `debug` information, it is recommended to use the debug profile. Execute all the tests with the following command, BUT DON'T FORGET TO ADD THE PARAMETERS cosmic_username AND cosmic_passwd in tests/main.nf.test. + +```bash +nf-test test --profile debug,test,docker --verbose +``` + +When you create a pull request with changes, [GitHub Actions](https://github.com/features/actions) will run automatic tests. Typically, pull-requests are only fully reviewed when these tests are passing, though of course we can help out before then. There are typically two types of tests that run: -### Lint Tests -The nf-core has a [set of guidelines](http://nf-co.re/guidelines) which all pipelines must adhere to. -To enforce these and ensure that all pipelines stay in sync, we have developed a helper tool which runs checks on the pipeline code. This is in the [nf-core/tools repository](https://github.com/nf-core/tools) and once installed can be run locally with the `nf-core lint ` command. +### Lint tests + +`nf-core` has a [set of guidelines](https://nf-co.re/developers/guidelines) which all pipelines must adhere to. +To enforce these and ensure that all pipelines stay in sync, we have developed a helper tool which runs checks on the pipeline code. This is in the [nf-core/tools repository](https://github.com/nf-core/tools) and once installed can be run locally with the `nf-core pipelines lint <pipeline-directory>` command. If any failures or warnings are encountered, please follow the listed URL for more documentation. -### Pipeline Tests -Each nf-core pipeline should be set up with a minimal set of test-data. -Travis CI then runs the pipeline on this data to ensure that it exists successfully. +### Pipeline tests + +Each `nf-core` pipeline should be set up with a minimal set of test-data. +`GitHub Actions` then runs the pipeline on this data to ensure that it exits successfully. If there are any failures then the automated tests fail. 
-These tests are run both with the latest available version of Nextflow and also the minimum required version that is stated in the pipeline code. +These tests are run both with the latest available version of `Nextflow` and also the minimum required version that is stated in the pipeline code. + +## Patch + +:warning: Only in the unlikely and regrettable event of a release happening with a bug. + +- On your own fork, make a new branch `patch` based on `upstream/master`. +- Fix the bug, and bump version (X.Y.Z+1). +- A PR should be made on `master` from patch to directly resolve this particular bug. ## Getting help -For further information/help, please consult the [nf-core/rnafusion documentation](https://github.com/nf-core/rnafusion#documentation) and don't hesitate to get in touch on the pipeline channel on [Slack](https://nf-core-invite.herokuapp.com/). + +For further information/help, please consult the [nf-core/rnafusion documentation](https://nf-co.re/rnafusion/usage) and don't hesitate to get in touch on the nf-core Slack [#rnafusion](https://nfcore.slack.com/channels/rnafusion) channel ([join our Slack here](https://nf-co.re/join/slack)). + +## Pipeline contribution conventions + +To make the nf-core/rnafusion code and processing logic more understandable for new contributors and to ensure quality, we semi-standardise the way the code and other contributions are written. + +### Adding a new step + +If you wish to contribute a new step, please use the following coding standards: + +1. Define the corresponding input channel into your new process from the expected previous process channel +2. Write the process block (see below). +3. Define the output channel if needed (see below). +4. Add any new parameters to `nextflow.config` with a default (see below). +5. Add any new parameters to `nextflow_schema.json` with help text (via the `nf-core pipelines schema build` tool). +6. Add sanity checks and validation for all relevant parameters. +7. 
Perform local tests to validate that the new code works as expected. +8. If applicable, add a new test command in `.github/workflows/ci.yml`. +9. Update MultiQC config `assets/multiqc_config.yml` so relevant suffixes, file name clean up and module plots are in the appropriate order. If applicable, add a [MultiQC](https://multiqc.info/) module. +10. Add a description of the output files and if relevant any appropriate images from the MultiQC report to `docs/output.md`. + +### Default values + +Parameters should be initialised / defined with default values in `nextflow.config` under the `params` scope. + +Once there, use `nf-core pipelines schema build` to add to `nextflow_schema.json`. + +### Default processes resource requirements + +Sensible defaults for process resource requirements (CPUs / memory / time) for a process should be defined in `conf/base.config`. These should generally be specified generic with `withLabel:` selectors so they can be shared across multiple processes/steps of the pipeline. A nf-core standard set of labels that should be followed where possible can be seen in the [nf-core pipeline template](https://github.com/nf-core/tools/blob/main/nf_core/pipeline-template/conf/base.config), which has the default process as a single core-process, and then different levels of multi-core configurations for increasingly large memory requirements defined with standardised labels. + +The process resources can be passed on to the tool dynamically within the process with the `${task.cpus}` and `${task.memory}` variables in the `script:` block. + +### Naming schemes + +Please use the following naming schemes, to make it easy to understand what is going where. 
+ +- initial process channel: `ch_output_from_<process>` +- intermediate and terminal channels: `ch_<previousprocess>_for_<nextprocess>` + +### Nextflow version bumping + +If you are using a new feature from core Nextflow, you may bump the minimum required version of nextflow in the pipeline with: `nf-core pipelines bump-version --nextflow . [min-nf-version]` + +### Images and figures + +For overview images and other documents we follow the nf-core [style guidelines and examples](https://nf-co.re/developers/design_guidelines). + +## GitHub Codespaces + +This repo includes a devcontainer configuration which will create a GitHub Codespaces for Nextflow development! This is an online developer environment that runs in your browser, complete with VSCode and a terminal. + +To get started: + +- Open the repo in [Codespaces](https://github.com/nf-core/rnafusion/codespaces) +- Tools installed + - nf-core + - Nextflow + +Devcontainer specs: + +- [DevContainer config](.devcontainer/devcontainer.json) diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md deleted file mode 100644 index 405f88fe..00000000 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ /dev/null @@ -1,31 +0,0 @@ -Hi there! - -Thanks for telling us about a problem with the pipeline. Please delete this text and anything that's not relevant from the template below: - -#### Describe the bug -A clear and concise description of what the bug is. - -#### Steps to reproduce -Steps to reproduce the behaviour: -1. Command line: `nextflow run ...` -2. See error: _Please provide your error message_ - -#### Expected behaviour -A clear and concise description of what you expected to happen. - -#### System: - - Hardware: [e.g. HPC, Desktop, Cloud...] - - Executor: [e.g. slurm, local, awsbatch...] - - OS: [e.g. CentOS Linux, macOS, Linux Mint...] - - Version [e.g. 7, 10.13.6, 18.3...] - -#### Nextflow Installation: - - Version: [e.g. 0.31.0] - -#### Container engine: - - Engine: [e.g. Conda, Docker or Singularity] - - version: [e.g. 
1.0.0] - - Image tag: [e.g. nfcore/rnafusion:1.0.0] - -#### Additional context -Add any other context about the problem here. diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml new file mode 100644 index 00000000..7755ffb4 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -0,0 +1,50 @@ +name: Bug report +description: Report something that is broken or incorrect +labels: bug +body: + - type: markdown + attributes: + value: | + Before you post this issue, please check the documentation: + + - [nf-core website: troubleshooting](https://nf-co.re/usage/troubleshooting) + - [nf-core/rnafusion pipeline documentation](https://nf-co.re/rnafusion/usage) + + - type: textarea + id: description + attributes: + label: Description of the bug + description: A clear and concise description of what the bug is. + validations: + required: true + + - type: textarea + id: command_used + attributes: + label: Command used and terminal output + description: Steps to reproduce the behaviour. Please paste the command you used to launch the pipeline and the output from your terminal. + render: console + placeholder: | + $ nextflow run ... + + Some output where something broke + + - type: textarea + id: files + attributes: + label: Relevant files + description: | + Please drag and drop the relevant files here. Create a `.zip` archive if the extension is not allowed. + Your verbose log file `.nextflow.log` is often useful _(this is a hidden file in the directory where you launched the pipeline)_ as well as custom Nextflow configuration files. + + - type: textarea + id: system + attributes: + label: System information + description: | + * Nextflow version _(eg. 23.04.0)_ + * Hardware _(eg. HPC, Desktop, Cloud)_ + * Executor _(eg. slurm, local, awsbatch)_ + * Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter, Charliecloud, or Apptainer)_ + * OS _(eg. CentOS Linux, macOS, Linux Mint)_ + * Version of nf-core/rnafusion _(eg. 
1.1, 1.5, 1.8.2)_ diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 00000000..69a065e7 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,7 @@ +contact_links: + - name: Join nf-core + url: https://nf-co.re/join + about: Please join the nf-core community here + - name: "Slack #rnafusion channel" + url: https://nfcore.slack.com/channels/rnafusion + about: Discussion about the nf-core/rnafusion pipeline diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md deleted file mode 100644 index 1f025b77..00000000 --- a/.github/ISSUE_TEMPLATE/feature_request.md +++ /dev/null @@ -1,16 +0,0 @@ -Hi there! - -Thanks for suggesting a new feature for the pipeline! Please delete this text and anything that's not relevant from the template below: - -#### Is your feature request related to a problem? Please describe. -A clear and concise description of what the problem is. -Ex. I'm always frustrated when [...] - -#### Describe the solution you'd like -A clear and concise description of what you want to happen. - -#### Describe alternatives you've considered -A clear and concise description of any alternative solutions or features you've considered. - -#### Additional context -Add any other context about the feature request here. diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml new file mode 100644 index 00000000..2c388d2f --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.yml @@ -0,0 +1,11 @@ +name: Feature request +description: Suggest an idea for the nf-core/rnafusion pipeline +labels: enhancement +body: + - type: textarea + id: description + attributes: + label: Description of feature + description: Please describe your suggestion for a new feature. It might help to describe a problem or use case, plus any alternatives that you have considered. 
+ validations: + required: true diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 2eb3f51f..7b812497 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,15 +1,26 @@ -Many thanks to contributing to nf-core/rnafusion! + ## PR checklist - - [ ] This comment contains a description of changes (with reason) - - [ ] If you've fixed a bug or added code that should be tested, add tests! - - [ ] If necessary, also make a PR on the [nf-core/rnafusion branch on the nf-core/test-datasets repo]( https://github.com/nf-core/test-datasets/pull/new/nf-core/rnafusion) - - [ ] Ensure the test suite passes (`nextflow run . -profile test,docker`). - - [ ] Make sure your code lints (`nf-core lint .`). - - [ ] Documentation in `docs` is updated - - [ ] `CHANGELOG.md` is updated - - [ ] `README.md` is updated - -**Learn more about contributing:** https://github.com/nf-core/rnafusion/tree/master/.github/CONTRIBUTING.md + +- [ ] This comment contains a description of changes (with reason). +- [ ] If you've fixed a bug or added code that should be tested, add tests! +- [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/rnafusion/tree/master/.github/CONTRIBUTING.md)? +- [ ] If necessary, also make a PR on the nf-core/rnafusion _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. +- [ ] Make sure your code lints (`nf-core pipelines lint`). +- [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir <OUTDIR>`). +- [ ] Check for unexpected warnings in debug mode (`nextflow run . -profile debug,test,docker --outdir <OUTDIR>`). +- [ ] Usage Documentation in `docs/usage.md` is updated. +- [ ] Output Documentation in `docs/output.md` is updated. +- [ ] `CHANGELOG.md` is updated. +- [ ] `README.md` is updated (including new tool citations and authors/contributors). 
diff --git a/.github/markdownlint.yml b/.github/markdownlint.yml deleted file mode 100644 index e052a635..00000000 --- a/.github/markdownlint.yml +++ /dev/null @@ -1,9 +0,0 @@ -# Markdownlint configuration file -default: true, -line-length: false -no-multiple-blanks: 0 -blanks-around-headers: false -blanks-around-lists: false -header-increment: false -no-duplicate-header: - siblings_only: true diff --git a/.github/pylintrc b/.github/pylintrc deleted file mode 100644 index f58b60bb..00000000 --- a/.github/pylintrc +++ /dev/null @@ -1,562 +0,0 @@ -[MASTER] - -# A comma-separated list of package or module names from where C extensions may -# be loaded. Extensions are loading into the active Python interpreter and may -# run arbitrary code. -extension-pkg-whitelist= - -# Add files or directories to the blacklist. They should be base names, not -# paths. -ignore=CVS - -# Add files or directories matching the regex patterns to the blacklist. The -# regex matches against base names, not paths. -ignore-patterns= - -# Python code to execute, usually for sys.path manipulation such as -# pygtk.require(). -#init-hook= - -# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the -# number of processors available to use. -jobs=1 - -# Control the amount of potential inferred values when inferring a single -# object. This can help the performance when dealing with large functions or -# complex, nested conditions. -limit-inference-results=100 - -# List of plugins (as comma separated values of python modules names) to load, -# usually to register additional checkers. -load-plugins= - -# Pickle collected data for later comparisons. -persistent=yes - -# Specify a configuration file. -#rcfile= - -# When enabled, pylint would attempt to guess common misconfiguration and emit -# user-friendly hints instead of false-positive error messages. -suggestion-mode=yes - -# Allow loading of arbitrary C extensions. 
Extensions are imported into the -# active Python interpreter and may run arbitrary code. -unsafe-load-any-extension=no - - -[MESSAGES CONTROL] - -# Only show warnings with the listed confidence levels. Leave empty to show -# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED. -confidence= - -# Disable the message, report, category or checker with the given id(s). You -# can either give multiple identifiers separated by comma (,) or put this -# option multiple times (only on the command line, not in the configuration -# file where it should appear only once). You can also use "--disable=all" to -# disable everything first and then reenable specific checks. For example, if -# you want to run only the similarities checker, you can use "--disable=all -# --enable=similarities". If you want to run only the classes checker, but have -# no Warning level messages displayed, use "--disable=all --enable=classes -# --disable=W". -disable=print-statement, - parameter-unpacking, - unpacking-in-except, - old-raise-syntax, - backtick, - long-suffix, - old-ne-operator, - old-octal-literal, - import-star-module-level, - non-ascii-bytes-literal, - raw-checker-failed, - bad-inline-option, - locally-disabled, - file-ignored, - suppressed-message, - useless-suppression, - deprecated-pragma, - use-symbolic-message-instead, - apply-builtin, - basestring-builtin, - buffer-builtin, - cmp-builtin, - coerce-builtin, - execfile-builtin, - file-builtin, - long-builtin, - raw_input-builtin, - reduce-builtin, - standarderror-builtin, - unicode-builtin, - xrange-builtin, - coerce-method, - delslice-method, - getslice-method, - setslice-method, - no-absolute-import, - old-division, - dict-iter-method, - dict-view-method, - next-method-called, - metaclass-assignment, - indexing-exception, - raising-string, - reload-builtin, - oct-method, - hex-method, - nonzero-method, - cmp-method, - input-builtin, - round-builtin, - intern-builtin, - unichr-builtin, - map-builtin-not-iterating, - 
zip-builtin-not-iterating, - range-builtin-not-iterating, - filter-builtin-not-iterating, - using-cmp-argument, - eq-without-hash, - div-method, - idiv-method, - rdiv-method, - exception-message-attribute, - invalid-str-codec, - sys-max-int, - bad-python3-import, - deprecated-string-function, - deprecated-str-translate-call, - deprecated-itertools-function, - deprecated-types-field, - next-method-defined, - dict-items-not-iterating, - dict-keys-not-iterating, - dict-values-not-iterating, - deprecated-operator-function, - deprecated-urllib-function, - xreadlines-attribute, - deprecated-sys-function, - exception-escape, - comprehension-escape, - too-few-public-methods - -# Enable the message, report, category or checker with the given id(s). You can -# either give multiple identifier separated by comma (,) or put this option -# multiple time (only on the command line, not in the configuration file where -# it should appear only once). See also the "--disable" option for examples. -enable=c-extension-no-member - - -[REPORTS] - -# Python expression which should return a note less than 10 (10 is the highest -# note). You have access to the variables errors warning, statement which -# respectively contain the number of errors / warnings messages and the total -# number of statements analyzed. This is used by the global evaluation report -# (RP0004). -evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) - -# Template used to display messages. This is a python new-style format string -# used to format the message information. See doc for all details. -#msg-template= - -# Set the output format. Available formats are text, parseable, colorized, json -# and msvs (visual studio). You can also give a reporter class, e.g. -# mypackage.mymodule.MyReporterClass. -output-format=text - -# Tells whether to display a full report or only the messages. -reports=no - -# Activate the evaluation score. 
-score=yes - - -[REFACTORING] - -# Maximum number of nested blocks for function / method body -max-nested-blocks=5 - -# Complete name of functions that never returns. When checking for -# inconsistent-return-statements if a never returning function is called then -# it will be considered as an explicit return statement and no message will be -# printed. -never-returning-functions=sys.exit - - -[BASIC] - -# Naming style matching correct argument names. -argument-naming-style=snake_case - -# Regular expression matching correct argument names. Overrides argument- -# naming-style. -#argument-rgx= - -# Naming style matching correct attribute names. -attr-naming-style=snake_case - -# Regular expression matching correct attribute names. Overrides attr-naming- -# style. -#attr-rgx= - -# Bad variable names which should always be refused, separated by a comma. -bad-names=foo, - bar, - baz, - toto, - tutu, - tata - -# Naming style matching correct class attribute names. -class-attribute-naming-style=any - -# Regular expression matching correct class attribute names. Overrides class- -# attribute-naming-style. -#class-attribute-rgx= - -# Naming style matching correct class names. -class-naming-style=PascalCase - -# Regular expression matching correct class names. Overrides class-naming- -# style. -#class-rgx= - -# Naming style matching correct constant names. -const-naming-style=UPPER_CASE - -# Regular expression matching correct constant names. Overrides const-naming- -# style. -#const-rgx= - -# Minimum line length for functions/classes that require docstrings, shorter -# ones are exempt. -docstring-min-length=-1 - -# Naming style matching correct function names. -function-naming-style=snake_case - -# Regular expression matching correct function names. Overrides function- -# naming-style. -#function-rgx= - -# Good variable names which should always be accepted, separated by a comma. 
-good-names=i, - j, - k, - ex, - Run, - _ - -# Include a hint for the correct naming format with invalid-name. -include-naming-hint=no - -# Naming style matching correct inline iteration names. -inlinevar-naming-style=any - -# Regular expression matching correct inline iteration names. Overrides -# inlinevar-naming-style. -#inlinevar-rgx= - -# Naming style matching correct method names. -method-naming-style=snake_case - -# Regular expression matching correct method names. Overrides method-naming- -# style. -#method-rgx= - -# Naming style matching correct module names. -module-naming-style=snake_case - -# Regular expression matching correct module names. Overrides module-naming- -# style. -#module-rgx= - -# Colon-delimited sets of names that determine each other's naming style when -# the name regexes allow several styles. -name-group= - -# Regular expression which should only match function or class names that do -# not require a docstring. -no-docstring-rgx=^_ - -# List of decorators that produce properties, such as abc.abstractproperty. Add -# to this list to register other decorators that produce valid properties. -# These decorators are taken in consideration only for invalid-name. -property-classes=abc.abstractproperty - -# Naming style matching correct variable names. -variable-naming-style=snake_case - -# Regular expression matching correct variable names. Overrides variable- -# naming-style. -#variable-rgx= - - -[FORMAT] - -# Expected format of line ending, e.g. empty (any line ending), LF or CRLF. -expected-line-ending-format= - -# Regexp for a line that is allowed to be longer than the limit. -ignore-long-lines=^\s*(# )??$ - -# Number of spaces of indent required inside a hanging or continued line. -indent-after-paren=4 - -# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 -# tab). -indent-string=' ' - -# Maximum number of characters on a single line. -max-line-length=100 - -# Maximum number of lines in a module. 
-max-module-lines=1000 - -# List of optional constructs for which whitespace checking is disabled. `dict- -# separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}. -# `trailing-comma` allows a space between comma and closing bracket: (a, ). -# `empty-line` allows space-only lines. -no-space-check=trailing-comma, - dict-separator - -# Allow the body of a class to be on the same line as the declaration if body -# contains single statement. -single-line-class-stmt=no - -# Allow the body of an if to be on the same line as the test if there is no -# else. -single-line-if-stmt=no - - -[LOGGING] - -# Format style used to check logging format string. `old` means using % -# formatting, while `new` is for `{}` formatting. -logging-format-style=old - -# Logging modules to check that the string format arguments are in logging -# function parameter format. -logging-modules=logging - - -[MISCELLANEOUS] - -# List of note tags to take in consideration, separated by a comma. -notes=FIXME, - XXX, - TODO - - -[SIMILARITIES] - -# Ignore comments when computing similarities. -ignore-comments=yes - -# Ignore docstrings when computing similarities. -ignore-docstrings=yes - -# Ignore imports when computing similarities. -ignore-imports=no - -# Minimum lines number of a similarity. -min-similarity-lines=4 - - -[SPELLING] - -# Limits count of emitted suggestions for spelling mistakes. -max-spelling-suggestions=4 - -# Spelling dictionary name. Available dictionaries: none. To make it working -# install python-enchant package.. -spelling-dict= - -# List of comma separated words that should not be checked. -spelling-ignore-words= - -# A path to a file that contains private dictionary; one word per line. -spelling-private-dict-file= - -# Tells whether to store unknown words to indicated private dictionary in -# --spelling-private-dict-file option instead of raising a message. 
-spelling-store-unknown-words=no - - -[TYPECHECK] - -# List of decorators that produce context managers, such as -# contextlib.contextmanager. Add to this list to register other decorators that -# produce valid context managers. -contextmanager-decorators=contextlib.contextmanager - -# List of members which are set dynamically and missed by pylint inference -# system, and so shouldn't trigger E1101 when accessed. Python regular -# expressions are accepted. -generated-members= - -# Tells whether missing members accessed in mixin class should be ignored. A -# mixin class is detected if its name ends with "mixin" (case insensitive). -ignore-mixin-members=yes - -# Tells whether to warn about missing members when the owner of the attribute -# is inferred to be None. -ignore-none=yes - -# This flag controls whether pylint should warn about no-member and similar -# checks whenever an opaque object is returned when inferring. The inference -# can return multiple potential results while evaluating a Python object, but -# some branches might not be evaluated, which results in partial inference. In -# that case, it might be useful to still emit no-member and other checks for -# the rest of the inferred objects. -ignore-on-opaque-inference=yes - -# List of class names for which member attributes should not be checked (useful -# for classes with dynamically set attributes). This supports the use of -# qualified names. -ignored-classes=optparse.Values,thread._local,_thread._local - -# List of module names for which member attributes should not be checked -# (useful for modules/projects where namespaces are manipulated during runtime -# and thus existing member attributes cannot be deduced by static analysis. It -# supports qualified module names, as well as Unix pattern matching. -ignored-modules= - -# Show a hint with possible names when a member name was not found. The aspect -# of finding the hint is based on edit distance. 
-missing-member-hint=yes - -# The minimum edit distance a name should have in order to be considered a -# similar match for a missing member name. -missing-member-hint-distance=1 - -# The total number of similar names that should be taken in consideration when -# showing a hint for a missing member. -missing-member-max-choices=1 - - -[VARIABLES] - -# List of additional names supposed to be defined in builtins. Remember that -# you should avoid defining new builtins when possible. -additional-builtins= - -# Tells whether unused global variables should be treated as a violation. -allow-global-unused-variables=yes - -# List of strings which can identify a callback function by name. A callback -# name must start or end with one of those strings. -callbacks=cb_, - _cb - -# A regular expression matching the name of dummy variables (i.e. expected to -# not be used). -dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_ - -# Argument names that match this expression will be ignored. Default to name -# with leading underscore. -ignored-argument-names=_.*|^ignored_|^unused_ - -# Tells whether we should check for unused import in __init__ files. -init-import=no - -# List of qualified module names which can have objects that can redefine -# builtins. -redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io - - -[CLASSES] - -# List of method names used to declare (i.e. assign) instance attributes. -defining-attr-methods=__init__, - __new__, - setUp - -# List of member names, which should be excluded from the protected access -# warning. -exclude-protected=_asdict, - _fields, - _replace, - _source, - _make - -# List of valid names for the first argument in a class method. -valid-classmethod-first-arg=cls - -# List of valid names for the first argument in a metaclass class method. -valid-metaclass-classmethod-first-arg=cls - - -[DESIGN] - -# Maximum number of arguments for function / method. 
-max-args=5 - -# Maximum number of attributes for a class (see R0902). -max-attributes=7 - -# Maximum number of boolean expressions in an if statement. -max-bool-expr=5 - -# Maximum number of branch for function / method body. -max-branches=12 - -# Maximum number of locals for function / method body. -max-locals=15 - -# Maximum number of parents for a class (see R0901). -max-parents=7 - -# Maximum number of public methods for a class (see R0904). -max-public-methods=20 - -# Maximum number of return / yield for function / method body. -max-returns=6 - -# Maximum number of statements in function / method body. -max-statements=50 - -# Minimum number of public methods for a class (see R0903). -min-public-methods=2 - - -[IMPORTS] - -# Allow wildcard imports from modules that define __all__. -allow-wildcard-with-all=no - -# Analyse import fallback blocks. This can be used to support both Python 2 and -# 3 compatible code, which means that the block might have code that exists -# only in one or another interpreter, leading to false positives when analysed. -analyse-fallback-blocks=no - -# Deprecated modules which should not be used, separated by a comma. -deprecated-modules=optparse,tkinter.tix - -# Create a graph of external dependencies in the given file (report RP0402 must -# not be disabled). -ext-import-graph= - -# Create a graph of every (i.e. internal and external) dependencies in the -# given file (report RP0402 must not be disabled). -import-graph= - -# Create a graph of internal dependencies in the given file (report RP0402 must -# not be disabled). -int-import-graph= - -# Force import order to recognize a module as part of the standard -# compatibility libraries. -known-standard-library= - -# Force import order to recognize a module as part of a third party library. -known-third-party=enchant - - -[EXCEPTIONS] - -# Exceptions that will emit a warning when being caught. Defaults to -# "Exception". 
-overgeneral-exceptions=Exception diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml new file mode 100644 index 00000000..baaa0461 --- /dev/null +++ b/.github/workflows/awsfulltest.yml @@ -0,0 +1,56 @@ +name: nf-core AWS full size tests +# This workflow is triggered on PRs opened against the master branch. +# It can be additionally triggered manually with GitHub actions workflow dispatch button. +# It runs the -profile 'test_full' on AWS batch + +on: + pull_request: + branches: + - master + workflow_dispatch: + pull_request_review: + types: [submitted] + +jobs: + run-platform: + name: Run AWS full tests + # run only if the PR is approved by at least 2 reviewers and against the master branch or manually triggered + if: github.repository == 'nf-core/rnafusion' && github.event.review.state == 'approved' && github.event.pull_request.base.ref == 'master' || github.event_name == 'workflow_dispatch' + runs-on: ubuntu-latest + steps: + - uses: octokit/request-action@v2.x + id: check_approvals + with: + route: GET /repos/${{ github.repository }}/pulls/${{ github.event.pull_request.number }}/reviews + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - id: test_variables + if: github.event_name != 'workflow_dispatch' + run: | + JSON_RESPONSE='${{ steps.check_approvals.outputs.data }}' + CURRENT_APPROVALS_COUNT=$(echo $JSON_RESPONSE | jq -c '[.[] | select(.state | contains("APPROVED")) ] | length') + test $CURRENT_APPROVALS_COUNT -ge 2 || exit 1 # At least 2 approvals are required + - name: Launch workflow via Seqera Platform + uses: seqeralabs/action-tower-launch@v2 + with: + workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} + access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} + compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + revision: ${{ github.sha }} + workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/rnafusion/work-${{ github.sha }} + parameters: | + { + "hook_url": "${{ secrets.MEGATESTS_ALERTS_SLACK_HOOK_URL }}", + "outdir": "s3://${{ 
secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}", + "genomes_base": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}/references", + "cosmic_username": "${{ secrets.cosmic_username }}", + "cosmic_passwd": "${{ secrets.cosmic_passwd }}", + "all": true, + } + profiles: test_full,aws_tower + - uses: actions/upload-artifact@v4 + with: + name: Seqera Platform debug log file + path: | + seqera_platform_action_*.log + seqera_platform_action_*.json diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml new file mode 100644 index 00000000..977adb49 --- /dev/null +++ b/.github/workflows/awstest.yml @@ -0,0 +1,37 @@ +name: nf-core AWS test +# This workflow can be triggered manually with the GitHub actions workflow dispatch button. +# It runs the -profile 'test' on AWS batch + +on: + workflow_dispatch: +jobs: + run-platform: + name: Run AWS tests + if: github.repository == 'nf-core/rnafusion' + runs-on: ubuntu-latest + steps: + # Launch workflow using Seqera Platform CLI tool action + - name: Launch workflow via Seqera Platform + uses: seqeralabs/action-tower-launch@v2 + with: + workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} + access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} + compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + revision: ${{ github.sha }} + workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/rnafusion/work-${{ github.sha }} + parameters: | + { + "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}", + "genomes_base": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}/references", + "cosmic_username": "${{ secrets.cosmic_username }}", + "cosmic_passwd": "${{ secrets.cosmic_passwd }}", + "all": true, + "stub": true + } + profiles: test,aws_tower + - uses: actions/upload-artifact@v4 + with: + name: Tower debug log file + path: | + tower_action_*.log + tower_action_*.json diff --git a/.github/workflows/branch.yml b/.github/workflows/branch.yml new file mode 100644 index 
00000000..19704d23 --- /dev/null +++ b/.github/workflows/branch.yml @@ -0,0 +1,44 @@ +name: nf-core branch protection +# This workflow is triggered on PRs to master branch on the repository +# It fails when someone tries to make a PR against the nf-core `master` branch instead of `dev` +on: + pull_request_target: + branches: [master] + +jobs: + test: + runs-on: ubuntu-latest + steps: + # PRs to the nf-core repo master branch are only ok if coming from the nf-core repo `dev` or any `patch` branches + - name: Check PRs + if: github.repository == 'nf-core/rnafusion' + run: | + { [[ ${{github.event.pull_request.head.repo.full_name }} == nf-core/rnafusion ]] && [[ $GITHUB_HEAD_REF == "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]] + + # If the above check failed, post a comment on the PR explaining the failure + # NOTE - this doesn't currently work if the PR is coming from a fork, due to limitations in GitHub actions secrets + - name: Post PR comment + if: failure() + uses: mshick/add-pr-comment@b8f338c590a895d50bcbfa6c5859251edc8952fc # v2 + with: + message: | + ## This PR is against the `master` branch :x: + + * Do not close this PR + * Click _Edit_ and change the `base` to `dev` + * This CI test will remain failed until you push a new commit + + --- + + Hi @${{ github.event.pull_request.user.login }}, + + It looks like this pull-request is has been made against the [${{github.event.pull_request.head.repo.full_name }}](https://github.com/${{github.event.pull_request.head.repo.full_name }}) `master` branch. + The `master` branch on nf-core repositories should always contain code from the latest release. + Because of this, PRs to `master` are only allowed if they come from the [${{github.event.pull_request.head.repo.full_name }}](https://github.com/${{github.event.pull_request.head.repo.full_name }}) `dev` branch. + + You do not need to close this PR, you can change the target branch to `dev` by clicking the _"Edit"_ button at the top of this page. 
+ Note that even after this, the test will continue to show as failing until you push a new commit. + + Thanks again for your contribution! + repo-token: ${{ secrets.GITHUB_TOKEN }} + allow-repeats: false diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 00000000..a510d72d --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,121 @@ +name: nf-core CI +# This workflow runs the pipeline with the minimal test dataset to check that it completes without any syntax errors +on: + push: + branches: + - dev + pull_request: + branches: + - dev + - master + release: + types: [published] + workflow_dispatch: + +env: + NFT_DIFF: "pdiff" + NFT_DIFF_ARGS: "--line-numbers --expand-tabs=2" + NFT_VER: "0.9.2" + NFT_WORKDIR: "~" + NXF_ANSI_LOG: false + NXF_SINGULARITY_CACHEDIR: ${{ github.workspace }}/.singularity + NXF_SINGULARITY_LIBRARYDIR: ${{ github.workspace }}/.singularity + +concurrency: + group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}" + cancel-in-progress: true + +jobs: + test: + name: "${{ matrix.NXF_VER }} | ${{ matrix.test_profile }} | ${{ matrix.compute_profile }}" + # Only run on push if this is the nf-core dev branch (merged PRs) + if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/rnafusion') }}" + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + NXF_VER: + - "24.04.2" + - "latest-stable" + test_profile: + - "test_stub" + compute_profile: + - "docker" + - "singularity" + - "conda" + isMaster: + - ${{ github.base_ref == 'master' }} + # Exclude conda and singularity on dev + exclude: + - isMaster: false + compute_profile: "conda" + - isMaster: false + compute_profile: "singularity" + steps: + - name: Check out pipeline code + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 + with: + fetch-depth: 0 + + - name: Set up Nextflow + uses: nf-core/setup-nextflow@v2 + with: + version: "${{ matrix.NXF_VER }}" 
+ + - name: Set up Apptainer + if: matrix.compute_profile == 'singularity' + uses: eWaterCycle/setup-apptainer@main + + - name: Set up Singularity + if: matrix.compute_profile == 'singularity' + run: | + mkdir -p $NXF_SINGULARITY_CACHEDIR + mkdir -p $NXF_SINGULARITY_LIBRARYDIR + + - name: Set up Miniconda + if: matrix.compute_profile == 'conda' + uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3 + with: + miniconda-version: "latest" + auto-update-conda: true + conda-solver: libmamba + channels: conda-forge,bioconda + + - name: Set up Conda + if: matrix.compute_profile == 'conda' + run: | + echo $(realpath $CONDA)/condabin >> $GITHUB_PATH + echo $(realpath python) >> $GITHUB_PATH + + - name: Clean up Disk space + uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 + + - uses: actions/setup-python@v4 + with: + python-version: "3.11" + architecture: "x64" + + - name: Install pdiff to see diff between nf-test snapshots + run: | + python -m pip install --upgrade pip + pip install pdiff + + - uses: nf-core/setup-nf-test@v1 + with: + version: ${{ env.NFT_VER }} + + - name: Run Tests (${{matrix.NXF_VER}} | ${{matrix.test_profile}} | ${{matrix.compute_profile}}) + run: | + nf-test test \ + --ci \ + --tag ${{matrix.test_profile}} \ + --profile "+${{ matrix.compute_profile }}" \ + --junitxml=test.xml \ + --debug --verbose + + - name: Publish Test Report + uses: mikepenz/action-junit-report@v3 + if: always() # always run even if the previous step fails + with: + report_paths: test.xml + annotate_only: true diff --git a/.github/workflows/clean-up.yml b/.github/workflows/clean-up.yml new file mode 100644 index 00000000..0b6b1f27 --- /dev/null +++ b/.github/workflows/clean-up.yml @@ -0,0 +1,24 @@ +name: "Close user-tagged issues and PRs" +on: + schedule: + - cron: "0 0 * * 0" # Once a week + +jobs: + clean-up: + runs-on: ubuntu-latest + permissions: + issues: write + pull-requests: write + steps: + - uses: 
actions/stale@28ca1036281a5e5922ead5184a1bbf96e5fc984e # v9 + with: + stale-issue-message: "This issue has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment otherwise this issue will be closed in 20 days." + stale-pr-message: "This PR has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment if it is still useful." + close-issue-message: "This issue was closed because it has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor and then staled for 20 days with no activity." + days-before-stale: 30 + days-before-close: 20 + days-before-pr-close: -1 + any-of-labels: "awaiting-changes,awaiting-feedback" + exempt-issue-labels: "WIP" + exempt-pr-labels: "WIP" + repo-token: "${{ secrets.GITHUB_TOKEN }}" diff --git a/.github/workflows/download_pipeline.yml b/.github/workflows/download_pipeline.yml new file mode 100644 index 00000000..713dc3e7 --- /dev/null +++ b/.github/workflows/download_pipeline.yml @@ -0,0 +1,119 @@ +name: Test successful pipeline download with 'nf-core pipelines download' + +# Run the workflow when: +# - dispatched manually +# - when a PR is opened or reopened to master branch +# - the head branch of the pull request is updated, i.e. if fixes for a release are pushed last minute to dev. +on: + workflow_dispatch: + inputs: + testbranch: + description: "The specific branch you wish to utilize for the test execution of nf-core pipelines download." 
+ required: true + default: "dev" + pull_request: + types: + - opened + - edited + - synchronize + branches: + - master + pull_request_target: + branches: + - master + +env: + NXF_ANSI_LOG: false + +jobs: + download: + runs-on: ubuntu-latest + steps: + - name: Install Nextflow + uses: nf-core/setup-nextflow@v2 + + - name: Disk space cleanup + uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 + + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 + with: + python-version: "3.12" + architecture: "x64" + + - name: Setup Apptainer + uses: eWaterCycle/setup-apptainer@4bb22c52d4f63406c49e94c804632975787312b3 # v2.0.0 + with: + apptainer-version: 1.3.4 + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install git+https://github.com/nf-core/tools.git@dev + + - name: Get the repository name and current branch set as environment variable + run: | + echo "REPO_LOWERCASE=${GITHUB_REPOSITORY,,}" >> ${GITHUB_ENV} + echo "REPOTITLE_LOWERCASE=$(basename ${GITHUB_REPOSITORY,,})" >> ${GITHUB_ENV} + echo "REPO_BRANCH=${{ github.event.inputs.testbranch || 'dev' }}" >> ${GITHUB_ENV} + + - name: Make a cache directory for the container images + run: | + mkdir -p ./singularity_container_images + + - name: Download the pipeline + env: + NXF_SINGULARITY_CACHEDIR: ./singularity_container_images + run: | + nf-core pipelines download ${{ env.REPO_LOWERCASE }} \ + --revision ${{ env.REPO_BRANCH }} \ + --outdir ./${{ env.REPOTITLE_LOWERCASE }} \ + --compress "none" \ + --container-system 'singularity' \ + --container-library "quay.io" -l "docker.io" -l "community.wave.seqera.io" \ + --container-cache-utilisation 'amend' \ + --download-configuration 'yes' + + - name: Inspect download + run: tree ./${{ env.REPOTITLE_LOWERCASE }} + + - name: Count the downloaded number of container images + id: count_initial + run: | + image_count=$(ls -1 ./singularity_container_images | wc -l | xargs) + echo "Initial 
container image count: $image_count" + echo "IMAGE_COUNT_INITIAL=$image_count" >> ${GITHUB_ENV} + + - name: Run the downloaded pipeline (stub) + id: stub_run_pipeline + continue-on-error: true + env: + NXF_SINGULARITY_CACHEDIR: ./singularity_container_images + NXF_SINGULARITY_HOME_MOUNT: true + run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -stub -profile test,singularity --outdir ./results + - name: Run the downloaded pipeline (stub run not supported) + id: run_pipeline + if: ${{ job.steps.stub_run_pipeline.status == failure() }} + env: + NXF_SINGULARITY_CACHEDIR: ./singularity_container_images + NXF_SINGULARITY_HOME_MOUNT: true + run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -profile test,singularity --outdir ./results + + - name: Count the downloaded number of container images + id: count_afterwards + run: | + image_count=$(ls -1 ./singularity_container_images | wc -l | xargs) + echo "Post-pipeline run container image count: $image_count" + echo "IMAGE_COUNT_AFTER=$image_count" >> ${GITHUB_ENV} + + - name: Compare container image counts + run: | + if [ "${{ env.IMAGE_COUNT_INITIAL }}" -ne "${{ env.IMAGE_COUNT_AFTER }}" ]; then + initial_count=${{ env.IMAGE_COUNT_INITIAL }} + final_count=${{ env.IMAGE_COUNT_AFTER }} + difference=$((final_count - initial_count)) + echo "$difference additional container images were \n downloaded at runtime . The pipeline has no support for offline runs!" + tree ./singularity_container_images + exit 1 + else + echo "The pipeline can be downloaded successfully!" 
+ fi diff --git a/.github/workflows/fix-linting.yml b/.github/workflows/fix-linting.yml new file mode 100644 index 00000000..df750880 --- /dev/null +++ b/.github/workflows/fix-linting.yml @@ -0,0 +1,89 @@ +name: Fix linting from a comment +on: + issue_comment: + types: [created] + +jobs: + fix-linting: + # Only run if comment is on a PR with the main repo, and if it contains the magic keywords + if: > + contains(github.event.comment.html_url, '/pull/') && + contains(github.event.comment.body, '@nf-core-bot fix linting') && + github.repository == 'nf-core/rnafusion' + runs-on: ubuntu-latest + steps: + # Use the @nf-core-bot token to check out so we can push later + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 + with: + token: ${{ secrets.nf_core_bot_auth_token }} + + # indication that the linting is being fixed + - name: React on comment + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: eyes + + # Action runs on the issue comment, so we don't get the PR by default + # Use the gh cli to check out the PR + - name: Checkout Pull Request + run: gh pr checkout ${{ github.event.issue.number }} + env: + GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }} + + # Install and run pre-commit + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 + with: + python-version: "3.12" + + - name: Install pre-commit + run: pip install pre-commit + + - name: Run pre-commit + id: pre-commit + run: pre-commit run --all-files + continue-on-error: true + + # indication that the linting has finished + - name: react if linting finished succesfully + if: steps.pre-commit.outcome == 'success' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: "+1" + + - name: Commit & push changes + id: commit-and-push + if: steps.pre-commit.outcome == 
'failure' + run: | + git config user.email "core@nf-co.re" + git config user.name "nf-core-bot" + git config push.default upstream + git add . + git status + git commit -m "[automated] Fix code linting" + git push + + - name: react if linting errors were fixed + id: react-if-fixed + if: steps.commit-and-push.outcome == 'success' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: hooray + + - name: react if linting errors were not fixed + if: steps.commit-and-push.outcome == 'failure' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: confused + + - name: react if linting errors were not fixed + if: steps.commit-and-push.outcome == 'failure' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + issue-number: ${{ github.event.issue.number }} + body: | + @${{ github.actor }} I tried to fix the linting errors, but it didn't work. Please fix them manually. + See [CI log](https://github.com/nf-core/rnafusion/actions/runs/${{ github.run_id }}) for more details. diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml new file mode 100644 index 00000000..a502573c --- /dev/null +++ b/.github/workflows/linting.yml @@ -0,0 +1,83 @@ +name: nf-core linting +# This workflow is triggered on pushes and PRs to the repository. +# It runs the `nf-core pipelines lint` and markdown lint tests to ensure +# that the code meets the nf-core guidelines. 
+on: + push: + branches: + - dev + pull_request: + release: + types: [published] + +jobs: + pre-commit: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 + + - name: Set up Python 3.12 + uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 + with: + python-version: "3.12" + + - name: Install pre-commit + run: pip install pre-commit + + - name: Run pre-commit + run: pre-commit run --all-files + + nf-core: + runs-on: ubuntu-latest + steps: + - name: Check out pipeline code + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 + + - name: Install Nextflow + uses: nf-core/setup-nextflow@v2 + + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 + with: + python-version: "3.12" + architecture: "x64" + + - name: read .nf-core.yml + uses: pietrobolcato/action-read-yaml@1.1.0 + id: read_yml + with: + config: ${{ github.workspace }}/.nf-core.yml + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install nf-core==${{ steps.read_yml.outputs['nf_core_version'] }} + + - name: Run nf-core pipelines lint + if: ${{ github.base_ref != 'master' }} + env: + GITHUB_COMMENTS_URL: ${{ github.event.pull_request.comments_url }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_PR_COMMIT: ${{ github.event.pull_request.head.sha }} + run: nf-core -l lint_log.txt pipelines lint --dir ${GITHUB_WORKSPACE} --markdown lint_results.md + + - name: Run nf-core pipelines lint --release + if: ${{ github.base_ref == 'master' }} + env: + GITHUB_COMMENTS_URL: ${{ github.event.pull_request.comments_url }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_PR_COMMIT: ${{ github.event.pull_request.head.sha }} + run: nf-core -l lint_log.txt pipelines lint --release --dir ${GITHUB_WORKSPACE} --markdown lint_results.md + + - name: Save PR number + if: ${{ always() }} + run: echo ${{ github.event.pull_request.number }} > PR_number.txt + + - name: Upload 
linting log file artifact + if: ${{ always() }} + uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4 + with: + name: linting-logs + path: | + lint_log.txt + lint_results.md + PR_number.txt diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml new file mode 100644 index 00000000..42e519bf --- /dev/null +++ b/.github/workflows/linting_comment.yml @@ -0,0 +1,28 @@ +name: nf-core linting comment +# This workflow is triggered after the linting action is complete +# It posts an automated comment to the PR, even if the PR is coming from a fork + +on: + workflow_run: + workflows: ["nf-core linting"] + +jobs: + test: + runs-on: ubuntu-latest + steps: + - name: Download lint results + uses: dawidd6/action-download-artifact@bf251b5aa9c2f7eeb574a96ee720e24f801b7c11 # v6 + with: + workflow: linting.yml + workflow_conclusion: completed + + - name: Get PR number + id: pr_number + run: echo "pr_number=$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT + + - name: Post PR comment + uses: marocchino/sticky-pull-request-comment@331f8f5b4215f0445d3c07b4967662a32a2d3e31 # v2 + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + number: ${{ steps.pr_number.outputs.pr_number }} + path: linting-logs/lint_results.md diff --git a/.github/workflows/release-announcements.yml b/.github/workflows/release-announcements.yml new file mode 100644 index 00000000..c6ba35df --- /dev/null +++ b/.github/workflows/release-announcements.yml @@ -0,0 +1,75 @@ +name: release-announcements +# Automatic release toot and tweet anouncements +on: + release: + types: [published] + workflow_dispatch: + +jobs: + toot: + runs-on: ubuntu-latest + steps: + - name: get topics and convert to hashtags + id: get_topics + run: | + echo "topics=$(curl -s https://nf-co.re/pipelines.json | jq -r '.remote_workflows[] | select(.full_name == "${{ github.repository }}") | .topics[]' | awk '{print "#"$0}' | tr '\n' ' ')" | sed 's/-//g' >> $GITHUB_OUTPUT + + - uses: 
rzr/fediverse-action@master + with: + access-token: ${{ secrets.MASTODON_ACCESS_TOKEN }} + host: "mstdn.science" # custom host if not "mastodon.social" (default) + # GitHub event payload + # https://docs.github.com/en/developers/webhooks-and-events/webhooks/webhook-events-and-payloads#release + message: | + Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! + + Please see the changelog: ${{ github.event.release.html_url }} + + ${{ steps.get_topics.outputs.topics }} #nfcore #openscience #nextflow #bioinformatics + + send-tweet: + runs-on: ubuntu-latest + + steps: + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 + with: + python-version: "3.10" + - name: Install dependencies + run: pip install tweepy==4.14.0 + - name: Send tweet + shell: python + run: | + import os + import tweepy + + client = tweepy.Client( + access_token=os.getenv("TWITTER_ACCESS_TOKEN"), + access_token_secret=os.getenv("TWITTER_ACCESS_TOKEN_SECRET"), + consumer_key=os.getenv("TWITTER_CONSUMER_KEY"), + consumer_secret=os.getenv("TWITTER_CONSUMER_SECRET"), + ) + tweet = os.getenv("TWEET") + client.create_tweet(text=tweet) + env: + TWEET: | + Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! + + Please see the changelog: ${{ github.event.release.html_url }} + TWITTER_CONSUMER_KEY: ${{ secrets.TWITTER_CONSUMER_KEY }} + TWITTER_CONSUMER_SECRET: ${{ secrets.TWITTER_CONSUMER_SECRET }} + TWITTER_ACCESS_TOKEN: ${{ secrets.TWITTER_ACCESS_TOKEN }} + TWITTER_ACCESS_TOKEN_SECRET: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }} + + bsky-post: + runs-on: ubuntu-latest + steps: + - uses: zentered/bluesky-post-action@80dbe0a7697de18c15ad22f4619919ceb5ccf597 # v0.1.0 + with: + post: | + Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! 
+ + Please see the changelog: ${{ github.event.release.html_url }} + env: + BSKY_IDENTIFIER: ${{ secrets.BSKY_IDENTIFIER }} + BSKY_PASSWORD: ${{ secrets.BSKY_PASSWORD }} + # diff --git a/.github/workflows/template_version_comment.yml b/.github/workflows/template_version_comment.yml new file mode 100644 index 00000000..e8aafe44 --- /dev/null +++ b/.github/workflows/template_version_comment.yml @@ -0,0 +1,46 @@ +name: nf-core template version comment +# This workflow is triggered on PRs to check if the pipeline template version matches the latest nf-core version. +# It posts a comment to the PR, even if it comes from a fork. + +on: pull_request_target + +jobs: + template_version: + runs-on: ubuntu-latest + steps: + - name: Check out pipeline code + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 + with: + ref: ${{ github.event.pull_request.head.sha }} + + - name: Read template version from .nf-core.yml + uses: nichmor/minimal-read-yaml@v0.0.2 + id: read_yml + with: + config: ${{ github.workspace }}/.nf-core.yml + + - name: Install nf-core + run: | + python -m pip install --upgrade pip + pip install nf-core==${{ steps.read_yml.outputs['nf_core_version'] }} + + - name: Check nf-core outdated + id: nf_core_outdated + run: echo "OUTPUT=$(pip list --outdated | grep nf-core)" >> ${GITHUB_ENV} + + - name: Post nf-core template version comment + uses: mshick/add-pr-comment@b8f338c590a895d50bcbfa6c5859251edc8952fc # v2 + if: | + contains(env.OUTPUT, 'nf-core') + with: + repo-token: ${{ secrets.NF_CORE_BOT_AUTH_TOKEN }} + allow-repeats: false + message: | + > [!WARNING] + > Newer version of the nf-core template is available. + > + > Your pipeline is using an old version of the nf-core template: ${{ steps.read_yml.outputs['nf_core_version'] }}. + > Please update your pipeline to the latest version. 
+ > + > For more documentation on how to update your pipeline, please see the [nf-core documentation](https://github.com/nf-core/tools?tab=readme-ov-file#sync-a-pipeline-with-the-template) and [Synchronisation documentation](https://nf-co.re/docs/contributing/sync). + # diff --git a/.gitignore b/.gitignore index a0a8c962..23b0c7de 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,10 @@ .nextflow* work/ +data/ results/ .DS_Store -tests/test_data +testing/ +testing* *.pyc -.vscode/ \ No newline at end of file +null/ +.nf-test* diff --git a/.gitpod.yml b/.gitpod.yml new file mode 100644 index 00000000..46118637 --- /dev/null +++ b/.gitpod.yml @@ -0,0 +1,17 @@ +image: nfcore/gitpod:latest +tasks: + - name: Update Nextflow and setup pre-commit + command: | + pre-commit install --install-hooks + nextflow self-update + +vscode: + extensions: # based on nf-core.nf-core-extensionpack + #- esbenp.prettier-vscode # Markdown/CommonMark linting and style checking for Visual Studio Code + - EditorConfig.EditorConfig # override user/workspace settings with settings found in .editorconfig files + - Gruntfuggly.todo-tree # Display TODO and FIXME in a tree view in the activity bar + - mechatroner.rainbow-csv # Highlight columns in csv files in different colors + - nextflow.nextflow # Nextflow syntax highlighting + - oderwat.indent-rainbow # Highlight indentation level + - streetsidesoftware.code-spell-checker # Spelling checker for source code + - charliermarsh.ruff # Code linter Ruff diff --git a/.nf-core.yml b/.nf-core.yml new file mode 100644 index 00000000..49964714 --- /dev/null +++ b/.nf-core.yml @@ -0,0 +1,21 @@ +bump_version: null +lint: + files_unchanged: + - .github/CONTRIBUTING.md + - .github/PULL_REQUEST_TEMPLATE.md + - conf/igenomes.config + - conf/igenomes_ignored.config +nf_core_version: 3.0.2 +org_path: null +repository_type: pipeline +template: + author: Martin Proks, Annick Renevey + description: Nextflow rnafusion analysis pipeline, part of the nf-core 
community. + force: false + is_nfcore: true + name: rnafusion + org: nf-core + outdir: . + skip_features: null + version: 4.0.0dev +update: null diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..9e9f0e1c --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,13 @@ +repos: + - repo: https://github.com/pre-commit/mirrors-prettier + rev: "v3.1.0" + hooks: + - id: prettier + additional_dependencies: + - prettier@3.2.5 + + - repo: https://github.com/editorconfig-checker/editorconfig-checker.python + rev: "3.0.3" + hooks: + - id: editorconfig-checker + alias: ec diff --git a/.prettierignore b/.prettierignore new file mode 100644 index 00000000..437d763d --- /dev/null +++ b/.prettierignore @@ -0,0 +1,12 @@ +email_template.html +adaptivecard.json +slackreport.json +.nextflow* +work/ +data/ +results/ +.DS_Store +testing/ +testing* +*.pyc +bin/ diff --git a/.prettierrc.yml b/.prettierrc.yml new file mode 100644 index 00000000..c81f9a76 --- /dev/null +++ b/.prettierrc.yml @@ -0,0 +1 @@ +printWidth: 120 diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 176e3505..00000000 --- a/.travis.yml +++ /dev/null @@ -1,46 +0,0 @@ -sudo: required -language: python -jdk: openjdk8 -services: docker -python: '3.6' -cache: pip -matrix: - fast_finish: true - -before_install: - # PRs to master are only ok if coming from dev branch - - '[ $TRAVIS_PULL_REQUEST = "false" ] || [ $TRAVIS_BRANCH != "master" ] || ([ $TRAVIS_PULL_REQUEST_SLUG = $TRAVIS_REPO_SLUG ] && [ $TRAVIS_PULL_REQUEST_BRANCH = "dev" ])' - # Pull the docker image first so the test doesn't wait for this - - docker pull nfcore/rnafusion:dev - # Fake the tag locally so that the pipeline runs properly - # Looks weird when this is :dev to :dev, but makes sense when testing code for a release (:dev to :1.0.1) - - docker tag nfcore/rnafusion:dev nfcore/rnafusion:1.0.2 - -install: - # Install Nextflow - - mkdir /tmp/nextflow && cd /tmp/nextflow - - wget -qO- 
get.nextflow.io | bash - - sudo ln -s /tmp/nextflow/nextflow /usr/local/bin/nextflow - # Install nf-core/tools - - pip install --upgrade pip - - pip install nf-core - # Install markdownlint-cli - - sudo apt-get install npm && npm install -g markdownlint-cli - # Reset - - mkdir ${TRAVIS_BUILD_DIR}/tests && cd ${TRAVIS_BUILD_DIR}/tests - -env: - - NXF_VER='0.32.0' # Specify a minimum NF version that should be tested and work - - NXF_VER='' # Plus: get the latest NF version and check that it works - -script: - # Lint the pipeline code - - nf-core lint ${TRAVIS_BUILD_DIR} - # Lint the documentation - - markdownlint ${TRAVIS_BUILD_DIR} -c ${TRAVIS_BUILD_DIR}/.github/markdownlint.yml - # Test pipeline help page - - nextflow run ${TRAVIS_BUILD_DIR} --help - # Test downloading references help page - - nextflow run ${TRAVIS_BUILD_DIR}/download-references.nf --help - # Test downloading singularity images help page - - nextflow run ${TRAVIS_BUILD_DIR}/download-singularity-img.nf --help \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 30a22fa7..e7288298 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,82 +1,538 @@ -# nfcore/rnafusion +# nf-core/rnafusion: Changelog -## nfcore/rnafusion version 1.0.3 - +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
+ +## v4.0.0dev - [date] + +### Added + +- Normalized gene expression calculated [#488](https://github.com/nf-core/rnafusion/pull/488) +- Primary assembly now used as main reference genome FASTA file, as recommended by the STAR manual [#488](https://github.com/nf-core/rnafusion/pull/488) +- Use of only ensembl GTF file, not chr.gtf file as GTF reference file [#488](https://github.com/nf-core/rnafusion/pull/488) +- Add nf-test to local module: `ENSEMBL_DOWNLOAD` [#539](https://github.com/nf-core/rnafusion/pull/539) +- Add nf-test to local module: `HGNC_DOWNLOAD` [#540](https://github.com/nf-core/rnafusion/pull/540) +- Add nf-test to local subworkflow: `STRINGTIE_WORKFLOW` [#541](https://github.com/nf-core/rnafusion/pull/541) +- Option to avoid using COSMIC (for example in the case of clinical use) [#547](https://github.com/nf-core/rnafusion/pull/547) +- Add nf-test to nf-core module: `PICARD_COLLECTRNASEQMETRICS` and update module [#551](https://github.com/nf-core/rnafusion/pull/551) +- Add `--skip_vcf` boolean parameter to skip vcf file generation [#554](https://github.com/nf-core/rnafusion/pull/554) +- Add nf-test to local module: `FUSIONREPORT_DOWNLOAD` [#560](https://github.com/nf-core/rnafusion/pull/560) +- Add nf-test to local subworkflow: `QC_WORKFLOW` [#568](https://github.com/nf-core/rnafusion/pull/568) +- Add nf-test to local subworkflow: `TRIM_WORKFLOW` [#572](https://github.com/nf-core/rnafusion/pull/572) +- Add nf-test to local module: `FUSIONREPORT_DETECT`. Improve `FUSIONREPORT_DOWNLOAD` module [#577](https://github.com/nf-core/rnafusion/pull/577) +- Add nf-test to local subworkflow: `ARRIBA_WORKFLOW` [#578](https://github.com/nf-core/rnafusion/pull/578) +- Add nf-test to local module: `STARFUSION_BUILD`. [#585](https://github.com/nf-core/rnafusion/pull/585) +- Add nf-test to local module: `STARFUSION_DETECT`. 
[#586](https://github.com/nf-core/rnafusion/pull/586) +- Added a new module `CTATSPLICING_STARTOCANCERINTRONS` and a new parameter `--ctatsplicing`. This option creates reports on cancer splicing aberrations and requires one or both of `--arriba` and `--starfusion` to be given. [#587](https://github.com/nf-core/rnafusion/pull/587) +- Add parameter `--references_only` when no data should be analysed, but only the references should be built [#505](https://github.com/nf-core/rnafusion/pull/505) +- Add nf-test to local subworkflow: `STARFUSION_WORKFLOW`. [#597](https://github.com/nf-core/rnafusion/pull/597) + +### Changed + +- Updated modules and migrated non-specific modules to nf-core/modules [#484](https://github.com/nf-core/rnafusion/pull/484) +- Updated to nf-core/tools 3.0.2 [#504](https://github.com/nf-core/rnafusion/pull/504) +- Remove local module `RRNA_TRANSCRIPTS` (replaced by nf-core module) [#541](https://github.com/nf-core/rnafusion/pull/541) +- Allow fastq files without a dot before .fn(.gz)/.fastq(.gz) files [#548](https://github.com/nf-core/rnafusion/pull/548) +- Remove double nested folder introduced in [#577](https://github.com/nf-core/rnafusion/pull/577), [#581](https://github.com/nf-core/rnafusion/pull/581) +- Use docker.io and galaxy containers for fusioncatcher and starfusion (incl. 
fusioninspector) instead of wave as they are not functional on wave [#588](https://github.com/nf-core/rnafusion/pull/588) +- Update STAR-Fusion to 1.14 [#588](https://github.com/nf-core/rnafusion/pull/588) +- Use "-genePredExt -geneNameAsName2 -ignoreGroupsWithoutExons" (to mimic gms/tomte) for GTF_TO_REFFLAT [#505](https://github.com/nf-core/rnafusion/pull/505) +- Integrate reference building in the main workflow [#505](https://github.com/nf-core/rnafusion/pull/505) +- Move from ensembl to gencode base [#505](https://github.com/nf-core/rnafusion/pull/505) +- Update from ensembl 102 to gencode 46 default references [#505](https://github.com/nf-core/rnafusion/pull/505) + +### Fixed + +- Fixed some Nextflow run-commands in the docs [#491](https://github.com/nf-core/rnafusion/pull/491) +- Fixed bug when trying to build indices behind a proxy and wget was unable to download arriba indices [#495](https://github.com/nf-core/rnafusion/issues/495) +- Fixed bug in `FUSIONREPORT_DOWNLOAD` when building references with `--no_cosmic parameter` [#555](https://github.com/nf-core/rnafusion/issues/555) +- Refactor structure in `FUSIONREPORT_DOWNLOAD` to use cosmic credentials in `ext.args` [#556](https://github.com/nf-core/rnafusion/issues/556) +- Fixed bug in nf-core `RRNATRANSCRIPTS` module [#563](https://github.com/nf-core/rnafusion/issues/563) +- Fixed bug in `GFFREAD` that caused output `gffread_fasta` not being produced [#565](https://github.com/nf-core/rnafusion/issues/565) +- Fixed bug in `FUSIONCATCHER_DOWNLOAD` that caused an error when running with singularity profile [#573](https://github.com/nf-core/rnafusion/issues/573) + +### Removed + +- Remove fusionGDB from documentation and fusion-report download stubs [#503](https://github.com/nf-core/rnafusion/pull/503) +- Removed test-build as reference building gets integrated in the main workflow [#505](https://github.com/nf-core/rnafusion/pull/505) +- Removed parameter `--build_references` + +### Parameters + +| Old 
parameter | New parameter | +| -------------------- | ------------------- | +| | `--no_cosmic` | +| `--build_references` | `--references_only` | + +## v3.0.2 - [2024-04-10] + +### Added + +### Changed + +- Update to nf-tools 2.11.1 [#457](https://github.com/nf-core/rnafusion/pull/457) +- Update picard collectrnaseqmetrics memory requirements to 0.8x what is provided [#474](https://github.com/nf-core/rnafusion/pull/474) + +### Fixed + +- Fix bug when using parameter "whitelist" [#466](https://github.com/nf-core/rnafusion/pull/466) +- Fix VCF_COLLECT handling when a tool is absent from FUSIONREPORT report [#458](https://github.com/nf-core/rnafusion/pull/458) +- Fix VCF_COLLECT when fusioninspector output is empty but fusionreport is not [#465](https://github.com/nf-core/rnafusion/pull/465) +- Fix VCF_COLLECT bug [#481](https://github.com/nf-core/rnafusion/pull/481) +- Fix conda package for starfusion/detect [#482](https://github.com/nf-core/rnafusion/pull/482) +- Fix logical gate so when stringtie should run but not starfusion, starfusion will not run [#482](https://github.com/nf-core/rnafusion/pull/482) + +### Removed + +## v3.0.1 - [2023-11-29] + +### Added + +### Changed + +- Python3 explicit in vcf_collect [#452](https://github.com/nf-core/rnafusion/pull/452) + +### Fixed + +- software-version.yml and in general version track-keeping was incomplete [#451](https://github.com/nf-core/rnafusion/pull/451) + +### Removed + +## v3.0.0 - [2023-11-27] + +### Added + +- Add picard CollectInsertSizeMetrics to QC workflow [#408](https://github.com/nf-core/rnafusion/pull/408) +- Build CRAM index in the same directory as CRAM files for Arriba and STAR-Fusion [#427](https://github.com/nf-core/rnafusion/pull/427) + +### Changed + +- Replace PICARD_MARKDUPLICATES with GATK4_MARKDUPLICATES [#409](https://github.com/nf-core/rnafusion/pull/409) +- Removed `--fusioninspector_filter` and `--fusionreport_filter` in favor of `--tools_cutoff` (default = 1, no filters applied) 
[#389](https://github.com/nf-core/rnafusion/pull/389) +- Now publishing convert2bed output to convert2bed to keep the output file [#420](https://github.com/nf-core/rnafusion/pull/420) +- No more checks for existence of samplesheet, which made building references fail (building references uses a fake sample sheet if none is provided) [#420](https://github.com/nf-core/rnafusion/pull/420) +- `--annotate --examine_coding_effect` to collect more data from fusioninspector [#426](https://github.com/nf-core/rnafusion/pull/426) +- Update vcf creation to get positions/chromosomes and strands even when fusions are filtered out by fusioninspector, using the csv output from fusion-report [#443](https://github.com/nf-core/rnafusion/pull/443) +- `Arriba` updated to 2.4.0 [#429](https://github.com/nf-core/rnafusion/pull/429) +- Change megafusion into vcf_collect, taking into account e.g. the annotation and coding effects outputs from fusioninspector, HGNC ids, frame status... [#414](https://github.com/nf-core/rnafusion/pull/414) +- CI tests on `--all` instead of each tool separately, and include trimmed/not trimmed matrix tests [#430](https://github.com/nf-core/rnafusion/pull/430) +- AWS tests on `--all` instead of each tool separately, and include trimmed/not trimmed matrix tests [#433](https://github.com/nf-core/rnafusion/pull/433) +- Update `fusion-report` to 2.1.8, updated COSMIC database to fix 404 error, fix download of references via proxy and removing FusionGDB database [#445](https://github.com/nf-core/rnafusion/pull/445) +- Update documentation [#446](https://github.com/nf-core/rnafusion/pull/446) + +### Fixed + +- Fix channel i/o issue in StringTie workflow and add StringTie in github CI tests [#416](https://github.com/nf-core/rnafusion/pull/416) +- Update modules, and make sure MultiQC displays the QC results properly [#440](https://github.com/nf-core/rnafusion/pull/440) +- Add 'when' condition to run CollectInsertSizeMetrics only when STAR-fusion bam files are 
available [#444](https://github.com/nf-core/rnafusion/pull/444) + +### Removed + +- Remove `squid` and `pizzly` fusion detection tools [#406](https://github.com/nf-core/rnafusion/pull/406) +- Remove harsh trimming option `--trim` [#413](https://github.com/nf-core/rnafusion/pull/413) +- Remove qualimap rna_seq [#407](https://github.com/nf-core/rnafusion/pull/407) + +## v2.4.0 - [2023/09/22] ### Added -* Added `Arriba 1.1.0` [#63](https://github.com/nf-core/rnafusion/issues/63) +### Changed + +- Use institutional configs by default [#381](https://github.com/nf-core/rnafusion/pull/381) +- Remove redundant indexing in starfusion and qc workflows [#387](https://github.com/nf-core/rnafusion/pull/387) +- Output bai files in same directory as bam files [#387](https://github.com/nf-core/rnafusion/pull/387) +- Update and review documentation [#396](https://github.com/nf-core/rnafusion/pull/396) +- Update picard container for `PICARD_COLLECTRNASEQMETRICS` to 3.0.0 [#395](https://github.com/nf-core/rnafusion/pull/395) +- Renamed output files [#395](https://github.com/nf-core/rnafusion/pull/395) + - `Arriba` visualisation pdf from meta.id to meta.id_combined_fusions_arriba_visualisation + - cram file from output bam of `STAR_FOR_ARRIBA`: meta.id to meta.id_star_for_arriba + - cram file from output bam of `STAR_FOR_STARFUSION`: meta.id to meta.id.star_for_starfusion.Aligned.sortedByCoord.out + - `fusion-report` index.html file to meta.id_fusionreport_index.html + - meta.id.vcf output from `MEGAFUSION` to meta.id_fusion_data.vcf + - Update metro map [#428](https://github.com/nf-core/rnafusion/pull/428) + +### Fixed + +- Tail trimming for reverse reads [#379](https://github.com/nf-core/rnafusion/pull/379) +- Set html files as optional in fusionreport [#380](https://github.com/nf-core/rnafusion/pull/380) +- Provide gene count file by default when running STAR_FOR_STARFUSION [#385](https://github.com/nf-core/rnafusion/pull/385) +- Fix fusion-report issue with MACOSX directories 
[#386](https://github.com/nf-core/rnafusion/pull/386) +- The fusion list is updated to contain two branches, one in case no fusions are detected and one for if fusions are detected, that will be used to feed to fusioninspector, megafusion, arriba visualisation [#388](https://github.com/nf-core/rnafusion/pull/388) +- Update fusionreport to 2.1.5p4 to fix 403 error in downloading databases [#403](https://github.com/nf-core/rnafusion/pull/403) + +### Removed + +- `samtools sort` and `samtools index` for `arriba` workflow were dispensable and were removed [#395](https://github.com/nf-core/rnafusion/pull/395) +- Removed trimmed fastqc report from multiqc [#394](https://github.com/nf-core/rnafusion/pull/394) + +## v2.3.0 - [2023/04/24] + +### Added + +- Shell specification to bash +- COSMIC password put into quotes +- Trimmed reads QC in MultiQC +- Add `ARRIBA_VISUALISATION` to processes affected by `--skip_vis` +- Option `fusionreport_filter` to in/activate fusionreport displaying of fusions detected by 2 or more tools ### Changed -* Divided `running_tools` into fusion and visualization tools -* Updated `Squid STAR` version to `2.7.0f` -* Upgraded `STAR-Fusion v1.5.0` to `STAR-Fusion v1.6.0` [#83](https://github.com/nf-core/rnafusion/issues/83) -* Parameter `igenomesIgnore` renamed to `igenome` [#81](https://github.com/nf-core/rnafusion/issues/81) +- `Arriba` visualisation now runs for FusionInspector (combined tools) results, not only `Arriba` results +- Updated metro map with trimming options and placed `Arriba` visualisation after `FusionInspector` +- Exit with error when using squid in combination with any ensembl version different from 102 + +### Fixed + +- Channel issue with indexing of files when using `--cram squid` +- `Arriba` references published in the correct folder -## nfcore/rnafusion version 1.0.2 - 2018/05/13 +### Removed + +## v2.2.0 - [2023/03/13] + +### Added + +- exitStatus 140 now part of the retry strategy +- stubs to all local modules +- 
`--stringtie` option added with StringTie v2.2.1 to detect splicing events. Not included in `fusion-report` or `fusionInspector` summaries. Included in the `--all` workflow +- Generation of ribosomal RNA interval list with build_references and use it in picard CollectRnaMetrics +- Add csv output to fusionreport +- Trimming workflow using `fastp`: use trimmed reads for all tools +- `whitelist` parameter to add custom fusions to the detected ones and consider the whole for the `fusionInspector` analysis +- Compression to CRAM files for arriba, squid and starfusion workflows (fusioncatcher and pizzly do not produce SAM/BAM files, fusioninspector BAM files are too small to benefit from compression) +- `--qiagen` option to download from QIAGEN instead of COSMIC (use QIAGEN user and password for `cosmic_username` and `cosmic_passwd`) +- Bumped `STAR genomegenerate` time request for building as it was always crashing for most users +- Fixed issue with arriba visualisation parameters [#326](https://github.com/nf-core/rnafusion/issues/326) ### Changed -* Bumped nf-core template to 1.6 [#69](https://github.com/nf-core/rnafusion/pull/69) +- Test profiles unified under 'test' but if the references do not all need to be downloaded, run with `-stub` +- Update CUSTOM_DUMPSOFTWAREVERSIONS to use multiqc version 1.13 +- Updated to nf-core template 2.7.2, with all module updates +- `MultiQC` updated to 1.13a in process dumpsoftwareversion +- Patch fusion-report version with fixed mittelman DB and DB extraction date written into software_version.yaml +- `Arriba` references back to downloading with `build_references` instead of taking from container +- `Arriba` visualisation now running with `Arriba` v2.3.0 +- Updated `STAR-Fusion` to 1.12.0 ### Fixed -* Fixed COSMIC parameters not wrapped in quotes [#75](https://github.com/nf-core/rnafusion/issues/75) -* Implemented output output for fusion tools [#72](https://github.com/nf-core/rnafusion/issues/72) -* Fixed reference download link 
for STAR-Fusion [#71](https://github.com/nf-core/rnafusion/issues/71) +- AWS megatest to display on nf-core website +- `arriba` visualisation references updated to 2.3.0 +- Removed issue with multiple outputs in samtools view for squid + +### Removed + +- FUSIONINSPECTOR_DEV process as the option fusioninspector_limitSjdbInsertNsj is part of the main starfusion release -## nfcore/rnafusion version 1.0.1 - 2018/04/06 +## v2.1.0 - [2022/07/12] ### Added -* Added support for extra parameters for tools STAR-Fusion, FusionCatcher and fusion-report -* Added example configuration for `singularity` and `docker` -* Added [fusion-report](https://github.com/matq007/fusion-report) into the stack [#62](https://github.com/nf-core/rnafusion/issues/62), [#55](https://github.com/nf-core/rnafusion/issues/55), [#53](https://github.com/nf-core/rnafusion/issues/53), [#51](https://github.com/nf-core/rnafusion/issues/51) -* Added nextflow helper script `download-singularity-img.nf` -* Added nextflow helper script `download-references.nf` -* Added `Jenkinsfile` for in-house testing +- `FusionCatcher` single_end support for single reads ABOVE 130 bp +- `--fusioninspector_only` parameter to run FusionInspector standalone feeding gene list manually with parameter `--fusioninspector_fusions PATH` +- `--fusioncatcher_limitSjdbInsertNsj` parameter to feed --limitSjdbInsertNsj to FusionCatcher +- `--fusioninspector_limitSjdbInsertNsj` parameter to feed --limitSjdbInsertNsj to FusionInspector !!Any other value than default will use the dev version of FusionInspector!! +- OPTIONAL trimming option `--trim` for hard trimming to 75 bp in case of high read-through. 
Only fusioncatcher uses trimmed reads as STAR-based fusion detection tools are less sensitive to read-through +- `picard` metrics, STAR final log, and QualiMap output included in `MultiQC` report ### Changed -* Updated installation of `FusionCatcher` (available now on bioconda) +- `seq_platform` and `seq_center` changed from boolean to string +- `seq_platform` set to an empty string and `seq_center` set to an empty string if not existing +- Arriba use ensembl references-built starindex independently of `starfusion_build` parameter +- ftp to http protocol for STARFUSION_BUILD process `Pfam-A.hmm.gz` download as ftp causes issues on some servers +- Updated README and usage documentation with more detailed information and metro map +- Arriba use ensembl references-built starindex independently of starfusion_build parameter +- Update of the single-end reads support table in README, added recommendation to use single-end reads only in last resort +- STAR updated to 2.7.10a +- Arriba updated to 2.3.0, references for blacklist and protein domains changed to 2.3.0 from singularity/docker container -> arriba download of references not necessary any more +- multiQC updated to 1.13a +- picard updated to 2.27.4 +- dumpsoftwareversions module updated to use multiqc=1.12 containers ### Fixed -* Fixed empty symlinks (`input.X`) in fusion-report [#68](https://github.com/nf-core/rnafusion/issues/68) -* Fixed FASTA issues [#60](https://github.com/nf-core/rnafusion/issues/60) -* Fixed centralized nf-core/config [#64](https://github.com/nf-core/rnafusion/issues/64) -* Fixed `scrape_software_versions.py` to parse tools versions correctly [#65](https://github.com/nf-core/rnafusion/issues/65) +- FusionInspector does not mix sample reads with fusion lists and meta information from other samples anymore +- Arriba visualisation does not mix sample reads with fusion lists and meta information from other samples anymore +- logging of STAR-fusion and fusionreport version ### Removed -* Removed 
`Singularity` - -## nfcore/rnafusion version 1.0 - 2018/02/14 - -Version 1.0 marks the first production release of this pipeline under the nf-core flag. The pipeline includes -additional help scripts to download references for fusion tools and Singularity images. - -* Fusion gene detection tools: - * `STAR-Fusion v1.5.0` - * `Fusioncatcher v1.00` - * `Ericscript v0.5.5` - * `Pizzly v0.37.3` - * `Squid v1.5` -* Visualization tools: - * `FusionInspector v1.3.1` -* Other tools: - * `Summary report` - * `FastQ v0.11.8` - * `MultiQC v1.7` - * `FusionGDB updated 2019/01/23` - -## SciLifeLab/NGI-RNAfusion version 0.1 (ARCHIVED) - 2018/10/05 - -Initial release of NGI-RNAfusion, created with the [nf-core](http://nf-co.re/) template. Source code can be found -at [SciLifeLab/NGI-RNAfusion](https://github.com/SciLifeLab/NGI-RNAfusion). The solution works with Docker and Singularity. - -* Tools: - * STAR-Fusion - * Fusioncatcher - * FusionInspector - * Custom tool for fusion comparison - generates intersection of detected fusion genes from all tools +## v2.0.0 - [2022/05/19] + +Update to DSL2 and newer software/reference versions + +### Added + +- Added `qualimap/rnaseq v2.2.2d` from nf-core modules +- Added UCSC `gtfToGenePred v377` +- Added `picard CollectRnaSeqMetrics v2.26.10` +- Added `picard MarkDuplicates v2.26.10` from nf-core modules +- Added `cat/fastqc` from nf-core modules +- Added possibility for manually feeding the results of fusions from different tools to speed-up reruns +- STAR-Fusion references can be downloaded or built but downloaded references are NOT RECOMMENDED as not thoroughly tested (--starfusion_build parameter is true by default, use --starfusion_build false to use downloaded STAR-Fusion references). 
+ +### Changed + +- Upgrade default ensembl version to `102` +- Upgrade to `nf-core/tools v2.3.2` +- Upgrade `Arriba v1.2.0` to `Arriba v2.2.1` +- Upgrade `FusionCatcher v1.20` to `FusionCatcher v1.33` +- Upgrade `STAR-fusion v1.8.1` to `STAR-fusion v1.10.1` +- Upgrade `STAR v2.7.1` to `STAR v2.7.9` +- Upgrade `fusion-report v2.1.3` to `fusion-report v2.1.5` +- Upgrade `kallisto v0.44.0` to `kallisto v0.46.2` +- Upgrade `fastqc v0.11.8` to `fastqc v0.11.9` +- Upgrade `samtools v1.9` to `samtools v1.15.1` +- Upgrade `arriba` references from `v1.2.0` to `v2.1.0` +- Upgrade `fusioncatcher` references from `v98` to `v102` +- Use `arriba` (detect only), `kallisto` and `STAR` from nf-core modules +- Instead of separate script to build the references, added `--build_references` argument in the main +- `--fasta` argument is not required with `--build_references` and set by default to the ensembl references built in the detection workflow +- CI test done on stubs of reference building for subprocesses ensembl and arriba + +Parameters for `STAR` for `arriba` changed from: + +```bash +--readFilesCommand zcat \\ + --outSAMtype BAM Unsorted \\ +--outStd BAM_Unsorted \\ +--outSAMunmapped Within \\ +--outBAMcompression 0 \\ +--outFilterMultimapNmax 1 \\ +--outFilterMismatchNmax 3 \\ +--chimSegmentMin 10 \\ +--chimOutType WithinBAM SoftClip \\ +--chimJunctionOverhangMin 10 \\ +--chimScoreMin 1 \\ +--chimScoreDropMax 30 \\ +--chimScoreJunctionNonGTAG 0 \\ +--chimScoreSeparation 1 \\ +--alignSJstitchMismatchNmax 5 -1 5 5 \\ +--chimSegmentReadGapMax 3 \\ +--sjdbOverhang ${params.read_length - 1} +``` + +to + +```bash +--readFilesCommand zcat \ +--outSAMtype BAM Unsorted \ +--outSAMunmapped Within \ +--outBAMcompression 0 \ +--outFilterMultimapNmax 50 \ +--peOverlapNbasesMin 10 \ +--alignSplicedMateMapLminOverLmate 0.5 \ +--alignSJstitchMismatchNmax 5 -1 5 5 \ +--chimSegmentMin 10 \ +--chimOutType WithinBAM HardClip \ +--chimJunctionOverhangMin 10 \ +--chimScoreDropMax 30 \ 
+--chimScoreJunctionNonGTAG 0 \ +--chimScoreSeparation 1 \ +--chimSegmentReadGapMax 3 \ +--chimMultimapNmax 50 +``` + +As recommended [here](https://arriba.readthedocs.io/en/latest/workflow/). + +Parameters for `STAR` for `STAR-fusion` changed from: + +```bash +--twopassMode Basic \\ +--outReadsUnmapped None \\ +--chimSegmentMin 12 \\ +--chimJunctionOverhangMin 12 \\ +--alignSJDBoverhangMin 10 \\ +--alignMatesGapMax 100000 \\ +--alignIntronMax 100000 \\ +--chimSegmentReadGapMax 3 \\ +--alignSJstitchMismatchNmax 5 -1 5 5 \\ +--runThreadN ${task.cpus} \\ +--outSAMstrandField intronMotif ${avail_mem} \\ +--outSAMunmapped Within \\ +--outSAMtype BAM Unsorted \\ +--outSAMattrRGline ID:GRPundef \\ +--chimMultimapScoreRange 10 \\ +--chimMultimapNmax 10 \\ +--chimNonchimScoreDropMin 10 \\ +--peOverlapNbasesMin 12 \\ +--peOverlapMMp 0.1 \\ +--readFilesCommand zcat \\ +--sjdbOverhang ${params.read_length - 1} \\ +--chimOutJunctionFormat 1 +``` + +to + +```bash +--outReadsUnmapped None \ +--readFilesCommand zcat \ +--outSAMtype BAM SortedByCoordinate \ +--outSAMstrandField intronMotif \ +--outSAMunmapped Within \ +--chimSegmentMin 12 \ +--chimJunctionOverhangMin 8 \ +--chimOutJunctionFormat 1 \ +--alignSJDBoverhangMin 10 \ +--alignMatesGapMax 100000 \ +--alignIntronMax 100000 \ +--alignSJstitchMismatchNmax 5 -1 5 5 \ +--chimMultimapScoreRange 3 \ +--chimScoreJunctionNonGTAG -4 \ +--chimMultimapNmax 20 \ +--chimNonchimScoreDropMin 10 \ +--peOverlapNbasesMin 12 \ +--peOverlapMMp 0.1 \ +--alignInsertionFlush Right \ +--alignSplicedMateMapLminOverLmate 0 \ +--alignSplicedMateMapLmin 30 \ +--chimOutType Junctions +``` + +`Homo_sapiens.${params.genome}.${ensembl_version}.gtf.gz` used for squid and arriba, `Homo_sapiens.${params.genome}.${ensembl_version}.chr.gtf.gz` used for STAR-fusion and the quality control as the quality control is based on the STAR-fusion alignment. + +### Fixed + +### Removed + +- Ericscript tool +- GRCh37 support. 
Subdirectory with params.genome are removed +- Running with conda + +## v1.3.0 - [2020/07/15] + +- Using official STAR-Fusion container [#160](https://github.com/nf-core/rnafusion/issues/160) + +### Added + +- Added social preview image [#107](https://github.com/nf-core/rnafusion/issues/107) +- Added support for GRCh37 genome assembly [#77](https://github.com/nf-core/rnafusion/issues/77) + +### Changed + +- Upgrade `fusion-report v2.1.2` to `fusion-report v2.1.3` +- Upgrade `fusion-report v2.1.1` to `fusion-report v2.1.2` +- Upgrade `fusion-report v2.1.0` to `fusion-report v2.1.1` +- Upgrade `Arriba v1.1.0` to `Arriba v1.2.0` +- Upgrade `fusion-report v2.0.2` to `fusion-report v2.1.0` + +### Fixed + +- Missing `strip-components` in `download-references.nf/star-fusion` [#148](https://github.com/nf-core/rnafusion/issues/148) +- Missing version prefix for cdna [#143](https://github.com/nf-core/rnafusion/issues/143) +- `samtools` missing header in empty file for FusionInspector [ref](https://github.com/STAR-Fusion/STAR-Fusion/issues/191) +- Removed `profile` from helper scripts [#139](https://github.com/nf-core/rnafusion/issues/139) +- Wrong url path for `Pfam-A.hmm.gz` [#140](https://github.com/nf-core/rnafusion/issues/140) + +### Removed + +- Removed `scripts/download-singularity-img.sh` and `download-singularity-img.nf` as they are not necessary any more + +--- + +## v1.1.0 - [2020/02/10] + +- Fusion gene detection tools: + - `Arriba v1.1.0` + - `Ericscript v0.5.5` + - `Fusioncatcher v1.20` + - `Pizzly v0.37.3` + - `Squid v1.5` + - `STAR-Fusion v1.6.0` +- Visualization tools: + - `Arriba v1.1.0` + - `FusionInspector v1.3.1` +- Other tools: + - `fusion-report v2.0.1` + - `FastQ v0.11.8` + - `MultiQC v1.7` + - `STAR aligner v2.7.0f` + +### Added + +- Added `Arriba 1.1.0` [#63](https://github.com/nf-core/rnafusion/issues/63) +- Added Batch mode [#54](https://github.com/nf-core/rnafusion/issues/54) + +### Changed + +- Updated examples and configurations +- Upgraded 
`fusion-report v1.0.0` to `fusion-report v2.0.1` +- Divided `running_tools` into fusion and visualization tools +- Updated `STAR` in `Squid`, `Fusion-Inspector` version to `2.7.0f` +- Upgraded `STAR-Fusion v1.5.0` to `STAR-Fusion v1.6.0` [#83](https://github.com/nf-core/rnafusion/issues/83) +- Parameter `igenomesIgnore` renamed to `igenome` [#81](https://github.com/nf-core/rnafusion/issues/81) +- Finished STAR-Fusion file renaming [#18](https://github.com/nf-core/rnafusion/issues/18) +- Updated logos +- Updated to nf-core `1.8` TEMPLATE + +### Fixed + +- iGenomes optional, but not really [#91](https://github.com/nf-core/rnafusion/issues/91) +- Updated `fusioncatcher` to latest `1.20` version also solving [#95](https://github.com/nf-core/rnafusion/issues/95) + +### Removed + +- Variables `pizzly_fasta` and `pizzly_gtf` have been removed and replaced with `transcript` and `gtf` +- `Jenkinsfile`, test configuration, pylintrc configuration +- Removed `igenomes.config` because the pipeline only supports `Ensembl` version + +--- + +## v1.0.2 - [2019/05/13] + +### Changed + +- Bumped nf-core template to 1.6 [#69](https://github.com/nf-core/rnafusion/pull/69) + +### Fixed + +- Fixed COSMIC parameters not wrapped in quotes [#75](https://github.com/nf-core/rnafusion/issues/75) +- Implemented output for fusion tools [#72](https://github.com/nf-core/rnafusion/issues/72) +- Fixed reference download link for STAR-Fusion [#71](https://github.com/nf-core/rnafusion/issues/71) + +--- + +## v1.0.1 - [2019/04/06] + +### Added + +- Added support for extra parameters for tools STAR-Fusion, FusionCatcher and fusion-report +- Added example configuration for `singularity` and `docker` +- Added [fusion-report](https://github.com/matq007/fusion-report) into the stack [#62](https://github.com/nf-core/rnafusion/issues/62), [#55](https://github.com/nf-core/rnafusion/issues/55), [#53](https://github.com/nf-core/rnafusion/issues/53), [#51](https://github.com/nf-core/rnafusion/issues/51) +- 
Added nextflow helper script `download-singularity-img.nf` +- Added nextflow helper script `download-references.nf` +- Added `Jenkinsfile` for in-house testing + +### Changed + +- Updated installation of `FusionCatcher` (available now on bioconda) + +### Fixed + +- Fixed empty symlinks (`input.X`) in fusion-report [#68](https://github.com/nf-core/rnafusion/issues/68) +- Fixed FASTA issues [#60](https://github.com/nf-core/rnafusion/issues/60) +- Fixed centralized nf-core/config [#64](https://github.com/nf-core/rnafusion/issues/64) +- Fixed `scrape_software_versions.py` to parse tools versions correctly [#65](https://github.com/nf-core/rnafusion/issues/65) + +### Removed + +- Removed `Singularity` + +--- + +## v1.0 - [2018/02/14] + +Version 1.0 marks the first production release of this pipeline under the nf-core flag. +The pipeline includes additional help scripts to download references for fusion tools and Singularity images. + +Initial release of nf-core/rnafusion, created with the [nf-core](https://nf-co.re/) template. + +### `Added` + +### `Fixed` + +### `Dependencies` + +### `Deprecated` diff --git a/CITATIONS.md b/CITATIONS.md new file mode 100644 index 00000000..5d284554 --- /dev/null +++ b/CITATIONS.md @@ -0,0 +1,88 @@ +# nf-core/rnafusion: Citations + +## [nf-core](https://pubmed.ncbi.nlm.nih.gov/32055031/) + +> Ewels PA, Peltzer A, Fillinger S, Patel H, Alneberg J, Wilm A, Garcia MU, Di Tommaso P, Nahnsen S. The nf-core framework for community-curated bioinformatics pipelines. Nat Biotechnol. 2020 Mar;38(3):276-278. doi: 10.1038/s41587-020-0439-x. PubMed PMID: 32055031. + +## [Nextflow](https://pubmed.ncbi.nlm.nih.gov/28398311/) + +> Di Tommaso P, Chatzou M, Floden EW, Barja PP, Palumbo E, Notredame C. Nextflow enables reproducible computational workflows. Nat Biotechnol. 2017 Apr 11;35(4):316-319. doi: 10.1038/nbt.3820. PubMed PMID: 28398311. 
+ +## Pipeline tools + +- [Arriba](https://github.com/suhrig/arriba) + + > Uhrig S, Ellermann J, Walther T, Burkhardt P, Fröhlich M, Hutter B, Toprak UH, Neumann O, Stenzinger A, Scholl C, Fröhling S, Brors B. Accurate and efficient detection of gene fusions from RNA sequencing data. Genome Research. 2021 Mar 31;448-460. doi: 10.1101/gr.257246.119. Epub 2021 Jan 13. PubMed PMID: 33441414. + +- [BEDOPS](https://bedops.readthedocs.io/en/latest/index.html) - convert2bed + + > Neph S, Scott Kuehn M, Reynolds AP, Haugen E, Thurman RE, Johnson AK, Rynes E, Maurano MT, Vierstra J, Thomas S, Sandstrom R, Humbert R, Stamatoyannopoulos JA. BEDOPS: high-performance genomic feature operations. Bioinformatics. 2012 May, 28 (14): 1919-1920. doi: 10.1093/bioinformatics/bts277, PubMed PMID: 22576172. + +- [FastP](https://academic.oup.com/bioinformatics/article/34/17/i884/5093234) + + > Shifu Chen, Yanqing Zhou, Yaru Chen, Jia Gu. fastp: an ultra-fast all-in-one FASTQ preprocessor. Bioinformatics. 2018 Sept 34:17 (i884–i890), doi: 10.1093/bioinformatics/bty560. PubMed PMID: 30423086. PubMed Central PMCID: PMC6129281 + +- [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) + + > Andrews, S. (2010). FastQC: A Quality Control Tool for High Throughput Sequence Data [Online]. + +- [FusionCatcher](https://github.com/ndaniel/fusioncatcher) + + > Nicorici D, Satalan M, Edgren H, Kangaspeska S, Murumagi A, Kallioniemi O, Virtanen S, Kilkku O. FusionCatcher – a tool for finding somatic fusion genes in paired-end RNA-sequencing data. BioRxiv, 2014 Nov. doi: 10.1101/011650. + +- [FusionInspector](https://github.com/FusionInspector/FusionInspector) + + > Haas BJ, Dobin A, Ghandi M, Van Arsdale A, Tickle T, Robinson JT, Gillani R, Kasif S, Regev A. Targeted in silico characterization of fusion transcripts in tumor and normal tissues via FusionInspector. Cell Reports Methods. 
2023 May 3:5, doi: 10.1016/j.crmeth.2023.100467, PMID: 37323575 + +- [Fusion-report](https://github.com/matq007/fusion-report) + + > Proks M, Genomic Profiling of a Comprehensive Nation-wide Collection of Childhood Solid Tumors, Master Thesis, Supervisors: Grøntved L, Díaz de Ståhl T, Nistér M, Ewels P, Garcia MU, Juhos S, University of Southern Denmark, 2019, unpublished. + +- [GATK4](https://gatk.broadinstitute.org/hc/en-us) + + > Van der Auwera GA. Somatic variation discovery with GATK4. Proceedings of the American Association for Cancer Research Annual Meeting 2017. 2017 Apr 1-5. Cancer Res 2017;77(13 Suppl) doi:10.1158/1538-7445.AM2017-3590 + +- [MegaFusion](https://github.com/J35P312/MegaFusion) + +- [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/) + +> Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. + +- [picard-tools](http://broadinstitute.github.io/picard) + +- [SAMtools](https://pubmed.ncbi.nlm.nih.gov/19505943/) + + > Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R; 1000 Genome Project Data Processing Subgroup. The Sequence Alignment/Map format and SAMtools. Bioinformatics. 2009 Aug 15;25(16):2078-9. doi: 10.1093/bioinformatics/btp352. Epub 2009 Jun 8. PubMed PMID: 19505943; PubMed Central PMCID: PMC2723002. + +- [STAR](https://pubmed.ncbi.nlm.nih.gov/23104886/) + + > Dobin A, Davis CA, Schlesinger F, Drenkow J, Zaleski C, Jha S, Batut P, Chaisson M, Gingeras TR. STAR: ultrafast universal RNA-seq aligner Bioinformatics. 2013 Jan 1;29(1):15-21. doi: 10.1093/bioinformatics/bts635. Epub 2012 Oct 25. PubMed PMID: 23104886; PubMed Central PMCID: PMC3530905. + +- [STAR-Fusion](https://github.com/STAR-Fusion/STAR-Fusion) + + > Haas BJ, Dobin A, Li B, Stransky N, Pochet N, Regev A. 
Accuracy assessment of fusion transcript detection via read-mapping and de novo fusion transcript assembly-based methods. Genome Biology 2019 Oct;20,213. doi: 10.1186/s13059-019-1842-9 + +- [StringTie](https://ccb.jhu.edu/software/stringtie/index.shtml) + > Shumate A, Wong B, Pertea G, Pertea M. Improved transcriptome assembly using a hybrid of long and short reads with StringTie. PLOS Computational Biology 18, 6 (2022), doi.org/10.1371/journal.pcbi.1009730 + +## Software packaging/containerisation tools + +- [Anaconda](https://anaconda.com) + + > Anaconda Software Distribution. Computer software. Vers. 2-2.4.0. Anaconda, Nov. 2016. Web. + +- [Bioconda](https://pubmed.ncbi.nlm.nih.gov/29967506/) + + > Grüning B, Dale R, Sjödin A, Chapman BA, Rowe J, Tomkins-Tinch CH, Valieris R, Köster J; Bioconda Team. Bioconda: sustainable and comprehensive software distribution for the life sciences. Nat Methods. 2018 Jul;15(7):475-476. doi: 10.1038/s41592-018-0046-7. PubMed PMID: 29967506. + +- [BioContainers](https://pubmed.ncbi.nlm.nih.gov/28379341/) + + > da Veiga Leprevost F, Grüning B, Aflitos SA, Röst HL, Uszkoreit J, Barsnes H, Vaudel M, Moreno P, Gatto L, Weber J, Bai M, Jimenez RC, Sachsenberg T, Pfeuffer J, Alvarez RV, Griss J, Nesvizhskii AI, Perez-Riverol Y. BioContainers: an open-source and community-driven framework for software standardization. Bioinformatics. 2017 Aug 15;33(16):2580-2582. doi: 10.1093/bioinformatics/btx192. PubMed PMID: 28379341; PubMed Central PMCID: PMC5870671. + +- [Docker](https://dl.acm.org/doi/10.5555/2600239.2600241) + + > Merkel, D. (2014). Docker: lightweight linux containers for consistent development and deployment. Linux Journal, 2014(239), 2. doi: 10.5555/2600239.2600241. + +- [Singularity](https://pubmed.ncbi.nlm.nih.gov/28494014/) + + > Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. 
PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675. diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index 09226d0d..c089ec78 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -1,46 +1,182 @@ -# Contributor Covenant Code of Conduct +# Code of Conduct at nf-core (v1.4) ## Our Pledge -In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. +In the interest of fostering an open, collaborative, and welcoming environment, we as contributors and maintainers of nf-core pledge to making participation in our projects and community a harassment-free experience for everyone, regardless of: -## Our Standards +- Age +- Ability +- Body size +- Caste +- Familial status +- Gender identity and expression +- Geographical location +- Level of experience +- Nationality and national origins +- Native language +- Neurodiversity +- Race or ethnicity +- Religion +- Sexual identity and orientation +- Socioeconomic status -Examples of behavior that contributes to creating a positive environment include: +Please note that the list above is alphabetised and is therefore not ranked in any order of preference or importance. -* Using welcoming and inclusive language -* Being respectful of differing viewpoints and experiences -* Gracefully accepting constructive criticism -* Focusing on what is best for the community -* Showing empathy towards other community members +## Preamble -Examples of unacceptable behavior by participants include: +:::note +This Code of Conduct (CoC) has been drafted by Renuka Kudva, Cris Tuñí, and Michael Heuer, with input from the nf-core Core Team and Susanna Marquez from the nf-core community. 
"We", in this document, refers to the Safety Officers and members of the nf-core Core Team, both of whom are deemed to be members of the nf-core community and are therefore required to abide by this Code of Conduct. This document will be amended periodically to keep it up-to-date. In case of any dispute, the most current version will apply. +::: -* The use of sexualized language or imagery and unwelcome sexual attention or advances -* Trolling, insulting/derogatory comments, and personal or political attacks -* Public or private harassment -* Publishing others' private information, such as a physical or electronic address, without explicit permission -* Other conduct which could reasonably be considered inappropriate in a professional setting +An up-to-date list of members of the nf-core core team can be found [here](https://nf-co.re/about). + +Our Safety Officers are Saba Nafees, Cris Tuñí, and Michael Heuer. + +nf-core is a young and growing community that welcomes contributions from anyone with a shared vision for [Open Science Policies](https://www.fosteropenscience.eu/taxonomy/term/8). Open science policies encompass inclusive behaviours and we strive to build and maintain a safe and inclusive environment for all individuals. + +We have therefore adopted this CoC, which we require all members of our community and attendees of nf-core events to adhere to in all our workspaces at all times. Workspaces include, but are not limited to, Slack, meetings on Zoom, gather.town, YouTube live etc. + +Our CoC will be strictly enforced and the nf-core team reserves the right to exclude participants who do not comply with our guidelines from our workspaces and future nf-core activities. + +We ask all members of our community to help maintain supportive and productive workspaces and to avoid behaviours that can make individuals feel unsafe or unwelcome. Please help us maintain and uphold this CoC. + +Questions, concerns, or ideas on what we can include? 
Contact members of the Safety Team on Slack or email safety [at] nf-co [dot] re. ## Our Responsibilities -Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. +Members of the Safety Team (the Safety Officers) are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behaviour. + +The Safety Team, in consultation with the nf-core core team, have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this CoC, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. + +Members of the core team or the Safety Team who violate the CoC will be required to recuse themselves pending investigation. They will not have access to any reports of the violations and will be subject to the same actions as others in violation of the CoC. + +## When and where does this Code of Conduct apply? + +Participation in the nf-core community is contingent on following these guidelines in all our workspaces and events, such as hackathons, workshops, bytesize, and collaborative workspaces on gather.town. These guidelines include, but are not limited to, the following (listed alphabetically and therefore in no order of preference): + +- Communicating with an official project email address. +- Communicating with community members within the nf-core Slack channel. +- Participating in hackathons organised by nf-core (both online and in-person events). +- Participating in collaborative work on GitHub, Google Suite, community calls, mentorship meetings, email correspondence, and on the nf-core gather.town workspace. 
+- Participating in workshops, training, and seminar series organised by nf-core (both online and in-person events). This applies to events hosted on web-based platforms such as Zoom, gather.town, Jitsi, YouTube live etc. +- Representing nf-core on social media. This includes both official and personal accounts. + +## nf-core cares 😊 + +nf-core's CoC and expectations of respectful behaviours for all participants (including organisers and the nf-core team) include, but are not limited to, the following (listed in alphabetical order): + +- Ask for consent before sharing another community member’s personal information (including photographs) on social media. +- Be respectful of differing viewpoints and experiences. We are all here to learn from one another and a difference in opinion can present a good learning opportunity. +- Celebrate your accomplishments! (Get creative with your use of emojis 🎉 🥳 💯 🙌 !) +- Demonstrate empathy towards other community members. (We don’t all have the same amount of time to dedicate to nf-core. If tasks are pending, don’t hesitate to gently remind members of your team. If you are leading a task, ask for help if you feel overwhelmed.) +- Engage with and enquire after others. (This is especially important given the geographically remote nature of the nf-core community, so let’s do this the best we can) +- Focus on what is best for the team and the community. (When in doubt, ask) +- Accept feedback, yet be unafraid to question, deliberate, and learn. +- Introduce yourself to members of the community. (We’ve all been outsiders and we know that talking to strangers can be hard for some, but remember we’re interested in getting to know you and your visions for open science!) +- Show appreciation and **provide clear feedback**. (This is especially important because we don’t see each other in person and it can be harder to interpret subtleties. 
Also remember that not everyone understands a certain language to the same extent as you do, so **be clear in your communication to be kind.**) +- Take breaks when you feel like you need them. +- Use welcoming and inclusive language. (Participants are encouraged to display their chosen pronouns on Zoom or in communication on Slack) + +## nf-core frowns on 😕 + +The following behaviours from any participants within the nf-core community (including the organisers) will be considered unacceptable under this CoC. Engaging or advocating for any of the following could result in expulsion from nf-core workspaces: + +- Deliberate intimidation, stalking or following and sustained disruption of communication among participants of the community. This includes hijacking shared screens through actions such as using the annotate tool in conferencing software such as Zoom. +- “Doxing” i.e. posting (or threatening to post) another person’s personal identifying information online. +- Spamming or trolling of individuals on social media. +- Use of sexual or discriminatory imagery, comments, jokes, or unwelcome sexual attention. +- Verbal and text comments that reinforce social structures of domination related to gender, gender identity and expression, sexual orientation, ability, physical appearance, body size, race, age, religion, or work experience. + +### Online Trolling + +The majority of nf-core interactions and events are held online. Unfortunately, holding events online comes with the risk of online trolling. This is unacceptable — reports of such behaviour will be taken very seriously and perpetrators will be excluded from activities immediately. + +All community members are **required** to ask members of the group they are working with for explicit consent prior to taking screenshots of individuals during video calls. + +## Procedures for reporting CoC violations + +If someone makes you feel uncomfortable through their behaviours or actions, report it as soon as possible. 
+ +You can reach out to members of the Safety Team (Saba Nafees, Cris Tuñí, and Michael Heuer) on Slack. Alternatively, contact a member of the nf-core core team [nf-core core team](https://nf-co.re/about), and they will forward your concerns to the Safety Team. + +Issues directly concerning members of the Core Team or the Safety Team will be dealt with by other members of the core team and the safety manager — possible conflicts of interest will be taken into account. nf-core is also in discussions about having an ombudsperson and details will be shared in due course. -Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. +All reports will be handled with the utmost discretion and confidentiality. -## Scope +You can also report any CoC violations to safety [at] nf-co [dot] re. In your email report, please do your best to include: -This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. +- Your contact information. +- Identifying information (e.g. names, nicknames, pseudonyms) of the participant who has violated the Code of Conduct. +- The behaviour that was in violation and the circumstances surrounding the incident. +- The approximate time of the behaviour (if different than the time the report was made). +- Other people involved in the incident, if applicable. +- If you believe the incident is ongoing. 
+- If there is a publicly available record (e.g. mailing list record, a screenshot). +- Any additional information. + +After you file a report, one or more members of our Safety Team will contact you to follow up on your report. + +## Who will read and handle reports + +All reports will be read and handled by the members of the Safety Team at nf-core. + +If members of the Safety Team are deemed to have a conflict of interest with a report, they will be required to recuse themselves as per our Code of Conduct and will not have access to any follow-ups. + +To keep this first report confidential from any of the Safety Team members, please submit your first report by direct messaging on Slack/direct email to any of the nf-core members you are comfortable disclosing the information to, and be explicit about which member(s) you do not consent to sharing the information with. + +## Reviewing reports + +After receiving the report, members of the Safety Team will review the incident report to determine whether immediate action is required, for example, whether there is immediate threat to participants’ safety. + +The Safety Team, in consultation with members of the nf-core core team, will assess the information to determine whether the report constitutes a Code of Conduct violation, for them to decide on a course of action. + +In the case of insufficient information, one or more members of the Safety Team may contact the reporter, the reportee, or any other attendees to obtain more information. + +Once additional information is gathered, the Safety Team will collectively review and decide on the best course of action to take, if any. The Safety Team reserves the right to not act on a report. + +## Confidentiality + +All reports, and any additional information included, are only shared with the team of safety officers (and possibly members of the core team, in case the safety officer is in violation of the CoC). 
We will respect confidentiality requests for the purpose of protecting victims of abuse. + +We will not name harassment victims, beyond discussions between the safety officer and members of the nf-core team, without the explicit consent of the individuals involved. ## Enforcement -Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team on [Slack](https://nf-core-invite.herokuapp.com/). The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. +Actions taken by the nf-core’s Safety Team may include, but are not limited to: + +- Asking anyone to stop a behaviour. +- Asking anyone to leave the event and online spaces either temporarily, for the remainder of the event, or permanently. +- Removing access to the gather.town and Slack, either temporarily or permanently. +- Communicating to all participants to reinforce our expectations for conduct and remind what is unacceptable behaviour; this may be public for practical reasons. +- Communicating to all participants that an incident has taken place and how we will act or have acted — this may be for the purpose of letting event participants know we are aware of and dealing with the incident. +- Banning anyone from participating in nf-core-managed spaces, future events, and activities, either temporarily or permanently. +- No action. 
+ +## Attribution and Acknowledgements + +- The [Contributor Covenant, version 1.4](http://contributor-covenant.org/version/1/4) +- The [OpenCon 2017 Code of Conduct](http://www.opencon2017.org/code_of_conduct) (CC BY 4.0 OpenCon organisers, SPARC and Right to Research Coalition) +- The [eLife innovation sprint 2020 Code of Conduct](https://sprint.elifesciences.org/code-of-conduct/) +- The [Mozilla Community Participation Guidelines v3.1](https://www.mozilla.org/en-US/about/governance/policies/participation/) (version 3.1, CC BY-SA 3.0 Mozilla) + +## Changelog + +### v1.4 - February 8th, 2022 + +- Included a new member of the Safety Team. Corrected a typographical error in the text. + +### v1.3 - December 10th, 2021 + +- Added a statement that the CoC applies to nf-core gather.town workspaces. Corrected typographical errors in the text. + +### v1.2 - November 12th, 2021 + +- Removed information specific to reporting CoC violations at the Hackathon in October 2021. -Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. +### v1.1 - October 14th, 2021 -## Attribution +- Updated with names of new Safety Officers and specific information for the hackathon in October 2021. -This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version] +### v1.0 - March 15th, 2021 -[homepage]: http://contributor-covenant.org -[version]: http://contributor-covenant.org/version/1/4/ +- Complete rewrite from original [Contributor Covenant](http://contributor-covenant.org/) CoC. 
diff --git a/Dockerfile b/Dockerfile deleted file mode 100644 index a8967ec0..00000000 --- a/Dockerfile +++ /dev/null @@ -1,7 +0,0 @@ -FROM nfcore/base -LABEL authors="Martin Proks " \ - description="Docker image containing all requirements for nf-core/rnafusion pipeline" - -COPY environment.yml / -RUN conda env create -f /environment.yml && conda clean -a -ENV PATH /opt/conda/envs/nf-core-rnafusion-1.0.2/bin:$PATH diff --git a/Jenkinsfile b/Jenkinsfile deleted file mode 100644 index eeaa816d..00000000 --- a/Jenkinsfile +++ /dev/null @@ -1,47 +0,0 @@ -pipeline { - agent any - - environment { - JENKINS_API = credentials('api') - NXF_VER = 0.32.0 - } - - stages { - stage('Setup environment') { - steps { - sh "pip install nf-core" - sh "docker pull nfcore/rnafusion:1.0.2" - } - } - stage('Lint markdown') { - steps { - sh "markdownlint $WORKSPACE -c $WORKSPACE/.github/markdownlint.yml" - } - } - stage('Nextflow legacy build') { - steps { - // sh "nextflow run kraken,jenkins nf-core/rnafusion" - sh "nextflow run nf-core/rnafusion -r 1.0.2 --help" - sh "nextflow run nf-core/rnafusion/download-references.nf -r 1.0.2 --help" - sh "nextflow run nf-core/rnafusion/download-singularity-img.nf -r 1.0.2 --help" - } - } - stage('Nextflow latest build') { - steps { - // sh "nextflow run kraken,jenkins nf-core/rnafusion" - sh "NXF_VER='' nextflow run nf-core/rnafusion -r 1.0.2 --help" - sh "NXF_VER='' nextflow run nf-core/rnafusion/download-references.nf -r 1.0.2 --help" - sh "NXF_VER='' nextflow run nf-core/rnafusion/download-singularity-img.nf -r 1.0.2 --help" - } - } - } - - post { - failure { - script { - def response = sh(script: "curl -u ${JENKINS_API_USR}:${JENKINS_API_PSW} ${BUILD_URL}/consoleText", returnStdout: true).trim().replace('\n', '
') - def comment = pullRequest.comment("## :rotating_light: Build log output:
${response}
") - } - } - } -} \ No newline at end of file diff --git a/LICENSE b/LICENSE index 8295c89b..86e71fe1 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) Rickard Hammarén, Martin Proks +Copyright (c) Martin Proks, Annick Renevey Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index 1284e351..809de85f 100644 --- a/README.md +++ b/README.md @@ -1,73 +1,141 @@ -# ![nf-core/rnafusion](https://raw.githubusercontent.com/nf-core/rnafusion/master/docs/images/rnafusion_logo.png) +

+ + + nf-core/rnafusion + +

-**Nextflow rnafusion analysis pipeline, part of the nf-core community.**. +[![GitHub Actions CI Status](https://github.com/nf-core/rnafusion/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/rnafusion/actions/workflows/ci.yml) +[![GitHub Actions Linting Status](https://github.com/nf-core/rnafusion/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/rnafusion/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/rnafusion/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.2565517-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.2565517) +[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com) -[![Build Status](https://travis-ci.org/nf-core/rnafusion.svg?branch=master)](https://travis-ci.org/nf-core/rnafusion) -[![Nextflow](https://img.shields.io/badge/nextflow-%E2%89%A50.32.0-brightgreen.svg)](https://www.nextflow.io/) -[![DOI](https://zenodo.org/badge/151721952.svg)](https://zenodo.org/badge/latestdoi/151721952) -[![Slack Status](https://nf-core-invite.herokuapp.com/badge.svg)](https://nf-core-invite.herokuapp.com) -[![MIT License](https://img.shields.io/github/license/nf-core/rnafusion.svg)](https://github.com/nf-core/rnafusion/blob/master/LICENSE) +[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.2-23aa62.svg)](https://www.nextflow.io/) +[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) +[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) +[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) +[![Launch on Seqera 
Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/rnafusion) -[![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg)](http://bioconda.github.io/) -[![Docker](https://img.shields.io/docker/automated/nfcore/rnafusion.svg)](https://hub.docker.com/r/nfcore/rnafusion) +[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23rnafusion-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/rnafusion)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core) ## Introduction -The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It comes with docker containers making installation trivial and results highly reproducible. +**nf-core/rnafusion** is a bioinformatics best-practice analysis pipeline for RNA sequencing consisting of several tools designed for detecting and visualizing fusion genes. Results from up to 5 fusion caller tools are created, and are also aggregated, most notably in a pdf visualisation document, a vcf data collection file, and html and tsv reports. 
-| Tool | Single-end reads | CPU (recommended) | RAM (recommended) | -| --------------- |:----------------:|:-----------------:|:-----------------:| -| [Arriba](https://github.com/suhrig/arriba) | **No** | >=16 cores | ~30GB | -| [EricScript](https://sites.google.com/site/bioericscript/getting-started) | **No** | >=16 cores | ~30GB | -| [FusionCatcher](https://github.com/ndaniel/fusioncatcher/blob/master/doc/manual.md) | Yes | >=16 cores | ~64GB | -| [fusion-report](https://github.com/matq007/fusion-report) | - | - | - | -| [Pizzly](https://github.com/pmelsted/pizzly) | **No** | >=16 cores | ~30GB | -| [Squid](https://github.com/Kingsford-Group/squid) | **No** | >=16 cores | ~30GB | -| [Star-Fusion](https://github.com/STAR-Fusion/STAR-Fusion/wiki) | Yes | >=16 cores | ~30GB | -| [FusionInspector](https://github.com/FusionInspector/FusionInspector/wiki) | **No** | >=16 cores | ~30GB | +On release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/rnafusion/results). -For available parameters or help run: +In rnafusion the full-sized test includes reference building and fusion detection. The test dataset is taken from [here](https://github.com/nf-core/test-datasets/tree/rnafusion/testdata/human). + +## Pipeline summary + +![nf-core/rnafusion metro map](docs/images/nf-core-rnafusion_metro_map.png) + +### Build references + +`--references_only` triggers a workflow to ONLY build references; otherwise the references are built when the analysis is run: + +1. Download gencode fasta and gtf files +2. Create [STAR](https://github.com/alexdobin/STAR) index +3. 
Download [Arriba](https://github.com/suhrig/arriba) references +4. Download [FusionCatcher](https://github.com/ndaniel/fusioncatcher) references +5. Download and build [STAR-Fusion](https://github.com/STAR-Fusion/STAR-Fusion) references +6. Download [Fusion-report](https://github.com/Clinical-Genomics/fusion-report) DBs + +### Main workflow + +1. Input samplesheet check +2. Concatenate fastq files per sample ([cat](http://www.linfo.org/cat.html)) +3. Reads quality control ([FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)) +4. Optional trimming with [fastp](https://github.com/OpenGene/fastp) +5. Arriba subworkflow + - [STAR](https://github.com/alexdobin/STAR) alignment + - [Arriba](https://github.com/suhrig/arriba) fusion detection +6. STAR-fusion subworkflow + - [STAR](https://github.com/alexdobin/STAR) alignment + - [STAR-Fusion](https://github.com/STAR-Fusion/STAR-Fusion) fusion detection +7. Fusioncatcher subworkflow + - [FusionCatcher](https://github.com/ndaniel/fusioncatcher) fusion detection +8. StringTie subworkflow + - [StringTie](https://ccb.jhu.edu/software/stringtie/) +9. Fusion-report + - Merge all fusions detected by the selected tools with [Fusion-report](https://github.com/Clinical-Genomics/fusion-report) +10. Post-processing and analysis of data + - [FusionInspector](https://github.com/FusionInspector/FusionInspector) + - [Arriba](https://github.com/suhrig/arriba) visualisation + - Collect metrics ([`picard CollectRnaSeqMetrics`](https://gatk.broadinstitute.org/hc/en-us/articles/360037057492-CollectRnaSeqMetrics-Picard-), [`picard CollectInsertSizeMetrics`](https://gatk.broadinstitute.org/hc/en-us/articles/360037055772-CollectInsertSizeMetrics-Picard-) and [`picard MarkDuplicates`](https://gatk.broadinstitute.org/hc/en-us/articles/360037052812-MarkDuplicates-Picard-)) +11. Present QC for raw reads ([`MultiQC`](http://multiqc.info/)) +12. 
Compress bam files to cram with [samtools view](http://www.htslib.org/) + +## Usage + +> [!NOTE] +> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data. + +As the reference building is computationally heavy (> 24h on HPC), it is recommended to test the pipeline with the `-stub` parameter (creation of empty files): + +First, build the references: + +```bash +nextflow run nf-core/rnafusion \ + -profile test,<docker/singularity/.../institute> \ + --outdir <OUTDIR> \ + --references_only \ + -stub +``` + +Then perform the analysis: ```bash -nextflow run nf-core/rnafusion --help +nextflow run nf-core/rnafusion \ + -profile test,<docker/singularity/.../institute> \ + --outdir <OUTDIR> \ + -stub ``` -## Documentation +> [!WARNING] +> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files). -The nf-core/rnafusion pipeline comes with documentation about the pipeline, found in the `docs/` directory: +> **Notes:** +> +> - Conda is not currently supported; run with singularity or docker. +> - Paths need to be absolute. +> - GRCh38 is the only supported reference. +> - Single-end reads are to be used as a last resort. Paired-end reads are recommended. FusionCatcher cannot be used with single-end reads shorter than 130 bp. -1. [Installation](https://nf-co.re/usage/installation) -2. 
Pipeline configuration - * [Download references for tools](docs/references.md) - * [Local installation](https://nf-co.re/usage/local_installation) - * [Adding your own system config](https://nf-co.re/usage/adding_own_config) - * [Reference genomes](https://nf-co.re/usage/reference_genomes) -3. [Running the pipeline](docs/usage.md) -4. [Output and how to interpret the results](docs/output.md) -5. [Troubleshooting](https://nf-co.re/usage/troubleshooting) +For more details and further functionality, please refer to the [usage documentation](https://nf-co.re/rnafusion/usage) and the [parameter documentation](https://nf-co.re/rnafusion/parameters). -Use predefined configuration for desired Institution cluster provided at [nfcore/config](https://github.com/nf-core/configs) repository. +## Pipeline output + +To see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/rnafusion/results) tab on the nf-core website pipeline page. +For more details about the output files and reports, please refer to the +[output documentation](https://nf-co.re/rnafusion/output). ## Credits -This pipeline was written by Martin Proks ([@matq007](https://github.com/matq007)) in collaboration with Karolinska Institutet, SciLifeLab and University of Southern Denmark as a master thesis. This is a follow-up development started by Rickard Hammarén ([@Hammarn](https://github.com/Hammarn)). Special thanks goes to all supervisors: Teresita Díaz de Ståhl, PhD., Assoc. Prof.; Monica Nistér, MD, PhD; Maxime U Garcia PhD ([@MaxUlysse](https://github.com/MaxUlysse)); Szilveszter Juhos ([@szilvajuhos](https://github.com/szilvajuhos)); Phil Ewels PhD ([@ewels](https://github.com/ewels)) and Lars Grøntved, PhD., Assoc. Prof. 
- -## Tool References - -* **STAR-Fusion: Fast and Accurate Fusion Transcript Detection from RNA-Seq** -Brian Haas, Alexander Dobin, Nicolas Stransky, Bo Li, Xiao Yang, Timothy Tickle, Asma Bankapur, Carrie Ganote, Thomas Doak, Natalie Pochet, Jing Sun, Catherine Wu, Thomas Gingeras, Aviv Regev -bioRxiv 120295; doi: [https://doi.org/10.1101/120295](https://doi.org/10.1101/120295) -* D. Nicorici, M. Satalan, H. Edgren, S. Kangaspeska, A. Murumagi, O. Kallioniemi, S. Virtanen, O. Kilkku, **FusionCatcher – a tool for finding somatic fusion genes in paired-end RNA-sequencing data**, bioRxiv, Nov. 2014, -[DOI:10.1101/011650](http://dx.doi.org/10.1101/011650) -* Benelli M, Pescucci C, Marseglia G, Severgnini M, Torricelli F, Magi A. **Discovering chimeric transcripts in paired-end RNA-seq data by using EricScript**. Bioinformatics. 2012; 28(24): 3232-3239. -* **Fusion detection and quantification by pseudoalignment** -Páll Melsted, Shannon Hateley, Isaac Charles Joseph, Harold Pimentel, Nicolas L Bray, Lior Pachter, bioRxiv 166322; doi: [https://doi.org/10.1101/166322](https://doi.org/10.1101/166322) -* **SQUID: transcriptomic structural variation detection from RNA-seq** Cong Ma, Mingfu Shao and Carl Kingsford, Genome Biology, 2018, doi: [https://doi.org/10.1186/s13059-018-1421-5](https://doi.org/10.1186/s13059-018-1421-5) -* **Fusion-Inspector** download: [https://github.com/FusionInspector](https://github.com/FusionInspector) -* **fusion-report** download: [https://github.com/matq007/fusion-report](https://github.com/matq007/fusion-report) -* **FastQC** download: [https://www.bioinformatics.babraham.ac.uk/projects/fastqc/](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) -* **MultiQC** Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. 
[https://doi.org/10.1093/bioinformatics/btw354](https://doi.org/10.1093/bioinformatics/btw354) Download: [https://multiqc.info/](https://multiqc.info/) - -| | | | -:-:|:-:|:-:| -![nf-core/rnafusion](docs/images/ngi-logo.png) | ![nf-core/rnafusion](docs/images/ki-logo.png) | ![nf-core/rnafusion](docs/images/sdu-logo.png) +nf-core/rnafusion was written by Martin Proks ([@matq007](https://github.com/matq007)), Maxime Garcia ([@maxulysse](https://github.com/maxulysse)) and Annick Renevey ([@rannick](https://github.com/rannick)) + +We thank the following people for their help in the development of this pipeline: + +- [Phil Ewels](https://github.com/ewels) +- [Rickard Hammarén](https://github.com/Hammarn) +- [Alexander Peltzer](https://github.com/apeltzer) +- [Praveen Raj](https://github.com/praveenraj2018) + +## Contributions and Support + +If you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md). + +For further information or help, don't hesitate to get in touch on the [Slack `#rnafusion` channel](https://nfcore.slack.com/channels/rnafusion) (you can join with [this invite](https://nf-co.re/join/slack)). + +## Citations + +If you use nf-core/rnafusion for your analysis, please cite it using the following doi: [10.5281/zenodo.3946477](https://doi.org/10.5281/zenodo.3946477) + +An extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file. + +You can cite the `nf-core` publication as follows: + +> **The nf-core framework for community-curated bioinformatics pipelines.** +> +> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen. +> +> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x). 
diff --git a/adapter_fasta_test b/adapter_fasta_test new file mode 100644 index 00000000..e69de29b diff --git a/assets/adaptivecard.json b/assets/adaptivecard.json new file mode 100644 index 00000000..3ae96b42 --- /dev/null +++ b/assets/adaptivecard.json @@ -0,0 +1,67 @@ +{ + "type": "message", + "attachments": [ + { + "contentType": "application/vnd.microsoft.card.adaptive", + "contentUrl": null, + "content": { + "\$schema": "http://adaptivecards.io/schemas/adaptive-card.json", + "msteams": { + "width": "Full" + }, + "type": "AdaptiveCard", + "version": "1.2", + "body": [ + { + "type": "TextBlock", + "size": "Large", + "weight": "Bolder", + "color": "<% if (success) { %>Good<% } else { %>Attention<%} %>", + "text": "nf-core/rnafusion v${version} - ${runName}", + "wrap": true + }, + { + "type": "TextBlock", + "spacing": "None", + "text": "Completed at ${dateComplete} (duration: ${duration})", + "isSubtle": true, + "wrap": true + }, + { + "type": "TextBlock", + "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors. 
The full error message was: ${errorReport}.<% } %>", + "wrap": true + }, + { + "type": "TextBlock", + "text": "The command used to launch the workflow was as follows:", + "wrap": true + }, + { + "type": "TextBlock", + "text": "${commandLine}", + "isSubtle": true, + "wrap": true + } + ], + "actions": [ + { + "type": "Action.ShowCard", + "title": "Pipeline Configuration", + "card": { + "type": "AdaptiveCard", + "\$schema": "http://adaptivecards.io/schemas/adaptive-card.json", + "body": [ + { + "type": "FactSet", + "facts": [<% out << summary.collect{ k,v -> "{\"title\": \"$k\", \"value\" : \"$v\"}"}.join(",\n") %> + ] + } + ] + } + } + ] + } + } + ] +} diff --git a/assets/dummy_file_arriba.txt b/assets/dummy_file_arriba.txt new file mode 100644 index 00000000..e69de29b diff --git a/assets/dummy_file_fusioncatcher.txt b/assets/dummy_file_fusioncatcher.txt new file mode 100644 index 00000000..e69de29b diff --git a/assets/dummy_file_pizzly.txt b/assets/dummy_file_pizzly.txt new file mode 100644 index 00000000..e69de29b diff --git a/assets/dummy_file_squid.txt b/assets/dummy_file_squid.txt new file mode 100644 index 00000000..e69de29b diff --git a/assets/dummy_file_starfusion.txt b/assets/dummy_file_starfusion.txt new file mode 100644 index 00000000..e69de29b diff --git a/assets/email_template.html b/assets/email_template.html index af566215..72ed98c8 100644 --- a/assets/email_template.html +++ b/assets/email_template.html @@ -1,6 +1,5 @@ - @@ -11,7 +10,9 @@
-

nf-core/rnafusion v${version}

+ + +

nf-core/rnafusion ${version}

Run Name: $runName

<% if (!success){ diff --git a/assets/email_template.txt b/assets/email_template.txt index 7efad5be..bee92f36 100644 --- a/assets/email_template.txt +++ b/assets/email_template.txt @@ -1,6 +1,11 @@ -======================================== - nf-core/rnafusion v${version} -======================================== +---------------------------------------------------- + ,--./,-. + ___ __ __ __ ___ /,-._.--~\\ + |\\ | |__ __ / ` / \\ |__) |__ } { + | \\| | \\__, \\__/ | \\ |___ \\`-._,-`-, + `._,._,' + nf-core/rnafusion ${version} +---------------------------------------------------- Run Name: $runName <% if (success){ diff --git a/assets/methods_description_template.yml b/assets/methods_description_template.yml new file mode 100644 index 00000000..80452425 --- /dev/null +++ b/assets/methods_description_template.yml @@ -0,0 +1,27 @@ +id: "nf-core-rnafusion-methods-description" +description: "Suggested text and references to use when describing pipeline usage within the methods section of a publication." +section_name: "nf-core/rnafusion Methods Description" +section_href: "https://github.com/nf-core/rnafusion" +plot_type: "html" +data: | +

Methods

+

Data was processed using nf-core/rnafusion v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (Ewels et al., 2020), utilising reproducible software environments from the Bioconda (Grüning et al., 2018) and Biocontainers (da Veiga Leprevost et al., 2017) projects.

+

The pipeline was executed with Nextflow v${workflow.nextflow.version} (Di Tommaso et al., 2017) with the following command:

+
${workflow.commandLine}
+

${tool_citations}

+

References

+
    +
  • Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., & Notredame, C. (2017). Nextflow enables reproducible computational workflows. Nature Biotechnology, 35(4), 316-319. doi: 10.1038/nbt.3820
  • +
  • Ewels, P. A., Peltzer, A., Fillinger, S., Patel, H., Alneberg, J., Wilm, A., Garcia, M. U., Di Tommaso, P., & Nahnsen, S. (2020). The nf-core framework for community-curated bioinformatics pipelines. Nature Biotechnology, 38(3), 276-278. doi: 10.1038/s41587-020-0439-x
  • +
  • Grüning, B., Dale, R., Sjödin, A., Chapman, B. A., Rowe, J., Tomkins-Tinch, C. H., Valieris, R., Köster, J., & Bioconda Team. (2018). Bioconda: sustainable and comprehensive software distribution for the life sciences. Nature Methods, 15(7), 475–476. doi: 10.1038/s41592-018-0046-7
  • +
  • da Veiga Leprevost, F., Grüning, B. A., Alves Aflitos, S., Röst, H. L., Uszkoreit, J., Barsnes, H., Vaudel, M., Moreno, P., Gatto, L., Weber, J., Bai, M., Jimenez, R. C., Sachsenberg, T., Pfeuffer, J., Vera Alvarez, R., Griss, J., Nesvizhskii, A. I., & Perez-Riverol, Y. (2017). BioContainers: an open-source and community-driven framework for software standardization. Bioinformatics (Oxford, England), 33(16), 2580–2582. doi: 10.1093/bioinformatics/btx192
  • + ${tool_bibliography} +
+
+
Notes:
+
    + ${nodoi_text} +
  • The command above does not include parameters contained in any configs or profiles that may have been used. Ensure the config file is also uploaded with your publication!
  • +
  • You should also cite all software used within this run. Check the "Software Versions" of this report to get version information.
  • +
+
diff --git a/assets/multiqc_config.yaml b/assets/multiqc_config.yaml deleted file mode 100644 index bdd59b85..00000000 --- a/assets/multiqc_config.yaml +++ /dev/null @@ -1,9 +0,0 @@ -report_comment: > - This report has been generated by the nf-core/rnafusion - analysis pipeline. For information about how to interpret these results, please see the - documentation. -report_section_order: - nf-core/rnafusion-software-versions: - order: -1000 - -export_plots: true diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml new file mode 100644 index 00000000..963555cf --- /dev/null +++ b/assets/multiqc_config.yml @@ -0,0 +1,38 @@ +report_comment: > + This report has been generated by the nf-core/rnafusion + analysis pipeline. For information about how to interpret these results, please see the + documentation. + +report_section_order: + nf-core-rnafusion-methods-description: + order: -1000 + software_versions: + order: -1001 + nf-core-rnafusion-summary: + order: -1002 + +export_plots: true +disable_version_detection: true + +# Run only these modules +run_modules: + - custom_content + - fastqc + - fastp + - star + - samtools + - picard + - arriba + +module_order: + - fastp + - fastqc: + name: "FastQC (raw)" + info: "This section of the report shows FastQC results before adapter trimming." + path_filters: + - "*.zip" + - fastqc: + name: "FastQC (trimmed)" + info: "This section of the report shows FastQC results after adapter trimming." 
+ path_filters: + - "*_trimmed*.zip" diff --git a/assets/nf-core-rnafusion_logo_light.png b/assets/nf-core-rnafusion_logo_light.png new file mode 100644 index 00000000..fce71278 Binary files /dev/null and b/assets/nf-core-rnafusion_logo_light.png differ diff --git a/assets/schema_input.json b/assets/schema_input.json new file mode 100644 index 00000000..a5500810 --- /dev/null +++ b/assets/schema_input.json @@ -0,0 +1,38 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://raw.githubusercontent.com/nf-core/rnafusion/master/assets/schema_input.json", + "title": "nf-core/rnafusion pipeline - params.input schema", + "description": "Schema for the file provided with params.input", + "type": "array", + "items": { + "type": "object", + "properties": { + "sample": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "Sample name must be provided and cannot contain spaces", + "meta": ["id"] + }, + "fastq_1": { + "type": "string", + "format": "file-path", + "exists": true, + "pattern": "^\\S+f(ast)?q\\.gz$", + "errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" + }, + "fastq_2": { + "type": "string", + "format": "file-path", + "exists": true, + "pattern": "^\\S+f(ast)?q\\.gz$", + "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" + }, + "strandedness": { + "type": "string", + "enum": ["forward", "reverse", "unstranded", "unknown"], + "errorMessage": "Strandedness has to be forward, reverse, unstranded or unknown" + } + }, + "required": ["sample", "fastq_1", "fastq_2", "strandedness"] + } +} diff --git a/assets/sendmail_template.txt b/assets/sendmail_template.txt index 2d671220..ff6631ad 100644 --- a/assets/sendmail_template.txt +++ b/assets/sendmail_template.txt @@ -8,6 +8,23 @@ Content-Type: text/html; charset=utf-8 $email_html +--nfcoremimeboundary +Content-Type: 
image/png;name="nf-core-rnafusion_logo.png" +Content-Transfer-Encoding: base64 +Content-ID: +Content-Disposition: inline; filename="nf-core-rnafusion_logo_light.png" + +<% out << new File("$projectDir/assets/nf-core-rnafusion_logo_light.png"). + bytes. + encodeBase64(). + toString(). + tokenize( '\n' )*. + toList()*. + collate( 76 )*. + collect { it.join() }. + flatten(). + join( '\n' ) %> + <% if (mqcFile){ def mqcFileObj = new File("$mqcFile") @@ -20,15 +37,15 @@ Content-ID: Content-Disposition: attachment; filename=\"${mqcFileObj.getName()}\" ${mqcFileObj. - bytes. - encodeBase64(). - toString(). - tokenize( '\n' )*. - toList()*. - collate( 76 )*. - collect { it.join() }. - flatten(). - join( '\n' )} + bytes. + encodeBase64(). + toString(). + tokenize( '\n' )*. + toList()*. + collate( 76 )*. + collect { it.join() }. + flatten(). + join( '\n' )} """ }} %> diff --git a/assets/slackreport.json b/assets/slackreport.json new file mode 100644 index 00000000..66b6e474 --- /dev/null +++ b/assets/slackreport.json @@ -0,0 +1,34 @@ +{ + "attachments": [ + { + "fallback": "Plain-text summary of the attachment.", + "color": "<% if (success) { %>good<% } else { %>danger<%} %>", + "author_name": "nf-core/rnafusion ${version} - ${runName}", + "author_icon": "https://www.nextflow.io/docs/latest/_static/favicon.ico", + "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors<% } %>", + "fields": [ + { + "title": "Command used to launch the workflow", + "value": "```${commandLine}```", + "short": false + } + <% + if (!success) { %> + , + { + "title": "Full error message", + "value": "```${errorReport}```", + "short": false + }, + { + "title": "Pipeline configuration", + "value": "<% out << summary.collect{ k,v -> k == "hook_url" ? "_${k}_: (_hidden_)" : ( ( v.class.toString().contains('Path') || ( v.class.toString().contains('String') && v.contains('/') ) ) ? "_${k}_: `${v}`" : (v.class.toString().contains('DateTime') ? 
("_${k}_: " + v.format(java.time.format.DateTimeFormatter.ofLocalizedDateTime(java.time.format.FormatStyle.MEDIUM))) : "_${k}_: ${v}") ) }.join(",\n") %>", + "short": false + } + <% } + %> + ], + "footer": "Completed at <% out << dateComplete.format(java.time.format.DateTimeFormatter.ofLocalizedDateTime(java.time.format.FormatStyle.MEDIUM)) %> (duration: ${duration})" + } + ] +} diff --git a/bin/get_rrna_transcripts.py b/bin/get_rrna_transcripts.py new file mode 100755 index 00000000..670d5f06 --- /dev/null +++ b/bin/get_rrna_transcripts.py @@ -0,0 +1,81 @@ +#!/usr/bin/env python3 + +import argparse +import logging +import sys +from pathlib import Path + + +def get_rrna_intervals(file_in, file_out): + """ + Get lines containing ``#`` or ``gene_type rRNA`` or ```` or ``gene_type rRNA_pseudogene`` or ``gene_type MT_rRNA`` + Create output file + + Args: + file_in (pathlib.Path): The given GTF file. + file_out (pathlib.Path): Where the ribosomal RNA GTF file should + be created; always in GTF format. 
+ """ + + patterns = { + "#", + 'transcript_biotype "Mt_rRNA"', + 'transcript_biotype "rRNA"', + 'transcript_biotype "rRNA_pseudogene"', + } + line_starts = {"MT", "1", "2", "3", "4", "5", "6", "7", "8", "9"} + out_lines = [] + with file_in.open() as f: + data = f.readlines() + for line in data: + for pattern in patterns: + if pattern in line: + for line_start in line_starts: + if line.startswith(line_start): + out_lines.append(line) + + with file_out.open(mode="w") as out_file: + out_file.writelines(out_lines) + + +def parse_args(argv=None): + """Define and immediately parse command line arguments.""" + parser = argparse.ArgumentParser( + description="Extract ribosomal RNA intervals from a gtf file.", + epilog="Example: python get_rrna_transcripts.py ", + ) + parser.add_argument( + "file_in", + metavar="FILE_IN", + type=Path, + help="Input in GTF format.", + ) + parser.add_argument( + "file_out", + metavar="FILE_OUT", + type=Path, + help="Transformed output intervals in GTF format.", + ) + parser.add_argument( + "-l", + "--log-level", + help="The desired log level (default WARNING).", + choices=("CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"), + default="WARNING", + ) + return parser.parse_args(argv) + + +def main(argv=None): + """Coordinate argument parsing and program execution.""" + args = parse_args(argv) + logging.basicConfig(level=args.log_level, format="[%(levelname)s] %(message)s") + if not args.file_in.is_file(): + logger.error(f"The given input file {args.file_in} was not found!") + sys.exit(2) + args.file_out.parent.mkdir(parents=True, exist_ok=True) + get_rrna_intervals(args.file_in, args.file_out) + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/bin/markdown_to_html.r b/bin/markdown_to_html.r deleted file mode 100755 index abe13350..00000000 --- a/bin/markdown_to_html.r +++ /dev/null @@ -1,51 +0,0 @@ -#!/usr/bin/env Rscript - -# Command line argument processing -args = commandArgs(trailingOnly=TRUE) -if (length(args) < 2) { - 
stop("Usage: markdown_to_html.r ", call.=FALSE) -} -markdown_fn <- args[1] -output_fn <- args[2] - -# Load / install packages -if (!require("markdown")) { - install.packages("markdown", dependencies=TRUE, repos='http://cloud.r-project.org/') - library("markdown") -} - -base_css_fn <- getOption("markdown.HTML.stylesheet") -base_css <- readChar(base_css_fn, file.info(base_css_fn)$size) -custom_css <- paste(base_css, " -body { - padding: 3em; - margin-right: 350px; - max-width: 100%; -} -#toc { - position: fixed; - right: 20px; - width: 300px; - padding-top: 20px; - overflow: scroll; - height: calc(100% - 3em - 20px); -} -#toc_header { - font-size: 1.8em; - font-weight: bold; -} -#toc > ul { - padding-left: 0; - list-style-type: none; -} -#toc > ul ul { padding-left: 20px; } -#toc > ul > li > a { display: none; } -img { max-width: 800px; } -") - -markdownToHTML( - file = markdown_fn, - output = output_fn, - stylesheet = custom_css, - options = c('toc', 'base64_images', 'highlight_code') -) diff --git a/bin/scrape_software_versions.py b/bin/scrape_software_versions.py deleted file mode 100755 index 45eb3c4b..00000000 --- a/bin/scrape_software_versions.py +++ /dev/null @@ -1,65 +0,0 @@ -#!/usr/bin/env python -from __future__ import print_function -from collections import OrderedDict -import re -import os - -regexes = { - 'nf-core/rnafusion': ['v_pipeline.txt', r"(\S+)"], - 'Nextflow': ['v_nextflow.txt', r"(\S+)"], - 'FastQC': ['v_fastqc.txt', r"FastQC v(\S+)"], - 'MultiQC': ['v_multiqc.txt', r"multiqc, version (\S+)"], - 'Arriba': ['v_arriba.txt', r"arriba=(\S+)"], - 'STAR-Fusion': ['v_star_fusion.txt', r"star-fusion=(\S+)"], - 'FusionCatcher': ['v_fusioncatcher.txt', r"fusioncatcher=(\S+)"], - 'Fusion-Inspector': ['v_fusion_inspector.txt', r"fusion-inspector=(\S+)"], - 'EricScript': ['v_ericscript.txt', r"ericscript=(\S+)"], - 'Pizzly': ['v_pizzly.txt', r"pizzly=(\S+)"], - 'Squid': ['v_squid.txt', r"squid=(\S+)"], - 'fusion-report': ['v_fusion_report.txt', 
r"fusion-report=(\S+)"] -} -results = OrderedDict() -results['nf-core/rnafusion'] = 'N/A' -results['Nextflow'] = 'N/A' -results['FastQC'] = 'N/A' -results['MultiQC'] = 'N/A' -results['Arriba'] = 'N/A' -results['STAR-Fusion'] = 'N/A' -results['FusionCatcher'] = 'N/A' -results['Fusion-Inspector'] = 'N/A' -results['Pizzly'] = 'N/A' -results['Squid'] = 'N/A' -results['fusion-report'] = 'N/A' - -# Search each file using its regex -for k, v in regexes.items(): - if os.path.exists(v[0]): - with open(v[0]) as x: - versions = x.read() - match = re.search(v[1], versions) - if match: - results[k] = "v{}".format(match.group(1)) - -# Remove software set to false in results -for k in results: - if not results[k]: - del(results[k]) - -# Dump to YAML -print (''' -id: 'software_versions' -section_name: 'nf-core/rnafusion Software Versions' -section_href: 'https://github.com/nf-core/rnafusion' -plot_type: 'html' -description: 'are collected at run time from the software output.' -data: | -
-''') -for k,v in results.items(): - print("
{}
{}
".format(k,v)) -print ("
") - -# Write out regexes as csv file: -with open('software_versions.csv', 'w') as f: - for k,v in results.items(): - f.write("{}\t{}\n".format(k,v)) diff --git a/bin/vcf_collect.py b/bin/vcf_collect.py new file mode 100755 index 00000000..1decbe90 --- /dev/null +++ b/bin/vcf_collect.py @@ -0,0 +1,612 @@ +#!/usr/bin/env python3 + +import argparse +import logging +import sys +from pathlib import Path +import pandas as pd +import ast +import numpy as np +import csv + +logger = logging.getLogger() + + +def vcf_collect( + fusioninspector_in_file: str, + fusionreport_in_file: str, + gtf: str, + fusionreport_csv: str, + hgnc: str, + sample: str, + out_file, +) -> None: + """ + Process FusionInspector and FusionReport data, + merge with GTF from FusionInspector and HGNC database, + and write a VCF file. + + Args: + fusioninspector_in_file (str): Path to FusionInspector input file. + fusionreport_in_file (str): Path to Fusion-report input file. + sample (str): Sample name for the header. + hgnc (str): Path to HGNC file. + gtf (str): Path to output GTF file from FusionInspector in TSV format. + fusionreport_csv (str): Path to Fusion-report CSV output file. + out (str): Output VCF file path. 
+ + Adapted from: https://github.com/J35P312/MegaFusion + """ + merged_df = ( + build_fusioninspector_dataframe(fusioninspector_in_file) + .join(read_build_fusionreport(fusionreport_in_file), how="outer", on="FUSION") + .reset_index() + ) + hgnc_df = build_hgnc_dataframe(hgnc) + df_symbol = merged_df[merged_df["Left_ensembl_gene_id"].isna()] + df_not_symbol = merged_df[merged_df["Left_ensembl_gene_id"].notna()] + + df_not_symbol = hgnc_df.merge( + df_not_symbol, + how="right", + left_on="ensembl_gene_id", + right_on="Left_ensembl_gene_id", + ) + df_symbol = hgnc_df.merge( + df_symbol, how="right", left_on="symbol", right_on="GeneA" + ) + df = pd.concat([df_not_symbol, df_symbol]) + df = df.rename(columns={"hgnc_id": "Left_hgnc_id"}) + + df_symbol = df[df["Right_ensembl_gene_id"].isna()] + df_not_symbol = df[df["Right_ensembl_gene_id"].notna()] + + df_not_symbol = hgnc_df.merge( + df_not_symbol, + how="right", + left_on="ensembl_gene_id", + right_on="Right_ensembl_gene_id", + ) + df_symbol = hgnc_df.merge( + df_symbol, how="right", left_on="symbol", right_on="GeneB" + ) + df = pd.concat([df_not_symbol, df_symbol]) + df = df.rename(columns={"hgnc_id": "Right_hgnc_id"}) + + gtf_df = build_gtf_dataframe(gtf) + all_df = df.merge( + gtf_df, how="left", left_on="CDS_LEFT_ID", right_on="Transcript_id" + ) + all_df[["PosA", "orig_start", "orig_end"]] = ( + all_df[["PosA", "orig_start", "orig_end"]].fillna(0).astype(int) + ) + + all_df = all_df[ + ( + (all_df["PosA"] >= all_df["orig_start"]) + & (all_df["PosA"] <= all_df["orig_end"]) + ) + | ((all_df["orig_start"] == 0) & (all_df["orig_end"] == 0)) + ] + + all_df.replace("", np.nan, inplace=True) + all_df = all_df.drop_duplicates() + + all_df[["exon_number", "transcript_version"]] = all_df[ + ["exon_number", "transcript_version"] + ].replace(0, np.nan) + # Fill non-empty values within each group for 'exon_number' and 'transcript_version' + all_df["exon_number"] = all_df.groupby("PosA")["exon_number"].transform( + lambda x: 
x.fillna(method="ffill").fillna(method="bfill") + ) + all_df["transcript_version"] = all_df.groupby("PosA")[ + "transcript_version" + ].transform(lambda x: x.fillna(method="ffill").fillna(method="bfill")) + + all_df = all_df.rename(columns={"transcript_version": "Left_transcript_version"}) + all_df = all_df.rename(columns={"exon_number": "Left_exon_number"}) + all_df = all_df[ + [ + "FUSION", + "GeneA", + "GeneB", + "PosA", + "PosB", + "ChromosomeA", + "ChromosomeB", + "TOOLS_HITS", + "SCORE", + "FOUND_DB", + "FOUND_IN", + "JunctionReadCount", + "SpanningFragCount", + "FFPM", + "PROT_FUSION_TYPE", + "CDS_LEFT_ID", + "CDS_RIGHT_ID", + "Left_transcript_version", + "Left_exon_number", + "Left_hgnc_id", + "Right_hgnc_id", + "Strand1", + "Strand2", + "annots", + ] + ].drop_duplicates() + all_df["CDS_RIGHT_ID"] = all_df["CDS_RIGHT_ID"].astype("str") + all_df = all_df.merge( + gtf_df, how="left", left_on="CDS_RIGHT_ID", right_on="Transcript_id" + ) + all_df[["PosB", "orig_start", "orig_end"]] = all_df[ + ["PosB", "orig_start", "orig_end"] + ].fillna(0) + all_df[["PosB", "orig_start", "orig_end"]] = all_df[ + ["PosB", "orig_start", "orig_end"] + ].astype(int) + all_df = all_df[ + ( + (all_df["PosB"] >= all_df["orig_start"]) + & (all_df["PosB"] <= all_df["orig_end"]) + ) + | ((all_df["orig_start"] == 0) & (all_df["orig_end"] == 0)) + ] + + all_df[["PosA", "PosB"]] = all_df[["PosA", "PosB"]].replace(0, np.nan) + all_df = all_df.replace("", np.nan) + + all_df[["exon_number", "transcript_version"]] = all_df[ + ["exon_number", "transcript_version"] + ].replace(0, np.nan) + # Fill non-empty values within each group for 'exon_number' and 'transcript_version' + all_df["exon_number"] = all_df.groupby("PosB")["exon_number"].transform( + lambda x: x.fillna(method="ffill").fillna(method="bfill") + ) + all_df["transcript_version"] = all_df.groupby("PosB")[ + "transcript_version" + ].transform(lambda x: x.fillna(method="ffill").fillna(method="bfill")) + + all_df = 
all_df.rename(columns={"transcript_version": "Right_transcript_version"}) + all_df = all_df.rename(columns={"exon_number": "Right_exon_number"}) + + all_df = all_df[ + [ + "FUSION", + "GeneA", + "GeneB", + "PosA", + "PosB", + "ChromosomeA", + "ChromosomeB", + "TOOLS_HITS", + "SCORE", + "FOUND_DB", + "FOUND_IN", + "JunctionReadCount", + "SpanningFragCount", + "FFPM", + "PROT_FUSION_TYPE", + "CDS_LEFT_ID", + "CDS_RIGHT_ID", + "Left_transcript_version", + "Left_exon_number", + "Left_hgnc_id", + "Right_transcript_version", + "Right_exon_number", + "Right_hgnc_id", + "Strand1", + "Strand2", + "annots", + ] + ].drop_duplicates() + all_df = all_df.rename(columns={"FUSION": "Fusion"}) + all_df = all_df.set_index("Fusion") + + all_df = all_df.combine_first(read_fusionreport_csv(fusionreport_csv)) + + return write_vcf(column_manipulation(all_df), header_def(sample), out_file) + + +def parse_args(argv=None): + """Define and immediately parse command line arguments.""" + parser = argparse.ArgumentParser( + description="Validate and transform a tabular samplesheet.", + epilog="Example: python check_samplesheet.py samplesheet.csv samplesheet.valid.csv", + ) + parser.add_argument( + "--fusioninspector", + metavar="FUSIONINSPECTOR", + type=Path, + help="FusionInspector output in TSV format.", + ) + parser.add_argument( + "--fusionreport", + metavar="FUSIONREPORT", + type=Path, + help="Fusionreport output in index/html format.", + ) + parser.add_argument( + "--fusionreport_csv", + metavar="FUSIONREPORT_CSV", + type=Path, + help="Fusionreport output in CSV format.", + ) + parser.add_argument( + "--fusioninspector_gtf", + metavar="GTF", + type=Path, + help="FusionInspector GTF output.", + ) + parser.add_argument( + "--hgnc", + metavar="HGNC", + type=Path, + help="HGNC database.", + ) + parser.add_argument( + "--sample", metavar="SAMPLE", type=Path, help="Sample name.", default="Sample" + ) + parser.add_argument( + "--out", + metavar="OUT", + type=Path, + help="VCF output path.", + ) 
+ return parser.parse_args(argv) + + +def header_def(sample: str) -> str: + """ + Define the header of the VCF file + """ + return '##fileformat=VCFv4.1\n\ +##ALT=\n\ +##INFO=\n\ +##INFO=\n\ +##INFO=\n\ +##INFO=\n\ +##INFO=\n\ +##INFO=\n\ +##INFO=\n\ +##INFO=\n\ +##INFO=\n\ +##INFO=\n\ +##INFO=\n\ +##INFO=\n\ +##INFO=\n\ +##INFO=\n\ +##INFO=\n\ +##INFO=\n\ +##INFO=\n\ +##INFO=\n\ +##INFO=\n\ +##INFO=\n\ +##INFO=\n\ +##INFO=\n\ +##FORMAT=\n\ +##FORMAT=\n\ +##FORMAT=\n\ +##FORMAT=\n\ +#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t{}'.format( + sample + ) + + +def convert_to_list(annots_str: str) -> list: + try: + return ast.literal_eval(annots_str) + except (SyntaxError, ValueError): + return np.nan + + +def build_fusioninspector_dataframe(file: str) -> pd.DataFrame: + """ + Read FusionInspector output from a CSV file, preprocess the data, and set 'FUSION' as the index. + """ + df = pd.read_csv(file, sep="\t") + df = df.rename(columns={"#FusionName": "FUSION"}) + if not (df.empty): + df[["ChromosomeA", "PosA", "Strand1"]] = df["LeftBreakpoint"].str.split( + ":", expand=True + ) + df[["ChromosomeB", "PosB", "Strand2"]] = df["RightBreakpoint"].str.split( + ":", expand=True + ) + df[["LeftGeneName", "Left_ensembl_gene_id"]] = df["LeftGene"].str.split( + "^", expand=True + ) + df[["RightGeneName", "Right_ensembl_gene_id"]] = df["RightGene"].str.split( + "^", expand=True + ) + df["annots"] = ( + df["annots"] + .apply(convert_to_list) + .apply( + lambda x: ( + ",".join(map(str, x)) + if isinstance(x, list) + else str(x) if pd.notna(x) else "" + ) + ) + ) + else: + for i in [ + "ChromosomeA", + "Strand1", + "ChromosomeB", + "Strand2", + "LeftGeneName", + "Left_ensembl_gene_id", + "RightGeneName", + "Right_ensembl_gene_id", + "annots", + ]: + df[i] = "" + for j in [ + "PosA", + "PosB", + ]: + df[j] = np.nan + + return df.set_index(["FUSION"]) + + +def replace_value_with_column_name( + row: pd.Series, value_to_replace: str, column_name: str +) -> str: + """ + Replace 
a specific value in a row with the corresponding column name. + """ + new_values = "" + for col_name, value in row.items(): + if col_name == column_name: + if value == value_to_replace: + new_values = col_name + else: + new_values = "" + return new_values + + +def concatenate_columns(row: pd.Series) -> str: + """ + Concatenate non-empty values in a row into a single string separated by commas. + """ + non_empty_values = [str(value) for value in row if value != ""] + return ",".join(non_empty_values) + + +def read_build_fusionreport(fusionreport_file: str) -> pd.DataFrame: + """ + Read and preprocess fusion-report data from a file, including handling missing tool columns, + getting the columns with each tool and create a new FOUND_IN column with all the tool hits. + Convert the list of databases in FOUND_DB into a joined string with a comma separator. + Make all column headers uppercase. + """ + with open(fusionreport_file) as f: + from_html = [ + line.split('rows": ')[1] for line in f if 'name="fusion_list' in line + ] + tmp = str(from_html)[2:] + tmp2 = tmp.split(', "tools": ')[0] + fusion_report = pd.DataFrame(ast.literal_eval(tmp2)) + if not "arriba" in fusion_report.columns: + fusion_report["arriba"] = "" + if not "fusioncatcher" in fusion_report.columns: + fusion_report["fusioncatcher"] = "" + if not "starfusion" in fusion_report.columns: + fusion_report["starfusion"] = "" + fusion_report["arriba"] = fusion_report[["arriba"]].apply( + replace_value_with_column_name, args=("true", "arriba"), axis=1 + ) + fusion_report["fusioncatcher"] = fusion_report[["fusioncatcher"]].apply( + replace_value_with_column_name, args=("true", "fusioncatcher"), axis=1 + ) + fusion_report["starfusion"] = fusion_report[["starfusion"]].apply( + replace_value_with_column_name, args=("true", "starfusion"), axis=1 + ) + fusion_report["FOUND_IN"] = fusion_report[ + ["arriba", "starfusion", "fusioncatcher"] + ].apply(concatenate_columns, axis=1) + fusion_report.columns = 
fusion_report.columns.str.upper() + fusion_report["FOUND_DB"] = fusion_report["FOUND_DB"].apply( + lambda x: ",".join(x) if len(x) > 0 else "" + ) + fusion_report[["GeneA", "GeneB"]] = fusion_report["FUSION"].str.split( + "--", expand=True + ) + + return fusion_report[ + ["FUSION", "GeneA", "GeneB", "TOOLS_HITS", "SCORE", "FOUND_DB", "FOUND_IN"] + ].set_index(["FUSION"]) + + +def read_fusionreport_csv(file: str) -> pd.DataFrame: + df = pd.read_csv(file) + columns_to_iterate = ["starfusion", "arriba", "fusioncatcher"] + for column in columns_to_iterate: + if column not in df.columns: + df[column] = "" + df[["starfusion", "arriba", "fusioncatcher"]] = df[ + ["starfusion", "arriba", "fusioncatcher"] + ].astype("str") + for index, row in df.iterrows(): + for column in columns_to_iterate: + cell_value = row[column] + + if "#" in cell_value: + df.at[index, column] = df.at[index, column].split(",")[0] + df.at[index, column] = df.at[index, column].replace("position: ", "") + df.at[index, "A"] = df.at[index, column].split("#")[0] + df.at[index, "B"] = df.at[index, column].split("#")[1] + df.at[index, "ChromosomeA"] = df.at[index, "A"].split(":")[0] + df.at[index, "PosA"] = df.at[index, "A"].split(":")[1] + if "+" in df.at[index, "A"] or "-" in df.at[index, "A"]: + df.at[index, "StrandA"] = df.at[index, "A"].split(":")[2] + else: + df.at[index, "StrandA"] = "" + + df.at[index, "ChromosomeB"] = df.at[index, "B"].split(":")[0] + df.at[index, "PosB"] = df.at[index, "B"].split(":")[1] + if "+" in df.at[index, "B"] or "-" in df.at[index, "B"]: + df.at[index, "StrandB"] = df.at[index, "B"].split(":")[2] + else: + df.at[index, "StrandB"] = "" + + break + df[["GeneA", "GeneB"]] = df["Fusion"].str.split("--", expand=True) + df = df.set_index("Fusion") + df.to_csv("tmp.csv") + return df[ + [ + "GeneA", + "GeneB", + "ChromosomeA", + "PosA", + "StrandA", + "ChromosomeB", + "PosB", + "StrandB", + ] + ] + + +def column_manipulation(df: pd.DataFrame) -> pd.DataFrame: + """ + Manipulate and 
prepare DataFrame for VCF file creation. + """ + df["ALT"] = "" + df = df.reset_index() + df["FORMAT"] = "GT:DV:RV:FFPM" + df["ID"] = "." + df["QUAL"] = "." + df["FILTER"] = "PASS" + df["REF"] = "N" + df["INFO"] = "" + df["Sample"] = "" + df["Strand1"] = df["Strand1"].astype(str) + df["JunctionReadCount"] = df["JunctionReadCount"].fillna(0).astype(int).astype(str) + df["SpanningFragCount"] = df["SpanningFragCount"].fillna(0).astype(int).astype(str) + df["FFPM"] = df["FFPM"].fillna(0).astype(float).astype(str) + df["ChromosomeA"] = df["ChromosomeA"].fillna(0).astype(str) + df["ChromosomeB"] = df["ChromosomeB"].fillna(0).astype(str) + df["Left_hgnc_id"] = df["Left_hgnc_id"].fillna(0).astype(int).astype(str) + df["Right_hgnc_id"] = df["Right_hgnc_id"].fillna(0).astype(int).astype(str) + df["Left_exon_number"] = df["Left_exon_number"].fillna(0).astype(int).astype(str) + df["Right_exon_number"] = df["Right_exon_number"].fillna(0).astype(int).astype(str) + df["Left_transcript_version"] = ( + df["Left_transcript_version"].fillna(0).astype(int).astype(str) + ) + df["Right_transcript_version"] = ( + df["Right_transcript_version"].fillna(0).astype(int).astype(str) + ) + df["PosA"] = df["PosA"].fillna(0).astype(int).astype(str) + df["PosB"] = df["PosB"].fillna(0).astype(int).astype(str) + df["PROT_FUSION_TYPE"] = df["PROT_FUSION_TYPE"].replace(".", "nan") + df["CDS_LEFT_ID"] = df["CDS_LEFT_ID"].replace(".", "nan") + df["CDS_RIGHT_ID"] = df["CDS_RIGHT_ID"].replace(".", "nan") + + for index, row in df.iterrows(): + if row["Strand1"] == "-" and row["Strand2"] == "-": + df.loc[index, "ALT"] = f'[{row["ChromosomeB"]}:{row["PosB"]}[N' + elif row["Strand1"] == "+" and row["Strand2"] == "-": + df.loc[index, "ALT"] = f'N]{row["ChromosomeB"]}:{row["PosB"]}]' + elif row["Strand1"] == "-" and row["Strand2"] == "+": + df.loc[index, "ALT"] = f'N]{row["ChromosomeB"]}:{row["PosB"]}]' + else: + df.loc[index, "ALT"] = f'N[{row["ChromosomeB"]}:{row["PosB"]}[' + + df.loc[index, "INFO"] = ( + 
f"SVTYPE=BND;CHRA={row['ChromosomeA']};CHRB={row['ChromosomeB']};GENEA={row['GeneA']};GENEB={row['GeneB']};" + f"POSA={row['PosA']};POSB={row['PosB']};ORIENTATION={row['Strand1']},{row['Strand2']};FOUND_DB={row['FOUND_DB']};" + f"FOUND_IN={row['FOUND_IN']};TOOL_HITS={row['TOOLS_HITS']};SCORE={row['SCORE']};FRAME_STATUS={row['PROT_FUSION_TYPE']};" + f"TRANSCRIPT_ID_A={row['CDS_LEFT_ID']};TRANSCRIPT_ID_B={row['CDS_RIGHT_ID']};" + f"TRANSCRIPT_VERSION_A={row['Left_transcript_version']};TRANSCRIPT_VERSION_B={row['Right_transcript_version']};" + f"HGNC_ID_A={row['Left_hgnc_id']};HGNC_ID_B={row['Right_hgnc_id']};" + f"EXON_NUMBER_A={row['Left_exon_number']};EXON_NUMBER_B={row['Right_exon_number']};" + f"ANNOTATIONS={row['annots']}" + ) + df.loc[index, "Sample"] = ( + f"./1:{row['JunctionReadCount']}:{row['SpanningFragCount']}:{row['FFPM']}" + ) + + return df + + +def write_vcf(df_to_print: pd.DataFrame, header: str, out_file: str) -> None: + """ + Write a VCF file with a specified DataFrame, header, and output file path. + """ + df_to_print[ + [ + "ChromosomeA", + "PosA", + "ID", + "REF", + "ALT", + "QUAL", + "FILTER", + "INFO", + "FORMAT", + "Sample", + ] + ].to_csv( + path_or_buf=out_file, sep="\t", header=None, index=False, quoting=csv.QUOTE_NONE + ) + + with open(out_file, "r+") as f: + content = f.read() + f.seek(0, 0) + f.write(header.rstrip("\r\n") + "\n" + content) + + +def build_hgnc_dataframe(file: str) -> pd.DataFrame: + """ + Build a DataFrame from HGNC input file, extracting 'hgnc_id' and 'ensembl_gene_id' columns. + """ + df = pd.read_csv(file, sep="\t", low_memory=False) + df["hgnc_id"] = df["hgnc_id"].str.replace("HGNC:", "") + return df[["hgnc_id", "ensembl_gene_id", "symbol"]].dropna() + + +def build_gtf_dataframe(file: str) -> pd.DataFrame: + """ + Build a DataFrame from GTF file converted in TSV, extracting relevant columns. 
+ """ + df = pd.read_csv(file, sep="\t") + df[["fusion_dump", "Transcript_id"]] = df["transcript_id"].str.split( + "^", expand=True + ) + df[["orig_chromosome", "orig_start", "orig_end", "orig_dir"]] = df[ + "orig_coord_info" + ].str.split(",", expand=True) + return df[ + ["Transcript_id", "transcript_version", "exon_number", "orig_start", "orig_end"] + ] + + +def main(argv=None): + """Coordinate argument parsing and program execution.""" + args = parse_args(argv) + if ( + not args.fusioninspector.is_file() + or not args.fusionreport.is_file() + or not args.fusioninspector_gtf + or not args.fusionreport_csv + or not args.hgnc + ): + logger.error( + f"The given input file {args.fusioninspector} or {args.fusionreport} was not found!" + ) + sys.exit(2) + vcf_collect( + args.fusioninspector, + args.fusionreport, + args.fusioninspector_gtf, + args.fusionreport_csv, + args.hgnc, + args.sample, + args.out, + ) + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/conf/awsbatch.config b/conf/awsbatch.config deleted file mode 100644 index 14af5866..00000000 --- a/conf/awsbatch.config +++ /dev/null @@ -1,18 +0,0 @@ -/* - * ------------------------------------------------- - * Nextflow config file for running on AWS batch - * ------------------------------------------------- - * Base config needed for running with -profile awsbatch - */ -params { - config_profile_name = 'AWSBATCH' - config_profile_description = 'AWSBATCH Cloud Profile' - config_profile_contact = 'Alexander Peltzer (@apeltzer)' - config_profile_url = 'https://aws.amazon.com/de/batch/' -} - -aws.region = params.awsregion -process.executor = 'awsbatch' -process.queue = params.awsqueue -executor.awscli = '/home/ec2-user/miniconda/bin/aws' -params.tracedir = './' diff --git a/conf/base.config b/conf/base.config index 52b2cbf3..4dee1250 100644 --- a/conf/base.config +++ b/conf/base.config @@ -1,89 +1,54 @@ /* - * ------------------------------------------------- - * nf-core/rnafusion Nextflow base config 
file - * ------------------------------------------------- - * A 'blank slate' config file, appropriate for general - * use on most high performace compute environments. - * Assumes that all software is installed and available - * on the PATH. Runs in `local` mode - all jobs will be - * run on the logged in environment. - */ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + nf-core/rnafusion Nextflow base config file +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + A 'blank slate' config file, appropriate for general use on most high performance + compute environments. Assumes that all software is installed and available on + the PATH. Runs in `local` mode - all jobs will be run on the logged in environment. +---------------------------------------------------------------------------------------- +*/ -process{ +process { - cpus = { check_max( 1 * task.attempt, 'cpus' ) } - memory = { check_max( 8.GB * task.attempt, 'memory' ) } - time = { check_max( 2.h * task.attempt, 'time' ) } + cpus = { 1 * task.attempt } + memory = { 6.GB * task.attempt } + time = { 4.h * task.attempt } - errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 'retry' : 'finish' } - maxRetries = 1 - maxErrors = '-1' + errorStrategy = { task.exitStatus in ((130..145) + 104) ? 
'retry' : 'finish' } + maxRetries = 1 + maxErrors = '-1' - // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors - withName: "multiqc|get_software_versions|summary" { - memory = { check_max( 2.GB * task.attempt, 'memory' ) } - cache = false - } - withName:build_star_index { - cpus = { check_max (24, 'cpus')} - memory = { check_max( 80.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } - } - withName: "arriba|arriba_visualization" { - cpus = { check_max (24, 'cpus')} - memory = { check_max( 60.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } - container = "nfcore/rnafusion:arriba_v${params.arriba_version}" - } - withName:star_fusion { - cpus = { check_max (24, 'cpus')} - memory = { check_max( 60.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } - container = "nfcore/rnafusion:star-fusion_v${params.star_fusion_version}" - } - withName:fusioncatcher { - cpus = { check_max (24, 'cpus')} - memory = { check_max( 64.GB * task.attempt, 'memory' ) } - time = { check_max( 12.h * task.attempt, 'time' ) } - container = "nfcore/rnafusion:fusioncatcher_v${params.fusioncatcher_version}" - } - withName:fusion_inspector { - cpus = { check_max (24, 'cpus')} - memory = { check_max( 60.GB * task.attempt, 'memory' ) } - time = { check_max( 12.h * task.attempt, 'time' ) } - container = "nfcore/rnafusion:fusion-inspector_v${params.fusion_inspector_version}" - } - withName:ericscript { - cpus = { check_max (24, 'cpus')} - memory = { check_max( 60.GB * task.attempt, 'memory' ) } - time = { check_max( 12.h * task.attempt, 'time' ) } - container = "nfcore/rnafusion:ericscript_v${params.ericscript_version}" - } - withName:pizzly { - cpus = { check_max (24, 'cpus')} - memory = { check_max( 60.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } - container = "nfcore/rnafusion:pizzly_v${params.pizzly_version}" - } - withName:squid { - 
cpus = { check_max (24, 'cpus')} - memory = { check_max( 60.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } - container = "nfcore/rnafusion:squid_v${params.squid_version}" - } - // Download references - withName:download_star_fusion_ensembl { - cpus = { check_max (4, 'cpus')} - memory = { check_max( 4.GB * task.attempt, 'memory' ) } - time = { check_max( 12.h * task.attempt, 'time' ) } - container = "nfcore/rnafusion:star-fusion_v${params.star_fusion_version}" - } -} - -params { - // Defaults only, expecting to be overwritten - max_memory = 128.GB - max_cpus = 16 - max_time = 240.h - igenomes_base = 's3://ngi-igenomes/igenomes/' + withLabel:process_single { + cpus = { 1 } + memory = { 6.GB * task.attempt } + time = { 4.h * task.attempt } + } + withLabel:process_low { + cpus = { 2 * task.attempt } + memory = { 12.GB * task.attempt } + time = { 4.h * task.attempt } + } + withLabel:process_medium { + cpus = { 6 * task.attempt } + memory = { 36.GB * task.attempt } + time = { 8.h * task.attempt } + } + withLabel:process_high { + cpus = { 12 * task.attempt } + memory = { 72.GB * task.attempt } + time = { 16.h * task.attempt } + } + withLabel:process_long { + time = { 20.h * task.attempt } + } + withLabel:process_high_memory { + memory = { 200.GB * task.attempt } + } + withLabel:error_ignore { + errorStrategy = 'ignore' + } + withLabel:error_retry { + errorStrategy = 'retry' + maxRetries = 2 + } } diff --git a/conf/igenomes.config b/conf/igenomes.config index 1840a7a6..d608b45b 100644 --- a/conf/igenomes.config +++ b/conf/igenomes.config @@ -1,152 +1,478 @@ /* - * ------------------------------------------------- - * Nextflow config file for iGenomes paths - * ------------------------------------------------- - * Defines reference genomes, using iGenome paths - * Can be used by any config that customises the base - * path using $params.igenomes_base / --igenomes_base - */ 
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for iGenomes paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines reference genomes using iGenome paths. + Can be used by any config that customises the base path using: + $params.igenomes_base / --igenomes_base +---------------------------------------------------------------------------------------- +*/ params { - // illumina iGenomes reference file paths - genomes { - 'GRCh37' { - bed12 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/STARIndex/" + // illumina iGenomes reference file paths + genomes { + 'GRCh37' { + fasta = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/README.txt" + mito_name = "MT" + macs_gsize = "2.7e9" + blacklist = "${projectDir}/assets/blacklists/GRCh37-blacklist.bed" + mirtrace_species = "hsa" + } + 'GRCh38' { + fasta = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeFasta/genome.fa" + bwa = 
"${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.bed" + mito_name = "chrM" + macs_gsize = "2.7e9" + blacklist = "${projectDir}/assets/blacklists/hg38-blacklist.bed" + mirtrace_species = "hsa" + } + 'CHM13' { + fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAIndex/" + bwamem2 = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAmem2Index/" + gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/CHM13/Annotation/Genes/genes.gtf" + gff = "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/009/914/755/GCF_009914755.1_T2T-CHM13v2.0/GCF_009914755.1_T2T-CHM13v2.0_genomic.gff.gz" + mito_name = "chrM" + } + 'GRCm38' { + fasta = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/README.txt" + mito_name = "MT" + macs_gsize = "1.87e9" + blacklist = 
"${projectDir}/assets/blacklists/GRCm38-blacklist.bed" + mirtrace_species = "mmu" + } + 'TAIR10' { + fasta = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/README.txt" + mito_name = "Mt" + mirtrace_species = "ath" + } + 'EB2' { + fasta = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/README.txt" + // mirtrace_species = "bsu" + } + 'UMD3.1' { + fasta = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BWAIndex/version0.6.0/" + bowtie2 = 
"${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/README.txt" + mito_name = "MT" + mirtrace_species = "bta" + } + 'WBcel235' { + fasta = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.bed" + mito_name = "MtDNA" + macs_gsize = "9e7" + mirtrace_species = "cel" + } + 'CanFam3.1' { + fasta = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.gtf" + bed12 = 
"${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/README.txt" + mito_name = "MT" + mirtrace_species = "cfa" + } + 'GRCz10' { + fasta = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.bed" + mito_name = "MT" + mirtrace_species = "dre" + } + 'BDGP6' { + fasta = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.bed" + mito_name = "M" + macs_gsize = "1.2e8" + mirtrace_species = "dme" + } + 'EquCab2' { + fasta = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BWAIndex/version0.6.0/" + bowtie2 = 
"${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/README.txt" + mito_name = "MT" + // mirtrace_species = "ecb" + } + 'EB1' { + fasta = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/README.txt" + // mirtrace_species = "ecd" + } + 'Galgal4' { + fasta = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BismarkIndex/" + gtf = 
"${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.bed" + mito_name = "MT" + mirtrace_species = "gga" + } + 'Gm01' { + fasta = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/README.txt" + // mirtrace_species = "gmx" + } + 'Mmul_1' { + fasta = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/README.txt" + mito_name = "MT" + // mirtrace_species = "mcc" + } + 'IRGSP-1.0' { + fasta = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/WholeGenomeFasta/genome.fa" + bwa = 
"${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.bed" + mito_name = "Mt" + mirtrace_species = "osa" + } + 'CHIMP2.1.4' { + fasta = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/README.txt" + mito_name = "MT" + mirtrace_species = "ptr" + } + 'Rnor_5.0' { + fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/BismarkIndex/" 
+ gtf = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Annotation/Genes/genes.bed" + mito_name = "MT" + mirtrace_species = "rno" + } + 'Rnor_6.0' { + fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.bed" + mito_name = "MT" + mirtrace_species = "rno" + } + 'R64-1-1' { + fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.bed" + mito_name = "MT" + macs_gsize = "1.2e7" + // mirtrace_species = "sce" + } + 'EF2' { + fasta = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/WholeGenomeFasta/genome.fa" + bwa = 
"${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/README.txt" + mito_name = "MT" + macs_gsize = "1.21e7" + // mirtrace_species = "spo" + } + 'Sbi1' { + fasta = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/README.txt" + mirtrace_species = "sbi" + } + 'Sscrofa10.2' { + fasta = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/STARIndex/" + bismark = 
"${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/README.txt" + mito_name = "MT" + mirtrace_species = "ssc" + } + 'AGPv3' { + fasta = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.bed" + mito_name = "Mt" + mirtrace_species = "zma" + } + 'hg38' { + fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.bed" + mito_name = "chrM" + macs_gsize = "2.7e9" + blacklist = "${projectDir}/assets/blacklists/hg38-blacklist.bed" + mirtrace_species = "hsa" + } + 'hg19' { + fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa" + bwa = 
"${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/README.txt" + mito_name = "chrM" + macs_gsize = "2.7e9" + blacklist = "${projectDir}/assets/blacklists/hg19-blacklist.bed" + mirtrace_species = "hsa" + } + 'mm10' { + fasta = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/README.txt" + mito_name = "chrM" + macs_gsize = "1.87e9" + blacklist = "${projectDir}/assets/blacklists/mm10-blacklist.bed" + mirtrace_species = "mmu" + } + 'bosTau8' { + fasta = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/STARIndex/" + bismark = 
"${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/Genes/genes.bed" + mito_name = "chrM" + mirtrace_species = "bta" + } + 'ce10' { + fasta = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/README.txt" + mito_name = "chrM" + macs_gsize = "9e7" + mirtrace_species = "cel" + } + 'canFam3' { + fasta = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/README.txt" + mito_name = "chrM" + mirtrace_species = "cfa" + } + 'danRer10' { + fasta = 
"${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Annotation/Genes/genes.bed" + mito_name = "chrM" + macs_gsize = "1.37e9" + mirtrace_species = "dre" + } + 'dm6' { + fasta = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/Genes/genes.bed" + mito_name = "chrM" + macs_gsize = "1.2e8" + mirtrace_species = "dme" + } + 'equCab2' { + fasta = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/Genes/genes.gtf" 
+ bed12 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/README.txt" + mito_name = "chrM" + // mirtrace_species = "ecb" + } + 'galGal4' { + fasta = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/README.txt" + mito_name = "chrM" + mirtrace_species = "gga" + } + 'panTro4' { + fasta = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/README.txt" + mito_name = "chrM" + mirtrace_species = "ptr" + } + 'rn6' { + fasta = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/WholeGenomeFasta/genome.fa" + bwa = 
"${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.bed" + mito_name = "chrM" + mirtrace_species = "rno" + } + 'sacCer3' { + fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BismarkIndex/" + readme = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Annotation/README.txt" + mito_name = "chrM" + macs_gsize = "1.2e7" + // mirtrace_species = "sce" + } + 'susScr3' { + fasta = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/README.txt" + mito_name = "chrM" + 
mirtrace_species = "ssc" + } } - 'GRCh38' { - bed12 = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/STARIndex/" - } - 'GRCm38' { - bed12 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/STARIndex/" - } - 'TAIR10' { - bed12 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/STARIndex/" - } - 'EB2' { - bed12 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/STARIndex/" - } - 'UMD3.1' { - bed12 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/STARIndex/" - } - 'WBcel235' { - bed12 = 
"${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/STARIndex/" - } - 'CanFam3.1' { - bed12 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/STARIndex/" - } - 'GRCz10' { - bed12 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/STARIndex/" - } - 'BDGP6' { - bed12 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/STARIndex/" - } - 'EquCab2' { - bed12 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/STARIndex/" 
- } - 'EB1' { - bed12 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/STARIndex/" - } - 'Galgal4' { - bed12 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/STARIndex/" - } - 'Gm01' { - bed12 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/STARIndex/" - } - 'Mmul_1' { - bed12 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/STARIndex/" - } - 'IRGSP-1.0' { - bed12 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/STARIndex/" 
- } - 'CHIMP2.1.4' { - bed12 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/STARIndex/" - } - 'Rnor_6.0' { - bed12 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/STARIndex/" - } - 'R64-1-1' { - bed12 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/STARIndex/" - } - 'EF2' { - bed12 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/STARIndex/" - } - 'Sbi1' { - bed12 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.gtf" - star = 
"${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/STARIndex/" - } - 'Sscrofa10.2' { - bed12 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/STARIndex/" - } - 'AGPv3' { - bed12 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/STARIndex/" - } - } } diff --git a/conf/igenomes_ignored.config b/conf/igenomes_ignored.config new file mode 100644 index 00000000..b4034d82 --- /dev/null +++ b/conf/igenomes_ignored.config @@ -0,0 +1,9 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for iGenomes paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Empty genomes dictionary to use when igenomes is ignored. +---------------------------------------------------------------------------------------- +*/ + +params.genomes = [:] diff --git a/conf/jenkins.config b/conf/jenkins.config deleted file mode 100644 index da8a8f1c..00000000 --- a/conf/jenkins.config +++ /dev/null @@ -1,23 +0,0 @@ -/* - * ------------------------------------------------- - * Nextflow config file for running tests - * ------------------------------------------------- - * Testing profile for checking just the syntax - * of the pipeline. 
To run use: - * nextflow run nf-core/rnafusion -profile jenkins - */ - -executor { - name = 'local' -} - -params { - reads = '/share-data/testing/rnafusion/reads_{1,2}.fq.gz' - genome = 'GRCh38' - star_fusion = true - fusioncatcher = true - ericscript = true - pizzly = true - squid = true - fusion_inspector = true -} \ No newline at end of file diff --git a/conf/modules.config b/conf/modules.config new file mode 100644 index 00000000..b3643f48 --- /dev/null +++ b/conf/modules.config @@ -0,0 +1,400 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. +---------------------------------------------------------------------------------------- +*/ + +process { + + publishDir = [ + path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + + withName: 'ARRIBA_ARRIBA' { + publishDir = [ + path: { "${params.outdir}/arriba" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.prefix = { "${meta.id}.arriba" } + } + + withName: 'ARRIBA_DOWNLOAD' { + publishDir = [ + path: { "${params.genomes_base}/arriba" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }
+        ]
+    }
+
+    withName: 'ARRIBA_VISUALISATION' { // run gate, output prefix and publish location for the arriba visualisation step
+        ext.when = { !params.fusioninspector_only && (params.starfusion || params.all) } // plain booleans: a nested { ... } is a Closure object, which is always truthy, so the old gate could never be false
+        ext.prefix = { "${meta.id}_combined_fusions_arriba_visualisation" }
+        publishDir = [
+            path: { "${params.outdir}/arriba_visualisation" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename } // null for versions.yml, i.e. that file is not published
+        ]
+    }
+
+    withName: '.*ARRIBA_WORKFLOW:.*:CTATSPLICING_STARTOCANCERINTRONS' { // regex selector: this step only when nested under ARRIBA_WORKFLOW
+        ext.args = {[ // NOTE(review): `bam` is not defined in this config - presumably the process input of that name; confirm
+            bam ? "--vis" : "",
+            "--sample_name ${meta.id}",
+        ].join(" ")}
+        publishDir = [
+            path: { "${params.outdir}/ctatsplicing/arriba" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+        ]
+    }
+
+    withName: '.*STARFUSION_WORKFLOW:.*:CTATSPLICING_STARTOCANCERINTRONS' { // same step, selected when nested under STARFUSION_WORKFLOW; published separately
+        ext.args = {[ // NOTE(review): `bam` is not defined in this config - presumably the process input of that name; confirm
+            bam ? "--vis" : "",
+            "--sample_name ${meta.id}",
+        ].join(" ")}
+        publishDir = [
+            path: { "${params.outdir}/ctatsplicing/starfusion" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+        ]
+    }
+
+    withName: 'GENCODE_DOWNLOAD' { // publishes under the reference directory (params.genomes_base), not params.outdir
+        publishDir = [
+            path: { "${params.genomes_base}/gencode" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+        ]
+    }
+
+    withName: 'FASTP' {
+        ext.args = params.trim_tail ? "--trim_tail1 ${params.trim_tail} --trim_tail2 ${params.trim_tail} " : '' // both tail-trim flags get the same value; empty when params.trim_tail is unset/falsy
+    }
+
+    withName: 'FASTQC' {
+        ext.args = '--quiet'
+        ext.when = {!params.skip_qc} // single closure, evaluated lazily: false when params.skip_qc is truthy
+        publishDir = [
+            path: { "${params.outdir}/fastqc" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+        ]
+    }
+
+    withName: 'FASTQC_FOR_FASTP' { // same options as FASTQC, with a "_trimmed" prefix and its own publish directory
+        ext.args = '--quiet'
+        ext.when = { !params.skip_qc }
+        ext.prefix = { "${meta.id}_trimmed" }
+        publishDir = [
+            path: { "${params.outdir}/fastqc_for_fastp" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ?
null : filename }
+        ]
+    }
+
+    withName: 'FUSIONCATCHER' {
+        ext.args = "--limitSjdbInsertNsj ${params.fusioncatcher_limitSjdbInsertNsj}"
+    }
+
+    withName: 'FUSIONCATCHER_DOWNLOAD' { // publishes under the reference directory (params.genomes_base)
+        publishDir = [
+            path: { "${params.genomes_base}/fusioncatcher" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename } // null for versions.yml, i.e. that file is not published
+        ]
+    }
+
+    withName: 'FUSIONINSPECTOR' {
+        ext.when = { !params.skip_vis }
+        ext.args = { params.fusioninspector_limitSjdbInsertNsj != 1000000 ? "--STAR_xtra_params \"--limitSjdbInsertNsj ${params.fusioninspector_limitSjdbInsertNsj}\"" : '' } // compare the param directly: ${...} is interpolation only inside a double-quoted string; the extra STAR option is added only when the value differs from 1000000
+        ext.args2 = '--annotate --examine_coding_effect'
+    }
+
+    withName: 'FUSIONREPORT' {
+        ext.when = { !params.skip_vis }
+        ext.args = { params.no_cosmic ? "--no-cosmic" : "" } // test the flag itself: wrapped in { } it was an always-truthy Closure, so --no-cosmic was passed unconditionally
+        ext.args2 = "--export csv"
+        publishDir = [
+            path: { "${params.outdir}/fusionreport/${meta.id}" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+        ]
+    }
+
+    withName: 'FUSIONREPORT_DOWNLOAD' {
+        ext.args = { params.no_cosmic ? "--no-cosmic" : " --cosmic_usr ${params.cosmic_username} --cosmic_passwd ${params.cosmic_passwd}" } // same fix as FUSIONREPORT: the credentials branch was unreachable while the condition was a Closure
+        ext.args2 = { params.qiagen ? "--qiagen" : "" }
+        publishDir = [
+            path: { "${params.genomes_base}" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+        ]
+    }
+
+    withName: 'GATK4_BEDTOINTERVALLIST' { // publishes next to the gencode reference files
+        publishDir = [
+            path: { "${params.genomes_base}/gencode" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+        ]
+    }
+
+    withName: 'GATK4_MARKDUPLICATES' {
+        ext.when = { !params.skip_qc && !params.fusioninspector_only && (params.starfusion || params.all) } // plain booleans: each nested { ... } was a Closure object (always truthy), so the old gate was always true
+        publishDir = [
+            path: { "${params.outdir}/picard" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ?
null : filename },
+        ]
+    }
+
+    withName: 'GFFREAD' { // publishes under the reference directory (params.genomes_base)
+        ext.args = { '-w -S' }
+        publishDir = [
+            path: { "${params.genomes_base}/gffread" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, // null for versions.yml, i.e. that file is not published
+        ]
+    }
+
+    withName: 'GTF_TO_REFFLAT' { // publishes next to the gencode reference files
+        ext.args = "-genePredExt -geneNameAsName2 -ignoreGroupsWithoutExons"
+        publishDir = [
+            path: { "${params.genomes_base}/gencode" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
+        ]
+    }
+
+    withName: 'HGNC_DOWNLOAD' {
+        publishDir = [
+            path: { "${params.genomes_base}/hgnc" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
+        ]
+    }
+    withName: 'MULTIQC' {
+        ext.when = { !params.skip_qc }
+        ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } // whole ternary inside one closure: previously the condition was a Closure object (always truthy), so --title was emitted even with no title set
+        publishDir = [
+            path: { "${params.outdir}/multiqc" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+        ]
+    }
+
+    withName: 'PICARD_COLLECTRNASEQMETRICS' {
+        ext.when = { !params.skip_qc && !params.fusioninspector_only && (params.starfusion || params.all) } // plain booleans: each nested { ... } was a Closure object (always truthy), so the old gate was always true
+
+    }
+
+    withName: 'PICARD_COLLECTINSERTSIZEMETRICS' {
+        ext.when = { !params.skip_qc && !params.fusioninspector_only && (params.starfusion || params.all) } // ${...} is interpolation only inside a double-quoted string; here the params are read directly, matching PICARD_COLLECTRNASEQMETRICS
+        ext.prefix = { "${meta.id}_collectinsertsize"}
+        publishDir = [
+            path: { "${params.outdir}/picard" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
+        ]
+    }
+
+    withName: 'SALMON_INDEX' { // publishes under the reference directory (params.genomes_base)
+        publishDir = [
+            path: { "${params.genomes_base}/salmon" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ?
null : filename },
+        ]
+    }
+
+    withName: 'SALMON_QUANT' { // fixed extra arguments and publish location for the Salmon quantification step
+        ext.args = { [ // list joined with single spaces into one argument string
+            '--gcBias',
+            '--validateMappings'
+        ].join(' ') }
+        publishDir = [
+            path: { "${params.outdir}/salmon" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, // null for versions.yml, i.e. that file is not published
+        ]
+    }
+
+    withName: 'SAMTOOLS_FAIDX' { // publishes under the reference directory (params.genomes_base), next to the gencode files
+        publishDir = [
+            path: { "${params.genomes_base}/gencode" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
+        ]
+    }
+
+    withName: 'SAMTOOLS_INDEX_FOR_ARRIBA' { // the three *_FOR_ARRIBA samtools selectors share one prefix and one publish directory (cram_arriba)
+        ext.prefix = { "${meta.id}_star_for_arriba_sorted" }
+        publishDir = [
+            path: { "${params.outdir}/cram_arriba" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+        ]
+    }
+
+    withName: 'SAMTOOLS_SORT_FOR_ARRIBA' {
+        ext.prefix = { "${meta.id}_star_for_arriba_sorted" }
+        publishDir = [
+            path: { "${params.outdir}/cram_arriba" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+        ]
+    }
+
+    withName: 'SAMTOOLS_VIEW_FOR_ARRIBA' {
+        ext.args = { "--output-fmt cram" } // requests CRAM as the output format
+        ext.prefix = { "${meta.id}_star_for_arriba_sorted" }
+        publishDir = [
+            path: { "${params.outdir}/cram_arriba" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+        ]
+    }
+
+    withName: 'SAMTOOLS_INDEX_FOR_STARFUSION' { // published alongside the STAR-for-STAR-Fusion output directory
+        publishDir = [
+            path: { "${params.outdir}/star_for_starfusion" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+        ]
+    }
+
+    withName: 'SAMTOOLS_INDEX_FOR_STARFUSION_CRAM' { // same prefix and publish directory as SAMTOOLS_VIEW_FOR_STARFUSION
+        ext.prefix = { "${meta.id}.star_for_starfusion.Aligned.sortedByCoord.out" }
+        publishDir = [
+            path: { "${params.outdir}/cram_starfusion" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ?
null : filename } + ] + } + + withName: 'SAMTOOLS_VIEW_FOR_STARFUSION' { + ext.args = { "--output-fmt cram" } + ext.prefix = { "${meta.id}.star_for_starfusion.Aligned.sortedByCoord.out" } + publishDir = [ + path: { "${params.outdir}/cram_starfusion" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'STAR_FOR_ARRIBA' { + publishDir = [ + path: { "${params.outdir}/star_for_arriba" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + ext.args = '--readFilesCommand zcat \ + --outSAMtype BAM SortedByCoordinate \ + --outSAMunmapped Within \ + --outBAMcompression 0 \ + --outFilterMultimapNmax 50 \ + --peOverlapNbasesMin 10 \ + --alignSplicedMateMapLminOverLmate 0.5 \ + --alignSJstitchMismatchNmax 5 -1 5 5 \ + --chimSegmentMin 10 \ + --chimOutType WithinBAM HardClip Junctions \ + --chimJunctionOverhangMin 10 \ + --chimScoreDropMax 30 \ + --chimScoreJunctionNonGTAG 0 \ + --chimScoreSeparation 1 \ + --chimSegmentReadGapMax 3 \ + --chimMultimapNmax 50' + } + + withName: 'STAR_FOR_STARFUSION' { + publishDir = [ + path: { "${params.outdir}/star_for_starfusion" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, + ] + ext.args = '--twopassMode Basic \ + --outReadsUnmapped None \ + --readFilesCommand zcat \ + --outSAMtype BAM SortedByCoordinate \ + --outSAMstrandField intronMotif \ + --outSAMunmapped Within \ + --chimSegmentMin 12 \ + --chimJunctionOverhangMin 8 \ + --chimOutJunctionFormat 1 \ + --alignSJDBoverhangMin 10 \ + --alignMatesGapMax 100000 \ + --alignIntronMax 100000 \ + --alignSJstitchMismatchNmax 5 -1 5 5 \ + --chimMultimapScoreRange 3 \ + --chimScoreJunctionNonGTAG -4 \ + --chimMultimapNmax 20 \ + --chimNonchimScoreDropMin 10 \ + --peOverlapNbasesMin 12 \ + --peOverlapMMp 0.1 \ + --alignInsertionFlush Right \ + --alignSplicedMateMapLminOverLmate 0 \ + --alignSplicedMateMapLmin 30 \ + --chimOutType Junctions \ + --quantMode GeneCounts' + } + + withName: 'STAR_GENOMEGENERATE' { + ext.args = "--sjdbOverhang ${params.read_length - 1}" + cpus = { 24 * task.attempt } + memory = { 100.GB * task.attempt } + time = { 2.d * task.attempt } + publishDir = [ + path: { "${params.genomes_base}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + } + + withName: 'NFCORE_RNAFUSION:BUILD_REFERENCES:STARFUSION_BUILD' { + cpus = { 24 * task.attempt } + memory = { 100.GB * task.attempt } + time = { 2.d * task.attempt } + publishDir = [ + path: { "${params.genomes_base}/starfusion" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + ext.args = "--max_readlength ${params.read_length} --human_gencode_filter" + } + + withName: 'STARFUSION_DOWNLOAD' { + cpus = { 2 * task.attempt } + memory = { 24.GB * task.attempt } + time = { 6.h * task.attempt } + publishDir = [ + path: { "${params.genomes_base}/starfusion" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, + ] + } + + withName: 'STRINGTIE_MERGE' { + publishDir = [ + path: { "${params.outdir}/stringtie/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'VCF_COLLECT' { + ext.when = { !params.fusioninspector_only && !params.skip_vcf } + } + + withName: '.*' { + ext.when = { !params.references_only || task.process.contains('BUILD_REFERENCES') } + } +} diff --git a/conf/test.config b/conf/test.config index 6f076005..2d403c75 100644 --- a/conf/test.config +++ b/conf/test.config @@ -1,29 +1,30 @@ /* - * ------------------------------------------------- - * Nextflow config file for running tests - * ------------------------------------------------- - * Defines bundled input files and everything required - * to run a fast and simple test. Use as follows: - * nextflow run nf-core/rnafusion -profile test - */ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test.
-executor { - name = 'local' -} + Use as follows: + nextflow run nf-core/rnafusion -profile test,<docker/singularity> --outdir <OUTDIR> -stub + +---------------------------------------------------------------------------------------- +*/ params { - config_profile_name = 'Test profile' - config_profile_description = 'Minimal test dataset to check pipeline function' - // Limit resources so that this can run on Travis - max_cpus = 2 - max_memory = 6.GB - max_time = 48.h - // Input data - test = true - reads = 'tests/reads_{1,2}.fq.gz' - // Genome references - fasta = 'tests/genome.fa' - gtf = 'tests/genes.gtf' - star_index = 'tests/star_index' - databases = '/tests/databases' + config_profile_name = 'Test profile' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Input data + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/rnafusion/testdata/human/samplesheet_valid.csv' + all = true + no_cosmic = true +} + +// Limit and standardize resources for github actions and reproducibility +process { + resourceLimits = [ + cpus: 4, + memory: '15.GB', + time: '1.h' + ] } diff --git a/conf/test_build.config b/conf/test_build.config new file mode 100644 index 00000000..e577ada5 --- /dev/null +++ b/conf/test_build.config @@ -0,0 +1,36 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test.
+ + Use as follows: + nextflow run nf-core/rnafusion -profile test_build,<docker/singularity> --outdir <OUTDIR> -stub + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Test build references profile' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Input data + references_only = true + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/rnafusion/testdata/human/samplesheet_valid.csv' + no_cosmic = true + all = true + + skip_salmon_index = true + starfusion_build = true + fusion_annot_lib = 'https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/CTAT_HumanFusionLib.mini.dat.gz' + +} + +// Limit and standardize resources for github actions and reproducibility +process { + resourceLimits = [ + cpus: 4, + memory: '15.GB', + time: '1.h' + ] +} diff --git a/conf/test_cosmic.config b/conf/test_cosmic.config new file mode 100644 index 00000000..9cc1bcda --- /dev/null +++ b/conf/test_cosmic.config @@ -0,0 +1,31 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test.
+ + Use as follows: + nextflow run nf-core/rnafusion -profile test_cosmic,<docker/singularity> --outdir <OUTDIR> -stub + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Test cosmic profile' + config_profile_description = 'Minimal test cosmic dataset to check pipeline function' + + // Input data + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/rnafusion/testdata/human/samplesheet_valid.csv' + all = true + cosmic_username = secrets.COSMIC_USERNAME + cosmic_passwd = secrets.COSMIC_PASSWD +} + +// Limit and standardize resources for github actions and reproducibility +process { + resourceLimits = [ + cpus: 4, + memory: '15.GB', + time: '1.h' + ] +} diff --git a/conf/test_full.config b/conf/test_full.config new file mode 100644 index 00000000..cfcb7865 --- /dev/null +++ b/conf/test_full.config @@ -0,0 +1,22 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running full-size tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a full size pipeline test. + Use as follows: + nextflow run nf-core/rnafusion -profile test_full,<docker/singularity> --outdir <OUTDIR> +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Full test profile' + config_profile_description = 'Full test dataset to check pipeline function' + + // Input data for full size test + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/rnafusion/testdata/human/samplesheet_valid.csv' + all = true + // TODO + // test_full can't run currently because the references must be given and they are not available. + // This profile should be updated once they get uploaded.
+ + } diff --git a/docs/README.md b/docs/README.md index 1a6b6ba9..0c1d417d 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1,14 +1,10 @@ # nf-core/rnafusion: Documentation -The nf-core/rnafusion documentation is split into the following files: +The nf-core/rnafusion documentation is split into the following pages: -1. [Installation](https://nf-co.re/usage/installation) -2. Pipeline configuration - * [Download references for tools](references.md) - * [Local installation](https://nf-co.re/usage/local_installation) - * [Adding your own system config](https://nf-co.re/usage/adding_own_config) - * [Reference genomes](https://nf-co.re/usage/reference_genomes) - * [UPPMAX configuration](configuration/uppmax.md) -3. [Running the pipeline](usage.md) -4. [Output and how to interpret the results](output.md) -5. [Troubleshooting](https://nf-co.re/usage/troubleshooting) +- [Usage](usage.md) + - An overview of how the pipeline works, how to run it and a description of all of the different command-line flags. +- [Output](output.md) + - An overview of the different results produced by the pipeline and how to interpret them. + +You can find a lot more documentation about installing, configuring and running nf-core pipelines on the website: [https://nf-co.re](https://nf-co.re) diff --git a/docs/configuration/adding_your_own.md b/docs/configuration/adding_your_own.md deleted file mode 100644 index a3a2b7d2..00000000 --- a/docs/configuration/adding_your_own.md +++ /dev/null @@ -1,88 +0,0 @@ -# nf-core/rnafusion: Configuration for other clusters - -It is entirely possible to run this pipeline on other clusters, though you will need to set up your own config file so that the pipeline knows how to work with your cluster. - -> If you think that there are other people using the pipeline who would benefit from your configuration (eg. other common cluster setups), please let us know. 
We can add a new configuration and profile to our existing configuration repository [nf-core/configs](https://github.com/nf-core/configs). This would allow everyone to run the pipeline with just specifying `-profile `. - -If you are the only person to be running this pipeline, you can create your config file as `~/.nextflow/config` and it will be applied every time you run Nextflow. Alternatively, save the file anywhere and reference it when running the pipeline with `-c path/to/config` (see the [Nextflow documentation](https://www.nextflow.io/docs/latest/config.html) for more). - -A basic configuration comes with the pipeline, which runs by default (the `standard` config profile - see [`conf/base.config`](../conf/base.config)). This means that you only need to configure the specifics for your system and overwrite any defaults that you want to change. - -## Cluster Environment - -By default, pipeline uses the `local` Nextflow executor - in other words, all jobs are run in the login session. If you're using a simple server, this may be fine. If you're using a compute cluster, this is bad as all jobs will run on the head node. - -To specify your cluster environment, add the following line to your config file: - -```nextflow -process.executor = 'YOUR_SYSTEM_TYPE' -``` - -Many different cluster types are supported by Nextflow. For more information, please see the [Nextflow documentation](https://www.nextflow.io/docs/latest/executor.html). - -Note that you may need to specify cluster options, such as a project or queue. To do so, use the `clusterOptions` config option: - -```nextflow -process { - executor = 'SLURM' - clusterOptions = '-A myproject' -} -``` - -## Software Requirements - -To run the pipeline, several software packages are required. How you satisfy these requirements is essentially up to you and depends on your system. If possible, we _highly_ recommend using either Docker or Singularity. 
- -Please see the [`installation documentation`](../installation.md) for how to run using the below as a one-off. These instructions are about configuring a config file for repeated use. - -### Docker - -Docker is a great way to run nf-core/rnafusion, as it manages all software installations and allows the pipeline to be run in an identical software environment across a range of systems. - -Nextflow has [excellent integration](https://www.nextflow.io/docs/latest/docker.html) with Docker, and beyond installing the two tools, not much else is required - nextflow will automatically fetch the [nfcore/rnafusion](https://hub.docker.com/r/nfcore/rnafusion/) image that we have created and is hosted at dockerhub at run time. - -To add docker support to your own config file, add the following: - -```nextflow -docker.enabled = true -process.container = "nfcore/rnafusion" -``` - -Note that the dockerhub organisation name annoyingly can't have a hyphen, so is `nfcore` and not `nf-core`. - -### Singularity image - -Many HPC environments are not able to run Docker due to security issues. -[Singularity](http://singularity.lbl.gov/) is a tool designed to run on such HPC systems which is very similar to Docker. - -To specify singularity usage in your pipeline config file, add the following: - -```nextflow -singularity.enabled = true -process.container = "docker://nf-core/rnafusion" -``` - -If you intend to run the pipeline offline, nextflow will not be able to automatically download the singularity image for you. -Instead, you'll have to do this yourself manually first, transfer the image file and then point to that. 
- -First, pull the image file where you have an internet connection: - -```bash -singularity pull --name nf-core-rnafusion.simg docker://nf-core/rnafusion -``` - -Then transfer this file and point the config file to the image: - -```nextflow -singularity.enabled = true -process.container = "/path/to/nf-core-rnafusion.simg" -``` - -### Conda - -If you're not able to use Docker or Singularity, you can instead use conda to manage the software requirements. -To use conda in your own config file, add the following: - -```nextflow -process.conda = "$baseDir/environment.yml" -``` diff --git a/docs/configuration/local.md b/docs/configuration/local.md deleted file mode 100644 index fad35bc0..00000000 --- a/docs/configuration/local.md +++ /dev/null @@ -1,49 +0,0 @@ -# nfcore/rnafusion: Local Configuration - -Local installation is not advised as some tools require at least **60GB** of RAM. For more details about tool memory consumption see section [tools](../tools.md). If you satisfy this requirement, we highly recommend using either Docker or Singularity. - -## Docker - -Docker is a great way to run nfcore/rnafusion, as it manages all software installations and allows the pipeline to be run in an identical software environment across a range of systems. - -Nextflow has [excellent integration](https://www.nextflow.io/docs/latest/docker.html) with Docker, and beyond installing the two tools, not much else is required. The nfcore/rnafusion profile comes with a configuration profile for docker, making it very easy to use. This also comes with the required presets to use the AWS iGenomes resource, meaning that if using common reference genomes you just specify the reference ID and it will be autaomtically downloaded from AWS S3. 
- -First, install docker on your system: [Docker Installation Instructions](https://docs.docker.com/engine/installation/) - -Then, simply run the analysis pipeline: - -```bash -nextflow run nf-core/rnafusion -profile docker --genome '' --reads '' -``` - -Nextflow will recognise `nf-core/rnafusion` and download the pipeline from GitHub. The `-profile docker` configuration lists the [nfcore/rnafusion](https://hub.docker.com/r/nfcore/rnafusion/) image that we have created and is hosted at dockerhub, and this is downloaded. - -For more information about how to work with reference genomes, see [`docs/configuration/reference_genomes.md`](docs/configuration/reference_genomes.md). - -### Pipeline versions - -The public docker images are tagged with the same version numbers as the code, which you can use to ensure reproducibility. When running the pipeline, specify the pipeline version with `-r`, for example `-r 1.0.1`. This uses pipeline code and docker image from this tagged version. - -## Singularity image - -Many HPC environments are not able to run Docker due to security issues. [Singularity](http://singularity.lbl.gov/) is a tool designed to run on such HPC systems which is very similar to Docker. Even better, it can use create images directly from dockerhub. - -To use the singularity image for a single run, use `-with-singularity`. This will download the docker container from dockerhub and create a singularity image for you dynamically. - -If you intend to run the pipeline offline, nextflow will not be able to automatically download the singularity image for you. Instead, you'll have to do this yourself manually first, transfer the image file and then point to that. - -First, pull the image file where you have an internet connection: - -> NB: The "tag" at the end of this command corresponds to the pipeline version. 
-> Here, we're pulling the docker image for version 1.0.1 of the nfcore/rnafusion pipeline -> Make sure that this tag corresponds to the version of the pipeline that you're using - -```bash -singularity pull --name nfcore-rnafusion-1.0.1.img docker://nfcore/rnafusion:1.0.1 -``` - -Then transfer this file and run the pipeline with this path: - -```bash -nextflow run /path/to/nfcore-rnafusion -with-singularity /path/to/nfcore-rnafusion-1.0.1.img -``` diff --git a/docs/configuration/reference_genomes.md b/docs/configuration/reference_genomes.md deleted file mode 100644 index afb0c989..00000000 --- a/docs/configuration/reference_genomes.md +++ /dev/null @@ -1,52 +0,0 @@ -# nf-core/rnafusion: Reference Genomes Configuration - -The nf-core/rnafusion pipeline needs a reference genome for alignment and annotation. - -These paths can be supplied on the command line at run time (see the [usage docs](../usage.md)), -but for convenience it's often better to save these paths in a nextflow config file. -See below for instructions on how to do this. -Read [Adding your own system](adding_your_own.md) to find out how to set up custom config files. - -## Adding paths to a config file - -Specifying long paths every time you run the pipeline is a pain. -To make this easier, the pipeline comes configured to understand reference genome keywords which correspond to preconfigured paths, meaning that you can just specify `--genome ID` when running the pipeline. - -Note that this genome key can also be specified in a config file if you always use the same genome. - -To use this system, add paths to your config file using the following template: - -```nextflow -params { - genomes { - 'YOUR-ID' { - fasta = '/genome.fa' - } - 'OTHER-GENOME' { - // [..] - } - } - // Optional - default genome. Ignored if --genome 'OTHER-GENOME' specified on command line - genome = 'YOUR-ID' -} -``` - -You can add as many genomes as you like as long as they have unique IDs. 
- -## illumina iGenomes - -To make the use of reference genomes easier, illumina has developed a centralised resource called [iGenomes](https://support.illumina.com/sequencing/sequencing_software/igenome.html). -Multiple reference index types are held together with consistent structure for multiple genomes. - -We have put a copy of iGenomes up onto AWS S3 hosting and this pipeline is configured to use this by default. -The hosting fees for AWS iGenomes are currently kindly funded by a grant from Amazon. -The pipeline will automatically download the required reference files when you run the pipeline. -For more information about the AWS iGenomes, see [https://ewels.github.io/AWS-iGenomes/](https://ewels.github.io/AWS-iGenomes/). - -Downloading the files takes time and bandwidth, so we recommend making a local copy of the iGenomes resource. -Once downloaded, you can customise the variable `params.igenomes_base` in your custom configuration file to point to the reference location. -For example: - -```nextflow -params.igenomes_base = '/path/to/data/igenomes/' -``` diff --git a/docs/configuration/uppmax.md b/docs/configuration/uppmax.md deleted file mode 100644 index 1feeefc1..00000000 --- a/docs/configuration/uppmax.md +++ /dev/null @@ -1,62 +0,0 @@ -# nfcore/rnafusion: UPPMAX Configuration - -The pipeline comes bundled with configurations to use the [Swedish UPPMAX](https://www.uppmax.uu.se/) clusters (tested on `milou`, `rackham`, `bianca` and `irma`). As such, you shouldn't need to add any custom configuration - everything _should_ work out of the box. - -To use the pipeline on UPPMAX, you **must** specificy `-profile uppmax` when running the pipeline (as of Nov 2017). - -Note that you will need to specify your UPPMAX project ID when running a pipeline. To do this, use the command line flag `--project `. The pipeline will exit with an error message if you try to run it pipeline with the UPPMAX config profile without a project. 
- -**Optional Extra:** To avoid having to specify your project every time you run Nextflow, you can add it to your personal Nextflow config file instead. Add this line to `~/.nextflow/config`: - -```groovy -params.project = 'project_ID' // eg. b2017123 -``` - -## Running offline - -If you are running the pipeline on Bianca or Irma, you will not have an active internet connection and some automated features will not be able to function. Specifically, you'll need to transfer the pipeline files and the singularity image manually. - -First, to generate the singularity image, run the following command. Note that you need singularity installed - this is available on the other UPPMAX clusters (Milou and Rackham): - -First, pull the image file where you have an internet connection: - -> NB: The "tag" at the end of this command corresponds to the pipeline version. -> Here, we're pulling the docker image for version 1.0.1 of the nfcore/rnafusion pipeline -> Make sure that this tag corresponds to the version of the pipeline that you're using - -```bash -singularity pull --name nfcore-rnafusion-1.0.1.img docker://nfcore/rnafusion:1.0.1 -pwd # Prints path to your singularity container -``` - -The nfcore/rnafusion pipeline files can be downloaded from [https://github.com/nf-core/rnafusion/releases](https://github.com/nf-core/rnafusion/releases). - -Download the pipeline files and transfer the compressed archive (the `.zip` -or `.tar.gz` file). Once transferred, extract the pipeline files. 
-For example, with a `.zip` file: - -```bash -unzip 1.0.1.zip -mv nfcore-rnafusion-1.0.1 nfcore-rnafusion # rename the folder -cd nfcore-rnafusion-1.0.1 -pwd # Prints full path to your pipeline -``` - -Finally, move to the directory where you want to run the pipeline -and execute Nextflow with the path to the pipeline, as so: - -```bash -cd /path/to/my/data/analysis -nextflow run /path/to/nfcore-rnafusion-1.0.1 -with-singularity /path/to/singularity/nfcore-rnafusion-1.0.1.img -``` - -(Note that you'll need the other common flags such as `--reads` and `--genome` in addition to this command). - -> NB: Note that you should _not_ use the `-r 1.0.1` flag recommended elsewhere. This tells Nextflow to download -> that version of the code when it runs. Here, you have already downloaded the code, so it generates an error. - -## Environment modules and development - -If you would prefer to use environment modules instead of singularity, you can use the old version of the configuration by specifying `-profile uppmax_modules` (we don't recommend this). - -For pipeline development work on `milou`, use `-profile uppmax_devel` - this uses the milou [devel partition](http://www.uppmax.uu.se/support/user-guides/slurm-user-guide/#tocjump_030509106905141747_8) for testing the pipeline quickly. Please note that this is _not_ suitable for proper analysis runs - only tiny test datasets. 
diff --git a/docs/images/BTB_logo.png b/docs/images/BTB_logo.png new file mode 100644 index 00000000..6a197b80 Binary files /dev/null and b/docs/images/BTB_logo.png differ diff --git a/docs/images/BTB_logo.svg b/docs/images/BTB_logo.svg new file mode 100644 index 00000000..099f1101 --- /dev/null +++ b/docs/images/BTB_logo.svg @@ -0,0 +1,184 @@ + + + +image/svg+xml \ No newline at end of file diff --git a/docs/images/NGI_logo.png b/docs/images/NGI_logo.png new file mode 100644 index 00000000..3f4b769e Binary files /dev/null and b/docs/images/NGI_logo.png differ diff --git a/docs/images/NGI_logo.svg b/docs/images/NGI_logo.svg new file mode 100644 index 00000000..aef40fd8 --- /dev/null +++ b/docs/images/NGI_logo.svg @@ -0,0 +1,333 @@ + + + +image/svg+xml \ No newline at end of file diff --git a/docs/images/SDU_logo.png b/docs/images/SDU_logo.png new file mode 100644 index 00000000..38e60b4b Binary files /dev/null and b/docs/images/SDU_logo.png differ diff --git a/docs/images/SciLifeLab_logo.png b/docs/images/SciLifeLab_logo.png new file mode 100644 index 00000000..bc4dbda6 Binary files /dev/null and b/docs/images/SciLifeLab_logo.png differ diff --git a/docs/images/SciLifeLab_logo.svg b/docs/images/SciLifeLab_logo.svg new file mode 100644 index 00000000..b8a44b79 --- /dev/null +++ b/docs/images/SciLifeLab_logo.svg @@ -0,0 +1,99 @@ + + + +image/svg+xml \ No newline at end of file diff --git a/docs/images/ki-logo.png b/docs/images/ki-logo.png deleted file mode 100644 index 73a8079e..00000000 Binary files a/docs/images/ki-logo.png and /dev/null differ diff --git a/docs/images/nf-core-rnafusion_logo_dark.png b/docs/images/nf-core-rnafusion_logo_dark.png new file mode 100644 index 00000000..22fc3e9a Binary files /dev/null and b/docs/images/nf-core-rnafusion_logo_dark.png differ diff --git a/docs/images/nf-core-rnafusion_logo_light.png b/docs/images/nf-core-rnafusion_logo_light.png new file mode 100644 index 00000000..b2cf1761 Binary files /dev/null and 
b/docs/images/nf-core-rnafusion_logo_light.png differ diff --git a/docs/images/nf-core-rnafusion_metro_map.png b/docs/images/nf-core-rnafusion_metro_map.png new file mode 100644 index 00000000..76b6bc3f Binary files /dev/null and b/docs/images/nf-core-rnafusion_metro_map.png differ diff --git a/docs/images/nf-core-rnafusion_metro_map.svg b/docs/images/nf-core-rnafusion_metro_map.svg new file mode 100644 index 00000000..1cd7980f --- /dev/null +++ b/docs/images/nf-core-rnafusion_metro_map.svg @@ -0,0 +1,754 @@ + + + + + + + + + + + + + + + fastq + + + + + + + + + + + + + + + txt + + + + + + + + + + + + + + StringTie + Arriba + + + + + align + fastptrimming + + FusionCatcher + + + align + + + + + + + + + + + + + + STAR-Fusion + FastQC + MultiQC + FusionInspector + fusion-report + Picard:- CollectRnaSeqMetrics- CollectWgsMetrics- CollectInsertSizeMetrics + + + + + + + + + + + + + + + + + + fusioncatcher + starfusion + qc + + Workflows: + + + + arriba + stringtie + + Arribavisualisation + VCFcollect + + + + diff --git a/docs/images/ngi-logo.png b/docs/images/ngi-logo.png deleted file mode 100644 index 16bc56f1..00000000 Binary files a/docs/images/ngi-logo.png and /dev/null differ diff --git a/docs/images/rnafusion_logo.svg b/docs/images/rnafusion_logo.svg index ec35448f..fcd196c8 100644 --- a/docs/images/rnafusion_logo.svg +++ b/docs/images/rnafusion_logo.svg @@ -9,13 +9,13 @@ xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape" enable-background="new 0 0 1150.9 517" version="1.1" - viewBox="0 0 1456.7841 522.44342" + viewBox="0 0 1139.218 514.66212" xml:space="preserve" id="svg2" - inkscape:version="0.91 r13725" - sodipodi:docname="EmptyName_logo.svg" - width="1456.7842" - height="522.44342">nf-nf- + core/ + x="357.14139" + y="241.24541" + font-size="209.87px" + font-weight="bold" + id="text69" + style="font-weight:bold;font-size:209.86999512px;line-height:0%;font-family:'Maven Pro'">core/ + rnafusion + x="-260.05042" + y="457.04541" + font-weight="bold" + 
id="text59" + style="font-weight:bold;line-height:0%;font-family:'Maven Pro'">rnafusion + \ No newline at end of file + d="m 300.43725,166.1155 -21.53224,21.61638 h 61.0915 V 166.1155 Z" + id="path67" + inkscape:connector-curvature="0" + style="fill:url(#f)" /> \ No newline at end of file diff --git a/docs/images/sdu-logo.png b/docs/images/sdu-logo.png deleted file mode 100644 index 9a5f658f..00000000 Binary files a/docs/images/sdu-logo.png and /dev/null differ diff --git a/docs/installation.md b/docs/installation.md deleted file mode 100644 index 43aa2d4b..00000000 --- a/docs/installation.md +++ /dev/null @@ -1,118 +0,0 @@ -# nf-core/rnafusion: Installation - -To start using the nf-core/rnafusion pipeline, follow the steps below: - -1. [Install Nextflow](#1-install-nextflow) -2. [Install the pipeline](#2-install-the-pipeline) - * [Automatic](#21-automatic) - * [Offline](#22-offline) - * [Development](#23-development) -3. [Pipeline configuration](#3-pipeline-configuration) - * [Software deps: Docker and Singularity](#31-software-deps-docker-and-singularity) - * [Software deps: Bioconda](#32-software-deps-bioconda) - * [Configuration profiles](#33-configuration-profiles) -4. [Reference genomes](#4-reference-genomes) -5. [Appendices](#appendices) - * [Running on UPPMAX](#running-on-uppmax) - -## 1) Install NextFlow - -Nextflow runs on most POSIX systems (Linux, Mac OSX etc). It can be installed by running the following commands: - -```bash -# Make sure that Java v8+ is installed: -java -version - -# Install Nextflow -curl -fsSL get.nextflow.io | bash - -# Add Nextflow binary to your PATH: -mv nextflow ~/bin/ -# OR system-wide installation: -# sudo mv nextflow /usr/local/bin -``` - -See [nextflow.io](https://www.nextflow.io/) for further instructions on how to install and configure Nextflow. 
- -## 2) Install the pipeline - -### 2.1) Automatic - -This pipeline itself needs no installation - NextFlow will automatically fetch it from GitHub if `nf-core/rnafusion` is specified as the pipeline name. - -### 2.2) Offline - -The above method requires an internet connection so that Nextflow can download the pipeline files. If you're running on a system that has no internet connection, you'll need to download and transfer the pipeline files manually: - -```bash -wget https://github.com/nf-core/rnafusion/archive/master.zip -mkdir -p ~/my-pipelines/nf-core/ -unzip master.zip -d ~/my-pipelines/nf-core/ -cd ~/my_data/ -nextflow run ~/my-pipelines/nf-core/rnafusion-master -``` - -To stop nextflow from looking for updates online, you can tell it to run in offline mode by specifying the following environment variable in your ~/.bashrc file: - -```bash -export NXF_OFFLINE='TRUE' -``` - -### 2.3) Development - -If you would like to make changes to the pipeline, it's best to make a fork on GitHub and then clone the files. Once cloned you can run the pipeline directly as above. - -## 3) Pipeline configuration - -By default, the pipeline runs with the `standard` configuration profile. This uses a number of sensible defaults for process requirements and is suitable for running on a simple (if powerful!) basic server. You can see this configuration in [`conf/base.config`](../conf/base.config). - -Be warned of two important points about this default configuration: - -1. The default profile uses the `local` executor - * All jobs are run in the login session. If you're using a simple server, this may be fine. If you're using a compute cluster, this is bad as all jobs will run on the head node. - * See the [nextflow docs](https://www.nextflow.io/docs/latest/executor.html) for information about running with other hardware backends. Most job scheduler systems are natively supported. -2. 
Nextflow will expect all software to be installed and available on the `PATH` - -### 3.1) Software deps: Docker - -First, install docker on your system: [Docker Installation Instructions](https://docs.docker.com/engine/installation/) - -Then, running the pipeline with the option `-profile docker` tells Nextflow to enable Docker for this run. A set of images containing the software requirements will be automatically fetched and used from DockerHub ([https://hub.docker.com/r/nfcore/rnafusion](https://hub.docker.com/r/nfcore/rnafusion)). - -### 3.1) Software deps: Singularity - -If you're not able to use Docker then [Singularity](http://singularity.lbl.gov/) is a great alternative. -The process is very similar: running the pipeline with the option `-profile singularity` tells Nextflow to enable singularity for this run. A set of images containing all of the software requirements will be automatically fetched and used from DockerHub. - -If running offline with Singularity, you'll need to download and transfer the Singularity images first. You can use included nextflow `download-singularity-img.nf` script: - -```bash -nextflow run nf-core/rnafusion/download-singularity-img.nf --all --outdir -``` - -For additional optional parameters run: - -```bash -nextflow run nf-core/rnafusion/download-singularity-img.nf --help -``` - -### 3.2) Software deps: conda - -If you're not able to use Docker _or_ Singularity, you can instead use conda to manage the software requirements. -This is slower and less reproducible than the above, but is still better than having to install all requirements yourself! -The pipeline ships with a conda environment file and nextflow has built-in support for this. 
-To use it first ensure that you have conda installed (we recommend [miniconda](https://conda.io/miniconda.html)), then follow the same pattern as above and use the flag `-profile conda` - -## Appendices - -### Running on UPPMAX - -To run the pipeline on the [Swedish UPPMAX](https://www.uppmax.uu.se/) clusters (`rackham`, `irma`, `bianca` etc), use the command line flag `-profile uppmax`. This tells Nextflow to submit jobs using the SLURM job executor with Singularity for software dependencies. - -Note that you will need to specify your UPPMAX project ID when running a pipeline. To do this, use the command line flag `--project `. The pipeline will exit with an error message if you try to run it pipeline with the default UPPMAX config profile without a project. - -**Optional Extra:** To avoid having to specify your project every time you run Nextflow, you can add it to your personal Nextflow config file instead. Add this line to `~/.nextflow/config`: - -```nextflow -params.project = 'project_ID' // eg. b2017123 -``` diff --git a/docs/output.md b/docs/output.md index 40b2bde5..dd370575 100644 --- a/docs/output.md +++ b/docs/output.md @@ -1,241 +1,482 @@ # nf-core/rnafusion: Output -This document describes the output produced by the pipeline. - -## Pipeline overview - -The pipeline is built using [Nextflow](https://www.nextflow.io/) -and processes data using the following steps: - -* [FastQC](#fastqc) - read quality control -* [Star-Fusion](#star-fusion) -* [FusionCatcher](#fusioncatcher) -* [EricScript](#ericscript) -* [Pizzly](#pizzly) -* [Squid](#squid) -* [FusionInspector](#fusion-inspector) -* [fusion-report](#fusion-report) -* [MultiQC](#multiqc) - aggregate report, describing results of the whole pipeline - -## FastQC - -[FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) gives general quality metrics about your reads. It provides information about the quality score distribution across your reads, the per base sequence content (%T/A/G/C). 
You get information about adapter contamination and other overrepresented sequences. - -For further reading and documentation see the [FastQC help](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/). - -> **NB:** The FastQC plots displayed in the MultiQC report shows _untrimmed_ reads. They may contain adapter sequence and potentially regions with low quality. To see how your reads look after trimming, look at the FastQC reports in the `trim_galore` directory. - -**Output directory: `results/fastqc`** - -* `sample_fastqc.html` - * FastQC report, containing quality metrics for your untrimmed raw fastq files -* `zips/sample_fastqc.zip` - * zip file containing the FastQC report, tab-delimited data file and plot images - -## Star-Fusion - -**Output directory: `results/tools/StarFusion`** - -* `star-fusion.fusion_predictions.tsv` - * contains all the predicted gene fusions - -|  Column | Description | -| ------- | ----------- | -| JunctionReadCount | Indicates the number of RNA-Seq fragments containing a read that aligns as a split read at the site of the putative fusion junction. | -| SpanningFragCount | Indicates the number of RNA-Seq fragments that encompass the fusion junction such that one read of the pair aligns to a different gene than the other paired-end read of that fragment. | -| SpliceType | Indicates whether the proposed breakpoint occurs at reference exon junctions as provided by the reference transcript structure annotations (ex. gencode). -| LeftGene -| LeftBreakpoint -| RightGene -| RightBreakpoint -| LargeAnchorSupport | column indicates whether there are split reads that provide 'long' (set to length of 25 bases) alignments on both sides of the putative breakpoint. 
| -| FFPM | fusion fragments per million total reads; **Default:** *0.1 (meaning at least 1 fusion-supporting rna-seq fragment per 10M total reads)*; **TL;DR:** can be adjusted by changing `--min_FFPM` -| LeftBreakDinuc | | -| LeftBreakEntropy | Represents Shannon entropy | -| RightBreakDinuc | -| RightBreakEntropy | Represents Shannon entropy | -| annots | Annotation generated by [FusionAnnotar](https://github.com/FusionAnnotator/FusionAnnotator/wiki) | - -For more info check the [documentation](https://github.com/STAR-Fusion/STAR-Fusion/wiki#Outputs). - -## Fusioncatcher - -**Output directory: `results/tools/Fusioncatcher`** - -* `final-list_candidate-fusion-genes.txt` - * contains all the predicted gene fusions - -|  Column | Description | -| ------- | ----------- | -| **Gene\_1\_symbol(5end\_fusion\_partner)** | Gene symbol of the 5' end fusion partner | -| **Gene\_2\_symbol\_2(3end\_fusion\_partner)** | Gene symbol of the 3' end fusion partner | -| **Gene\_1\_id(5end\_fusion\_partner)** | Ensembl gene id of the 5' end fusion partner | -| **Gene\_2\_id(3end\_fusion\_partner)** | Ensembl gene id of the 3' end fusion partner | -| **Exon\_1\_id(5end\_fusion\_partner)** | Ensembl exon id of the 5' end fusion exon-exon junction | -| **Exon\_2\_id(3end\_fusion\_partner)** | Ensembl exon id of the 3' end fusion exon-exon junction | -| **Fusion\_point\_for\_gene\_1(5end\_fusion\_partner)** | Chromosomal position of the 5' end of fusion junction (chromosome:position:strand); 1-based coordinate | -| **Fusion\_point\_for\_gene\_2(3end\_fusion\_partner)** | Chromosomal position of the 3' end of fusion junction (chromosome:position:strand); 1-based coordinate | -| **Spanning\_pairs** | Count of pairs of reads supporting the fusion (**including** also the multimapping reads) | -| **Spanning\_unique\_reads** | Count of unique reads (i.e. unique mapping positions) mapping on the fusion junction. 
Shortly, here are counted all the reads which map on fusion junction minus the PCR duplicated reads. | -| **Longest\_anchor\_found** | Longest anchor (hangover) found among the unique reads mapping on the fusion junction | -| **Fusion\_finding\_method** | Aligning method used for mapping the reads and finding the fusion genes. Here are two methods used which are: (i) **BOWTIE** = only Bowtie aligner is used for mapping the reads on the genome and exon-exon fusion junctions, (ii) **BOWTIE+BLAT** = Bowtie aligner is used for mapping reads on the genome and BLAT is used for mapping reads for finding the fusion junction, (iii) **BOWTIE+STAR** = Bowtie aligner is used for mapping reads on the genome and STAR is used for mapping reads for finding the fusion junction, (iv) **BOWTIE+BOWTIE2** = Bowtie aligner is used for mapping reads on the genome and Bowtie2 is used for mapping reads for finding the fusion junction. | -| **Fusion\_sequence** | The inferred fusion junction (the asterisk sign marks the junction point) | -| **Fusion\_description** | Type of the fusion gene (see the Table 2) | -| **Counts\_of\_common\_mapping\_reads** | Count of reads mapping simultaneously on both genes which form the fusion gene. This is an indication how similar are the DNA/RNA sequences of the genes forming the fusion gene (i.e. what is their homology because highly homologous genes tend to appear show as candidate fusion genes). In case of completely different sequences of the genes involved in forming a fusion gene then here it is expected to have the value zero. | -| **Predicted\_effect** | Predicted effect of the candidate fusion gene using the annotation from Ensembl database. This is shown in format **effect\_gene\_1**/**effect\_gene\_2**, where the possible values for effect\_gene\_1 or effect\_gene\_2 are: **intergenic**, **intronic**, **exonic(no-known-CDS)**, **UTR**, **CDS(not-reliable-start-or-end)**, **CDS(truncated)**, or **CDS(complete)**. 
In case that the fusion junction for both genes is within their CDS (coding sequence) then only the values **in-frame** or **out-of-frame** will be shown. | -| **Predicted\_fused\_transcripts** | All possible known fused transcripts in format ENSEMBL-TRANSCRIPT-1:POSITION-1/ENSEMBLE-TRANSCRIPT-B:POSITION-2, where are fused the sequence 1:POSITION-1 of transcript ENSEMBL-TRANSCRIPT-1 with sequence POSITION-2:END of transcript ENSEMBL-TRANSCRIPT-2 | -| **Predicted\_fused\_proteins** | Predicted amino acid sequences of all possible fused proteins (separated by ";"). | - -For more info check the [documentation](https://github.com/ndaniel/fusioncatcher/blob/master/doc/manual.md#62---output-data-output-data). - -## EricScript - -**Output directory: `results/tools/Ericscript/tmp`** - -* `fusions.results.filtered.tsv` - * contains all the predicted gene fusions - -|  Column | Description | -| ------- | ----------- | -| GeneName1 | official gene name of 5' gene. | -| GeneName2 | official gene name of 3' gene. | -| chr1 | chromosome of 5' gene. | -| Breakpoint1 | predicted breakpoint on 5' gene. | -| strand1 | strand (-/+) of 5' gene. | -| chr2 | chromosome of 3' gene. | -| Breakpoint2 | predicted breakpoint on 3' gene. | -| strand2 | strand (-/+) of 3' gene. | -| EnsemblGene1 | Ensembl gene ID of 5' gene. | -| EnsemblGene2 | Ensembl gene ID of 3' gene. | -| crossingreads | the number of paired end discordant reads. | -| spanningreads | the number of paired end reads spanning the junction. | -| mean.insertsize | mean of insert sizes of crossing + spanning reads. | -| homology | if filled, all the homologies between the fusion junction and Ensembl genes. | -| fusiontype | intra-chromosomal, inter-chromosomal, read-through or CIS. | -| InfoGene1 | gene information about 5' gene. | -| InfoGene2 | gene information about 3' gene. | -| JunctionSequence | predicted junction fusion sequence. | -| GeneExpr1 | Read count based estimation of the expression level of 5' gene. 
| -| GeneExpr2 | Read count based estimation of the expression level of 3' gene. | -| GeneExpr_fused | Read count based estimation of the expression level of the predicted chimeric transcript. | -| ES | Edge score. | -| GJS | Genuine Junction score. | -| US | Uniformity score. | -| EricScore | EricScore score (adaboost classifier). | - -For more info check the [documentation](https://sites.google.com/site/bioericscript/getting-started). - -## Pizzly - -**Output directory: `results/tools/Pizzly`** - -* `pizzly_fusions.json` - * contains all the predicted gene fusions - -|  Column | Description | -| ------- | ----------- | -| geneA | `id`: reference id and `name`: gene name | -| geneB | Describes reference id and gene name | -| paircount | Number of paired count | -| splitcount | Number of split count | -| transcripts | List of all transcripts `fasta_record`, `transcriptA`, `transcriptB`, `support`, `reads` | -| readpairs | List of read pairs containing (`type`, `read1`, `read2`) | - -For more info check the [documentation](https://github.com/pmelsted/pizzly#output). - -## Squid - -**Output directory: `results/tools/Squid`** - -* `fusions_annotated.txt` - * contains all the predicted gene fusions - -|  Column | Description | -| ------- | ----------- | -| chr1 | chromosome name of the first breakpoint. -| start1 | starting position of the segment of the first breakpoint, or the predicted breakpoint position if strand1 is "-" | -| end1 | ending position of the segment of the first breakpoint, or the predicted breakpoint position if strand1 is "+" | -| chr2 | chromosome name of the second breakpoint | -| start2 | starting position of the segment of the second breakpoint, or the predicted breakpoint position if strand2 is "-" | -| end2 | ending position of the segment of the second breakpoint, or the predicted breakpoint position if strand2 is "+" | -| name | TSV is not named yet, this column shows with dot. 
-| score | number of reads supporting this TSV (without weighted by Discordant edge ratio multiplier) | -| strand1 | strand of the first segment in TSV. -| strand2 | strand of the second segment in TSV. -| num_concordantfrag_bp1 | number of concordant paired-end reads covering the first breakpoint. For a concordant paired-end read, it includes two ends and a inserted region in between, if any of the 3 regions covers the breakpoint, the read is counted in this number | -| num_concordantfrag_bp2 | number of concordant paired-end reads covering the second breakpoint. The count is defined in the same way as num_concordantfrag_bp1 | - -For more info check the [documentation](https://github.com/Kingsford-Group/squid#output-specification). - -## Fusion Inspector - -**Output directory: `results/tools/FusionInspector`** - -* `finspector.fa` - * the candidate fusion-gene contigs (if you copy things elsewhere, make sure to also copy the index file: `finspector.fa.fai`) -* `finspector.bed` - * the reference gene structure annotations for fusion partners -* `finspector.junction_reads.bam` - * alignments of the breakpoint-junction supporting reads. -* `finspector.spanning_reads.bam` - * alignments of the breakpoint-spanning paired-end reads. - -To visualize fusion genes in [IGV tool](https://software.broadinstitute.org/software/igv/igvtools) first create a genome `Menu->Genomes->Create .genome File`, choose name and description, then choose the following files: +## Introduction + +This document describes the output produced by the pipeline. Most of the plots are taken from the MultiQC report, which summarises results at the end of the pipeline. + +The directories listed below will be created in the results directory after the pipeline has finished. All paths are relative to the top-level results directory. 
+ +## Pipeline overview + +The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps: + +- [Download and build references](#download-and-build-references) - Build references needed to run the rest of the pipeline +- [STAR](#star) - Alignment for arriba, and STAR-fusion +- [Cat](#cat) - Concatenate fastq files per sample ID +- [Arriba](#arriba) - Arriba fusion detection +- [STAR-fusion](#star-fusion) - STAR-fusion fusion detection +- [StringTie](#stringtie) - StringTie assembly +- [FusionCatcher](#fusioncatcher) - FusionCatcher fusion detection +- [CTAT-SPLICING](#ctat-splicing) - Detection and annotation of cancer splicing aberrations +- [Samtools](#samtools) - SAM/BAM file manipulation +- [Fusion-report](#fusion-report) - Summary of the findings of each tool and comparison to COSMIC, Mitelman, and FusionGDB2 databases +- [FusionInspector](#fusioninspector) - Supervised analysis of fusion predictions from fusion-report, recover and re-score evidence for such predictions +- [Arriba visualisation](#visualisation) - Arriba visualisation report for FusionInspector fusions +- [Picard](#picard) - Collect QC metrics +- [FastQC](#fastqc) - Raw read quality control +- [Salmon](#salmon) - Normalized gene expression calculation +- [MultiQC](#multiqc) - Aggregate reports describing QC results from the whole pipeline +- [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution + +## Download and build references + +
+Output reference files and folder structure + +### References directory structure -* `finspector.fa` - * make sure the index file finspector.fa.fai is in the same folder -* `finspector.gtf` - * use this for 'Genes' -* `cytoBand.txt` - * use this for 'optional Cytoband' +- `references/` + - `arriba` + - `blacklist_hg38_GRCh38_v2.1.0.tsv.gz` + - `protein_domains_hg38_GRCh38_v2.1.0.gff3` + - `cytobands_hg38_GRCh38_v2.1.0.tsv` + - `ensembl` + - `Homo_sapiens.GRCh38.{ensembl_version}.all.fa` + - `Homo_sapiens.GRCh38.{ensembl_version}.cdna.all.fa.gz` + - `Homo_sapiens.GRCh38.{ensembl_version}.gtf` + - `Homo_sapiens.GRCh38.{ensembl_version}.chr.gtf` + - `Homo_sapiens.GRCh38.{ensembl_version}.chr.gtf.refflat` + - `Homo_sapiens.GRCh38.{ensembl_version}.interval_list` + - `fusioncatcher` + - `human_v` - dir with all references for fusioncatcher + - `fusion_report_db` + - `cosmic.db` + - `fusiongdb2.db` + - `mitelman.db` + - `star` - dir with STAR index + - `starfusion` + - files and dirs used to build the index + - `ctat_genome_lib_build_dir` - dir containing the index -Add the bam files by choosing `File->Load from File` and make sure to select your generated mini genome in the upper-left corner. -For more info and help check [wiki page](https://github.com/FusionInspector/FusionInspector/wiki). 
- -## Summary report - -**Output directory: `results/Report-`** - -* `fusions.json` - * contains all main information about found fusions (fusion name, score, explanation of the score calculation, cherry picked output from fusion tools) -* `index.html` - * main dashboard containing the list of all detected fusions -* `*.html` - * each fusion gets a custom page with fetched data from the local database -* `fusions_list_filtered.txt` - * filtered list of found fusions (uses tool cutoff as filter, by default: 2, can be adjusted by adding `-t ` when running the tool) -* `fusions_list.txt` - * unfiltered list of found fusions - -### Tool detection +(Only files or folders used by the pipeline are mentioned explicitly.) -Graphs displaying ratio of fusion genes caught by different tools. The last part *all tools* is an intersection of all tools. +
-![Tool detection](images/summary_graph_1.png) +## Main pipeline workflow -### Found in database +> If no argument is specified here, the tool was used with default parameters. -Displays how many fusions were found in a downloaded databases of the summary report. +### Directory structure -![Known/unknown fusions](images/summary_graph_2.png) +```text +{outdir} +├── arriba +├── arriba_visualisation +├── cram_arriba +├── cram_starfusion +├── fastp +├── fastqc +├── fusioncatcher +├── fusioninspector +├── fusionreport +├── kallisto_quant +├── megafusion +├── multiqc +├── picard +├── pipeline_info +├── samtools_sort_for_arriba +├── star_for_arriba +├── star_for_starfusion +├── starfusion +└── work +.nextflow.log +``` -### Tool detection distribution +If no parameters are specified, the default is applied. -For each fusion a sum of detected tools is calculated. This counts are then visualized in the graph below. +### Arriba -![Known/unknown fusions](images/summary_graph_3.png) +[Arriba](https://arriba.readthedocs.io/en/latest/) is used to i) detect gene fusions and ii) create a PDF report for the fusions found (visualisation): -## MultiQC +#### Detection -[MultiQC](http://multiqc.info) is a visualisation tool that generates a single HTML report summarising all samples in your project. Most of the pipeline QC results are visualised in the report and further statistics are available in within the report data directory. +
+Output files -The pipeline has special steps which allow the software versions used to be reported in the MultiQC output for future traceability. +- `arriba/` + - `.arriba.fusions.tsv` - contains the identified fusions + - `.arriba.fusions.discarded.tsv` -**Output directory: `results/multiqc`** +
-* `Project_multiqc_report.html` - * MultiQC report - a standalone HTML file that can be viewed in your web browser -* `Project_multiqc_data/` - * Directory containing parsed statistics from the different tools used in the pipeline +#### Visualisation + +
+Output files + +- `arriba_visualisation/` + - `_combined_fusions_arriba_visualisation.pdf` + +
-For more information about how to use MultiQC reports, see [http://multiqc.info](http://multiqc.info) +The visualisation displays the fusions that fusioninspector outputs. That means that fusions from all callers are aggregated (by fusion-report) and then analyzed through fusioninspector (Note: Fusioninspector contains a filtering step!). + +### Cat + +
+Output files + +- `cat/` + - `_1.merged.fastq.gz` + - `_2.merged.fastq.gz` + +
+ +If multiple libraries or runs have been provided for the same sample in the input samplesheet (e.g. to increase sequencing depth) then these will be merged at the very beginning of the pipeline in order to have consistent sample naming throughout the pipeline. Please refer to the [usage](https://nf-co.re/rnafusion/usage#samplesheet-input) documentation to see how to specify these samples in the input samplesheet. + +### Fastp + +If `--trim_fastp` is selected, [fastp](https://github.com/OpenGene/fastp) will filter low quality reads as well as bases at the 5' and 3' ends, trim adapters (automatically detected, but input with parameter `--adapter_fasta` is possible). 3' trimming is also possible via parameter `--trim_tail`. + +
+Output files + +- `fastp/` + - `_1.fastp.fastq.gz` + - `_2.fastp.fastq.gz` + - `.fastp.html` + - `.fastp.json` + - `.fastp.log` + +
+ +### FastQC + +
+Output files + +- `fastqc/` + - `*_fastqc.html`: FastQC report containing quality metrics. + - `*_fastqc.zip`: Zip archive containing the FastQC report, tab-delimited data file and plot images. + +
+ +[FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) gives general quality metrics about your sequenced reads. It provides information about the quality score distribution across your reads, per base sequence content (%A/T/G/C), adapter contamination and overrepresented sequences. For further reading and documentation see the [FastQC help pages](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/). + +![MultiQC - FastQC sequence counts plot](images/mqc_fastqc_counts.png) + +![MultiQC - FastQC mean quality scores plot](images/mqc_fastqc_quality.png) + +![MultiQC - FastQC adapter content plot](images/mqc_fastqc_adapter.png) + +:::note +The FastQC plots displayed in the MultiQC report show _untrimmed_ reads. They may contain adapter sequence and potentially regions with low quality. +::: + +### FusionCatcher + +
+Output files + +- `fusioncatcher` + - `.fusioncatcher.fusion-genes.txt` + - `.fusioncatcher.summary.txt` + - `.fusioncatcher.log` + +
+ +[FusionCatcher](https://github.com/ndaniel/fusioncatcher) searches for novel/known somatic fusion genes, translocations, and chimeras in RNA-seq data. The parameter `--fusioncatcher_limitSjdbInsertNsj` can be used to modify `limitSjdbInsertNsj`. + +### CTAT-SPLICING + +
+Output files + +- `ctatsplicing` + - `arriba` + - `.cancer_intron_reads.sorted.bam` + - `.cancer_intron_reads.sorted.bam.bai` + - `.cancer.introns` + - `.cancer.introns.prelim` + - `.chckpts` + - `.ctat-splicing.igv.html` + - `.gene_reads.sorted.sifted.bam` + - `.gene_reads.sorted.sifted.bam.bai` + - `.igv.tracks` + - `.introns` + - `.introns.for_IGV.bed` + - `starfusion` + - `.cancer_intron_reads.sorted.bam` + - `.cancer_intron_reads.sorted.bam.bai` + - `.cancer.introns` + - `.cancer.introns.prelim` + - `.chckpts` + - `.ctat-splicing.igv.html` + - `.gene_reads.sorted.sifted.bam` + - `.gene_reads.sorted.sifted.bam.bai` + - `.igv.tracks` + - `.introns` + - `.introns.for_IGV.bed` + +
+ +[CTAT-SPLICING](https://github.com/TrinityCTAT/CTAT-SPLICING/wiki) detects and annotates aberrant splicing isoforms in cancer. This is run on the input files for `arriba` and/or `starfusion`. + +### FusionInspector + +
+Output files + +- `fusioninspector` + - `.fusion_inspector_web.html` - visualisation report described in detail [here](https://github.com/FusionInspector/FusionInspector/wiki/FusionInspector-Visualizations) + - `FusionInspector.log` + - `.FusionInspector.fusions.abridged.tsv` + +
+ +[FusionInspector](https://github.com/FusionInspector/FusionInspector/tree/master) performs a validation of fusion transcript predictions. Possibility to use `--fusioninspector_limitSjdbInsertNsj` to set limitSjdbInsertNsj to anything other than the default 1000000. + +### Fusion-report + +Please note that fusion-report is executed from fork https://github.com/Clinical-Genomics/fusion-report + +
+Output files + +- `fusionreport` + - + - `.fusionreport.tsv` + - `.fusionreport_filtered.tsv` + - `_fusionreport_index.html` - general report for all filtered fusions + - `.fusions.csv` - index in csv format + - `_.html` - specific report for each filtered fusion + +
+ +[Fusion-report](https://github.com/matq007/fusion-report) is a tool for parsing outputs from fusion detection tools. +The score is explained here: . Summary: + +The weights for databases are as follows: + +- COSMIC (50) +- MITELMAN (50) +- FusionGDB2 (0) + +The final formula for calculating score is: + +$$ +score = 0.5 * \sum_{tool}^{tools} f(fusion, tool)*w(tool) + 0.5 * \sum_{db}^{dbs} g(fusion, db)*w(db) +$$ + +All tools have the same weight. + +### Salmon + +
+Output files + +- `salmon` + - `` + +
+ +Folder containing the quantification results + +### Kallisto + +
+Output files + +- `kallisto` + - `.kallisto_quant.fusions.txt` + +
+ +Quantifying abundances of transcripts from bulk and single-cell RNA-Seq data, or more generally of target sequences using high-throughput sequencing reads. + +### Vcf_collect + +
+Output files + +- `vcf_collect` + - `_fusion_data.vcf` - contains the fusions in vcf format with collected statistics. + +Vcf-collect takes as input the results of fusion-report and fusioninspector. That means fusions from all tools are aggregated. Fusioninspector applies a filter, so it is possible that some fusions detected by a caller are filtered out by fusioninspector. In those cases, vcf-collect will still display the fusions, but a lot of data will be missing, as that data comes from the per-fusion analysis performed by fusioninspector. + +
+ +[Megafusion](https://github.com/J35P312/MegaFusion) converts RNA fusion files to SV VCF and collects statistics and metrics in a VCF file. + +### MultiQC + +
+Output files + +- `multiqc/` + - `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. + - `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. + - `multiqc_plots/`: directory containing static images from the report in various formats. + +
+ +[MultiQC](http://multiqc.info) is a visualization tool that generates a single HTML report summarising all samples in your project. Most of the pipeline QC results are visualised in the report and further statistics are available in the report data directory. + +Results generated by MultiQC collate pipeline QC from supported tools e.g. FastQC. The pipeline has special steps which also allow the software versions to be reported in the MultiQC output for future traceability. For more information about how to use MultiQC reports, see [http://multiqc.info](http://multiqc.info). + +### Picard + +
+Output files + +Picard CollectRnaMetrics and picard MarkDuplicates share the same output directory. + +- `picard` + - `.MarkDuplicates.metrics.txt` - metrics from MarkDuplicates + - `_rna_metrics.txt` - metrics from CollectRnaMetrics + - `_insert_size_metrics.txt.txt` - metrics from CollectInsertSizeMetrics + - `.bam` - BAM file with marked duplicates + +
+ +### Samtools + +#### Samtools sort + +Samtools sort is used to sort BAM files from STAR_FOR_STARFUSION (for arriba visualisation) + +
+Output files + +- `samtools_sort_for_` + - `(_chimeric)_sorted.bam` - sorted BAM file + +
+ +#### Samtools index + +Samtools index is used to index BAM files from STAR_FOR_ARRIBA (for arriba visualisation) and STAR_FOR_STARFUSION (for QC) + +
+Output files + +- `samtools_for_` + - `.(Aligned.sortedByCoord).out.bam.bai` - + +
+ +### STAR + +STAR is used to align to genome reference + +STAR is run for 3 tools: + +For `arriba` with the parameters: + +```bash +--readFilesCommand zcat \ +--outSAMtype BAM Unsorted \ +--outSAMunmapped Within \ +--outBAMcompression 0 \ +--outFilterMultimapNmax 50 \ +--peOverlapNbasesMin 10 \ +--alignSplicedMateMapLminOverLmate 0.5 \ +--alignSJstitchMismatchNmax 5 -1 5 5 \ +--chimSegmentMin 10 \ +--chimOutType WithinBAM HardClip \ +--chimJunctionOverhangMin 10 \ +--chimScoreDropMax 30 \ +--chimScoreJunctionNonGTAG 0 \ +--chimScoreSeparation 1 \ +--chimSegmentReadGapMax 3 \ +--chimMultimapNmax 50 +``` + +For `STAR-fusion` with the parameters: + +```bash +--twopassMode Basic \ +--outReadsUnmapped None \ +--readFilesCommand zcat \ +--outSAMstrandField intronMotif \ +--outSAMunmapped Within \ +--chimSegmentMin 12 \ +--chimJunctionOverhangMin 8 \ +--chimOutJunctionFormat 1 \ +--alignSJDBoverhangMin 10 \ +--alignMatesGapMax 100000 \ +--alignIntronMax 100000 \ +--alignSJstitchMismatchNmax 5 -1 5 5 \ +--chimMultimapScoreRange 3 \ +--chimScoreJunctionNonGTAG -4 \ +--chimMultimapNmax 20 \ +--chimNonchimScoreDropMin 10 \ +--peOverlapNbasesMin 12 \ +--peOverlapMMp 0.1 \ +--alignInsertionFlush Right \ +--alignSplicedMateMapLminOverLmate 0 \ +--alignSplicedMateMapLmin 30 \ +--chimOutType Junctions \ +--quantMode GeneCounts +``` + +> STAR_FOR_STARFUSION uses `${params.ensembl}/Homo_sapiens.GRCh38.${params.ensembl_version}.chr.gtf` whereas STAR_FOR_ARRIBA uses `${params.ensembl_ref}/Homo_sapiens.GRCh38.${params.ensembl_version}.gtf` + +
+Output files + +**Common** + +- `star_for_` +- `.Log.final.out` +- `.Log.progress.out` +- `.SJ.out.tab` + +**For arriba:** + +- `.Aligned.out.bam` + + **For starfusion:** + +- `.Aligned.sortedByCoord.out.bam` +- `.Chimeric.out.junction` +- `.ReadsPerGene.out.tab` + +
+ +The STAR index is generated with `--sjdbOverhang ${params.read_length - 1}`, params.read_length default is 100. + +### STAR-fusion + +
+Output files + +- `starfusion` + - `.starfusion.fusion_predictions.tsv` - contains the identified fusions + - `.starfusion.abridged.tsv` - contains the identified fusions abridged + - `starfusion.abridged.coding_effect.tsv` + +
+ +### StringTie + +
+Output files + +- `stringtie//stringtie.merged.gtf` - merged gtf from annotation and stringtie output gtfs +
+ +### Pipeline information + +
+Output files + +- `pipeline_info/` + - Reports generated by Nextflow: `execution_report.html`, `execution_timeline.html`, `execution_trace.txt` and `pipeline_dag.dot`/`pipeline_dag.svg`. + - Reports generated by the pipeline: `pipeline_report.html`, `pipeline_report.txt` and `software_versions.yml`. The `pipeline_report*` files will only be present if the `--email` / `--email_on_fail` parameters are used when running the pipeline. + - Reformatted samplesheet files used as input to the pipeline: `samplesheet.valid.csv`. + - Parameters used by the pipeline run: `params.json`. + +
+ +[Nextflow](https://www.nextflow.io/docs/latest/tracing.html) provides excellent functionality for generating various reports relevant to the running and execution of the pipeline. This will allow you to troubleshoot errors with the running of the pipeline, and also provide you with other information such as launch commands, run times and resource usage. diff --git a/docs/references.md b/docs/references.md deleted file mode 100644 index a6e0bffc..00000000 --- a/docs/references.md +++ /dev/null @@ -1,154 +0,0 @@ -# nfcore/rnafusion: Download references for tools - -## 1. Using nextflow helper script - -Downloading references manually is a tedious long process. To make the pipeline easier to work with, we provide a script to download all necessary references for fusion detection tools. - -> **TL;DR:** Make sure to download the correct references for your need! - -```bash -# Example how to download references -# Note: STAR-Fusion has two versions of reference: -# * NCBI -# * Ensembl (generated by the script) -nextflow run nf-core/rnafusion/download-references.nf - -profile - --star_fusion - --fusioncatcher - --ericscript - --pizzly - --fusion_report --cosmic_usr --cosmic_passwd - --outdir -``` - -For additional optional parameters run: - -```bash -nextflow run nf-core/rnafusion/download-references.nf --help -``` - -## 2. 
Manual download - -### STAR-Fusion (NCBI) - -```bash -wget -N https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/GRCh38_v27_CTAT_lib_Feb092018.plug-n-play.tar.gz -O GRCh38_v27_CTAT_lib_Feb092018.plug-n-play.tar.gz -tar -xvzf GRCh38_v27_CTAT_lib_Feb092018.plug-n-play.tar.gz -``` - -> Update your custom configuration file to include the directory - -```groovy -params { - star_fusion_ref = "/path/to/GRCh38_v27_CTAT_lib_Feb092018/ctat_genome_lib_build_dir" -} -``` - -### STAR-Fusion (Custom Ensembl example) - -```bash -# download all chromosomes from ensembl -wget ftp://ftp.ensembl.org/pub/release-77/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.chromosome.{1..22}.fa.gz -wget ftp://ftp.ensembl.org/pub/release-77/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.chromosome.{MT,X,Y}.fa.gz -gunzip -c Homo_sapiens.GRCh38.dna.chromosome.* > Homo_sapiens.GRCh38_r77.all.fa - -# download fasta file -wget ftp://ftp.ensembl.org/pub/release-77/gtf/homo_sapiens/Homo_sapiens.GRCh38.77.chr.gtf.gz - -# download -wget ftp://ftp.ebi.ac.uk/pub/databases/Pfam/current_release/Pfam-A.hmm.gz -gunzip Pfam-A.hmm.gz -hmmpress Pfam-A.hmm - -prep_genome_lib.pl - --genome_fa Homo_sapiens.GRCh38_r77.all.fa - --gtf Homo_sapiens.GRCh38.77.chr.gtf - --pfam_db Pfam-A.hmm - --CPU 10 -``` - -> Update your custom configuration file to include the directory - -```groovy -params { - star_fusion_ref = "/path/to/ctat_genome_lib_build_dir" -} -``` - -## Fusioncatcher - -```bash -wget -N http://sourceforge.net/projects/fusioncatcher/files/data/human_v90.tar.gz.aa -wget -N http://sourceforge.net/projects/fusioncatcher/files/data/human_v90.tar.gz.ab -wget -N http://sourceforge.net/projects/fusioncatcher/files/data/human_v90.tar.gz.ac -wget -N http://sourceforge.net/projects/fusioncatcher/files/data/human_v90.tar.gz.ad -cat human_v90.tar.gz.* | tar xz -``` - -> Update your custom configuration file to include the directory - -```groovy -params { - fusioncatcher_ref = '/path/to/human_v90' -} -``` - -## 
Ericscript - -```bash -wget -N https://raw.githubusercontent.com/circulosmeos/gdown.pl/dfd6dc910a38a42d550397bb5c2335be2c4bcf54/gdown.pl \ -&& chmod +x gdown.pl \ -&& ./gdown.pl "https://drive.google.com/uc?export=download&confirm=qgOc&id=0B9s__vuJPvIiUGt1SnFMZFg4TlE" ericscript_db_homosapiens_ensembl84.tar.bz2 \ -&& rm gdown.pl -``` - -> Update your custom configuration file to include the directory - -```groovy -params { - ericscript_ref = '/path/to/ericscript_db_homosapiens_ensembl84' -} -``` - -## Pizzly - -```bash -# transcriptome -wget -N ftp://ftp.ensembl.org/pub/release-94/fasta/homo_sapiens/cdna/Homo_sapiens.GRCh38.cdna.all.fa.gz \ - -# annotation -wget -N ftp://ftp.ensembl.org/pub/release-94/gtf/homo_sapiens/Homo_sapiens.GRCh38.94.gtf.gz && gunzip Homo_sapiens.GRCh38.94.gtf.gz -``` - -> Update your custom configuration file to include the directory - -```groovy -params { - pizzly_fasta = "/path/to/pizzly_ref/Homo_sapiens.GRCh38.cdna.all.fa.gz" - pizzly_gtf = "/path/to/pizzly_ref/Homo_sapiens.GRCh38.94.gtf" -} -``` - -## Squid - -Requires: - -* STAR-Index (is either provided by the user or built by the pipeline) -* GTF file - -## AWS iGenomes - -```bash -mkdir -p igenomes/Homo_sapiens/NCBI/GRCh38/ -aws s3 --no-sign-request --region eu-west-1 sync s3://ngi-igenomes/igenomes/Homo_sapiens/NCBI/GRCh38/ . -``` - -## FusionInspector - -> Uses reference genome from STAR-Fusion (ctat_genome_lib_build_dir) - -## fusion-report - -```bash -fusion_report download --cosmic_usr --cosmic_passwd /output/databases -``` diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md deleted file mode 100644 index 84955f8d..00000000 --- a/docs/troubleshooting.md +++ /dev/null @@ -1,33 +0,0 @@ -# nf-core/rnafusion: Troubleshooting - -## Input files not found - -If only no file, only one input file , or only read one and not read two is picked up then something is wrong with your input file declaration - -1. The path must be enclosed in quotes (`'` or `"`) -2. 
The path must have at least one `*` wildcard character. This is even if you are only running one paired end sample. -3. When using the pipeline with paired end data, the path must use `{1,2}` or `{R1,R2}` notation to specify read pairs. -4. If you are running Single end data make sure to specify `--singleEnd` - -If the pipeline can't find your files then you will get the following error - -```bash -ERROR ~ Cannot find any reads matching: *{1,2}.fastq.gz -``` - -Note that if your sample name is "messy" then you have to be very particular with your glob specification. A file name like `L1-1-D-2h_S1_L002_R1_001.fastq.gz` can be difficult enough for a human to read. Specifying `*{1,2}*.gz` wont work give you what you want Whilst `*{R1,R2}*.gz` will. - -## Data organization - -The pipeline can't take a list of multiple input files - it takes a glob expression. If your input files are scattered in different paths then we recommend that you generate a directory with symlinked files. If running in paired end mode please make sure that your files are sensibly named so that they can be properly paired. See the previous point. - -## Strange errors from tools - -Make sure you are using the **recommended** amount of RAM. Some tools will fail because of this and will break the pipeline throwing strange error messages and codes. Some of the tools are not maintained any more and require small hacking as can be seen in some `Dockerfiles` defined in `tools/` folder. - -## Extra resources and getting help - -If you still have an issue with running the pipeline then feel free to contact us. -Have a look at the [pipeline website](https://github.com/nf-core/rnafusion) to find out how. - -If you have problems that are related to Nextflow and not our pipeline then check out the [Nextflow gitter channel](https://gitter.im/nextflow-io/nextflow) or the [google group](https://groups.google.com/forum/#!forum/nextflow). 
diff --git a/docs/usage.md b/docs/usage.md index 4547ff00..b3942057 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -1,507 +1,456 @@ -# nf-core/rnafusion: Usage - -## Table of contents - - - -* [Table of contents](#table-of-contents) -* [Introduction](#introduction) -* [Running the pipeline](#running-the-pipeline) - * [Using Docker](#running-the-pipeline-using-docker) - * [Using Singularity](#running-the-pipeline-using-singularity) -* [Updating the pipeline](#updating-the-pipeline) -* [Reproducibility](#reproducibility) -* [Main arguments](#main-arguments) - * [`-profile`](#-profile-single-dash) - * [`awsbatch`](#awsbatch) - * [`conda`](#conda) - * [`docker`](#docker) - * [`singularity`](#singularity) - * [`test`](#test) - * [`--reads`](#--reads) - * [`--singleEnd`](#--singleend) -* [Tool flags](#tool-flags) - * [`--star_fusion`](#--star_fusion) - * [`--star_fusion_opt`](#--star_fusion_opt) - * [`--fusioncatcher`](#--fusioncatcher) - * [`--fusioncatcher_opt`](#--fusioncatcher_opt) - * [`--ericscript`](#--ericscript) - * [`--pizzly`](#--pizzly) - * [`--squid`](#--squid) - * [`--fusion_report_opt`](#--fusion_report_opt) - * [`--debug`](#--debug) -* [Visualization flags](#visualization-flags) - * [`--fusion_inspector`](#--fusion_inspector) -* [Reference genomes](#reference-genomes) - * [`--fasta`](#--fasta) - * [`--gtf`](#--gtf) - * [`--star_index`](#--star_index) - * [`--star_fusion_ref`](#--star_fusion_ref) - * [`--fusioncatcher_ref`](#--fusioncatcher_ref) - * [`--ericscript_ref`](#--ericscript_ref) - * [`--pizzly_fasta`](#--pizzly_fasta) - * [`--pizzly_gtf`](#--pizzly_gtf) - * [`--genome` (using iGenomes)](#--genome-using-igenomes) - * [`--igenomes`](#--igenomes) -* [Job resources](#job-resources) - * [Automatic resubmission](#automatic-resubmission) - * [Custom resource requests](#custom-resource-requests) -* [AWS Batch specific parameters](#aws-batch-specific-parameters) - * [`--awsqueue`](#--awsqueue) - * [`--awsregion`](#--awsregion) -* [Other command line 
parameters](#other-command-line-parameters) - * [`--read_length`](#--read_length) - * [`--outdir`](#--outdir) - * [`--email`](#--email) - * [`-name`](#-name) - * [`-resume`](#-resume) - * [`-c`](#-c) - * [`--custom_config_version`](#--custom_config_version) - * [`--custom_config_base`](#--custom_config_base) - * [`--max_memory`](#--max_memory) - * [`--max_time`](#--max_time) - * [`--max_cpus`](#--max_cpus) - * [`--plaintext_email`](#--plaintext_email) - * [`--monochrome_logs`](#--monochrome_logs) - * [`--multiqc_config`](#--multiqc_config) - - -## Introduction - -Nextflow handles job submissions on SLURM or other environments, and supervises running the jobs. Thus the Nextflow process must run until the pipeline is finished. We recommend that you put the process running in the background through `screen` / `tmux` or similar tool. Alternatively you can run nextflow within a cluster job submitted your job scheduler. - -It is recommended to limit the Nextflow Java virtual machines memory. We recommend adding the following line to your environment (typically in `~/.bashrc` or `~./bash_profile`): +# nf-core/rnafusion: Usage -```bash -NXF_OPTS='-Xms1g -Xmx4g' -``` - -## Running the pipeline +## :warning: Please read this documentation on the nf-core website: [https://nf-co.re/rnafusion/usage](https://nf-co.re/rnafusion/usage) -The typical command for running the whole pipeline is as follows: +> _Documentation of pipeline parameters is generated automatically from the pipeline schema and can no longer be found in markdown files._ -### Running the pipeline using Docker +## Pipeline summary -This will launch the pipeline using `docker` with configuration profile [example-docker.config](https://github.com/nf-core/rnafusion/blob/master/example/custom-docker.config). See below for more information about profiles. 
+The pipeline is divided into two parts: -```bash -# With custom fasta and gtf (Ensembl example) -nextflow run nf-core/rnafusion - --reads '*_R{1,2}.fastq.gz' - -profile docker -c 'example/custom-docker.config' - --fasta 'Homo_sapiens.GRCh38.95.all.fa' - --gtf 'Homo_sapiens.GRCh38.95.chr.gtf' - --star_fusion - --fusioncatcher - --ericscript - --pizzly - --squid - --fusion_inspector - -# With NCBI GRCh38 genome reference -nextflow run nf-core/rnafusion - --reads '*_R{1,2}.fastq.gz' - -profile docker -c 'example/custom-docker.config' - --genome GRCh38 - --igenomes_base '/path/to/igenomes' - --star_fusion - --fusioncatcher - --ericscript - --pizzly - --squid - --fusion_inspector -``` +1. Download and build references -### Running the pipeline using Singularity +- specified with `--references_only` parameter +- required only once before running the pipeline +- **Important**: has to be run with each new release -First start by downloading singularity images. Sometimes the pipeline can crash if you are not using downloaded images (might be some network issues). +2. Detecting fusions -```bash -nextflow run nf-core/rnafusion/download-singularity-img.nf --download_all --outdir /path +- Supported tools: `Arriba`, `FusionCatcher`, `STAR-Fusion`, `StringTie` and `CTAT-SPLICING` +- QC: `Fastqc`, `MultiQC`, and `Picard CollectInsertSize`, `Picard CollectWgsMetrics`, `Picard Markduplicates` +- Fusions visualization: `Arriba`, `fusion-report`, `FusionInspector`, and `vcf_collect` -# or +## Download and build references -cd utils && sh download-singularity-img.sh /path/to/images -``` +The rnafusion pipeline needs references for the fusion detection tools, so downloading these is a **requirement**. -This will launch the pipeline using `singularity` with configuration profile [example-singularity.config](https://github.com/nf-core/rnafusion/blob/master/example/custom-singularity.config). See below for more information about profiles. 
+> **IMPORTANT** +> +> - Note that this step takes about 24 hours to complete on HPC. +> - Do not provide a samplesheet via the `input` parameter, otherwise the pipeline will run the analysis directly after downloading the references (except if that is what you want). ```bash -# With custom fasta and gtf (Ensembl example) -nextflow run nf-core/rnafusion - --reads '*_R{1,2}.fastq.gz' - -profile singularity -c 'example/custom-singularity.config' - --fasta 'Homo_sapiens.GRCh38.95.all.fa' - --gtf 'Homo_sapiens.GRCh38.95.chr.gtf' - --star_fusion - --fusioncatcher - --ericscript - --pizzly - --squid - --fusion_inspector - -# With NCBI GRCh38 genome reference -nextflow run nf-core/rnafusion - --reads '*_R{1,2}.fastq.gz' - -profile singularity -c 'example/custom-singularity.config' - --genome GRCh38 - --igenomes_base '/path/to/igenomes' - --star_fusion - --fusioncatcher - --ericscript - --pizzly - --squid - --fusion_inspector +nextflow run nf-core/rnafusion \ + -profile \ + --references_only --all \ + --cosmic_username --cosmic_passwd \ + --genomes_base \ + --outdir ``` ---- - -It is also possible to execute **only** specific tools: +References for each tool can also be downloaded separately with: ```bash -nextflow run nf-core/rnafusion - --reads '*_R{1,2}.fastq.gz' - --genome GRCh38 -profile docker -c 'example/custom-docker.config' - --fusioncatcher - --ericscript +nextflow run nf-core/rnafusion \ + -profile \ + --references_only -- -- ... \ + --cosmic_username --cosmic_passwd \ + --genomes_base \ + --outdir ``` -Note that the pipeline will create the following files in your working directory: +If you are not covered by the research COSMIC license and want to avoid using COSMIC, you can provide the additional option `--no_cosmic`. -```bash -work # Directory containing the nextflow working files -results # Finished results (configurable, see below) -.nextflow_log # Log file from Nextflow -# Other nextflow hidden files, eg. history of pipeline runs and old logs.
-``` +### Downloading the cosmic database with SANGER or QIAGEN -### Updating the pipeline +#### For academic users -When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline: +First register for a free account at COSMIC at [https://cancer.sanger.ac.uk/cosmic/register](https://cancer.sanger.ac.uk/cosmic/register) using a university email. The account is **only activated upon** clicking the link in the registration email. -```bash -nextflow pull nf-core/rnafusion -``` +#### For non-academic users -### Reproducibility +Use credentials from QIAGEN and add `--qiagen` -It's a good idea to specify a pipeline version when running the pipeline on your data. This ensures that a specific version of the pipeline code and software are used when you run your pipeline. If you keep using the same tag, you'll be running the same version of the pipeline, even if there have been changes to the code since. - -First, go to the [nf-core/rnafusion releases page](https://github.com/nf-core/rnafusion/releases) and find the latest version number - numeric only (eg. `1.3.1`). Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 1.3.1`. - -This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. - -## Main arguments +```bash +nextflow run nf-core/rnafusion \ + -profile \ + --references_only -- -- ... \ + --cosmic_username --cosmic_passwd \ + --genomes_base \ + --outdir --qiagen +``` -### `-profile` +#### STAR-Fusion references downloaded vs built -Use this parameter to choose a configuration profile.
Profiles can give configuration presets for different compute environments. Note that multiple profiles can be loaded, for example: `-profile docker` - the order of arguments is important! +By default STAR-Fusion references are **built**. You can also download them from [CTAT](https://github.com/NCIP/Trinity_CTAT/wiki) by using the flag `--starfusion_build FALSE` for both reference building and fusion detection. This allows more flexibility for different organisms but **be aware that STAR-Fusion reference download is not recommended as not fully tested!** -If `-profile` is not specified at all the pipeline will be run locally and expects all software to be installed and available on the `PATH`. +#### Issues with building references -* `awsbatch` - * A generic configuration profile to be used with AWS Batch. -* `conda` - * A generic configuration profile to be used with [conda](https://conda.io/docs/) - * Pulls most software from [Bioconda](https://bioconda.github.io/) -* `docker` - * A generic configuration profile to be used with [Docker](http://docker.com/) - * Pulls software from dockerhub: [`nfcore/rnafusion`](http://hub.docker.com/r/nfcore/rnafusion/) -* `singularity` - * A generic configuration profile to be used with [Singularity](http://singularity.lbl.gov/) - * Pulls software from DockerHub: [`nfcore/rnafusion`](http://hub.docker.com/r/nfcore/rnafusion/) -* `test` - * A profile with a complete configuration for automated testing - * Includes links to test data so needs no other parameters +If process `FUSIONREPORT_DOWNLOAD` times out, it could be due to network restriction (for example if trying to run on HPC). 
As this process is lightweight in cpu, memory and time, running on local machines with the following options might solve the issue: -### `--reads` +```bash +nextflow run nf-core/rnafusion \ + -profile \ + --references_only \ + --cosmic_username --cosmic_passwd \ + --fusionreport \ + --genomes_base \ + --outdir +``` -Use this to specify the location of your input FastQ files. For example: +Adjustments for cpu and memory requirements can be done by feeding a custom configuration with `-c /PATH/TO/CUSTOM/CONFIG`. +Where the custom configuration could look like (adaptation to local machine necessary): -```bash ---reads 'path/to/data/sample_*_{1,2}.fastq.gz' +```text +process { + withName: 'NFCORE_RNAFUSION:RNAFUSION:BUILD_REFERENCES:FUSIONREPORT_DOWNLOAD' { + memory = '8.GB' + cpus = 4 + } +} ``` -Please note the following requirements: +The four `fusion-report` files: `cosmic.db`, `fusiongdb2.db`, `mitelman.db` +should then be copied into the HPC `/references/fusion_report_db`. -1. The path must be enclosed in quotes -2. The path must have at least one `*` wildcard character -3. When using the pipeline with paired end data, the path must use `{1,2}` notation to specify read pairs. +#### Note about fusioncatcher references -If left unspecified, a default pattern is used: `data/*{1,2}.fastq.gz` +The references are only built based on ensembl version 102. It is not possible currently to use any other version/source. -It is not possible to run a mixture of single-end and paired-end files in one run. +## Running the pipeline -### `--singleEnd` +### Samplesheet input -By default, the pipeline expects paired-end data. If you have single-end data, you need to specify `--singleEnd` on the command line when you launch the pipeline. A normal glob pattern, enclosed in quotation marks, can then be used for `--reads`. For example: +You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. 
The pipeline will detect whether a sample is single- or paired-end from the samplesheet - the `fastq_2` column is empty for single-end. The samplesheet has to be a comma-separated file (.csv) but can have as many columns as you desire. There is a strict requirement for the first 4 columns to match those defined in the table below with the header row included. +A final samplesheet file consisting of both single- and paired-end data may look something like the one below. This is for 6 samples, where `TREATMENT_REP3` has been sequenced twice. -```bash ---singleEnd --reads '*.fastq.gz' +```csv title="samplesheet.csv" +sample,fastq_1,fastq_2,strandedness +CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz,forward +CONTROL_REP2,AEG588A2_S2_L002_R1_001.fastq.gz,AEG588A2_S2_L002_R2_001.fastq.gz,forward +CONTROL_REP3,AEG588A3_S3_L002_R1_001.fastq.gz,AEG588A3_S3_L002_R2_001.fastq.gz,forward +TREATMENT_REP1,AEG588A4_S4_L003_R1_001.fastq.gz,,forward +TREATMENT_REP2,AEG588A5_S5_L003_R1_001.fastq.gz,,forward +TREATMENT_REP3,AEG588A6_S6_L003_R1_001.fastq.gz,,forward +TREATMENT_REP3,AEG588A6_S6_L004_R1_001.fastq.gz,,forward ``` -## Tool flags +As you can see above for multiple runs of the same sample, the `sample` name has to be the same when you have re-sequenced the same sample more than once e.g. to increase sequencing depth. The pipeline will concatenate the raw reads before performing any downstream analysis. -### `--star_fusion` +| Column | Description | +| -------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). | +| `fastq_1` | Full path to FastQ file for Illumina short reads 1. 
File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | +| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | +| `strandedness` | Strandedness: forward or reverse. | -If enabled, executes `STAR-Fusion` tool. +### Starting commands -* `--star_fusion_opt` - * Parameter for specifying additional parameters. For more info, please refer to the [documentation](https://github.com/STAR-Fusion/STAR-Fusion/wiki) of the tool. - * **Has to be specified in custom configuration file. Will not work from a command line.** +The pipeline can either be run using all fusion detection tools or specifying individual tools. Visualisation tools will be run on all fusions detected. To run all tools (`arriba`, `fusioncatcher`, `starfusion`, `stringtie`, `ctat-splicing`) use the `--all` parameter: -### `--fusioncatcher` - -If enabled, executes `Fusioncatcher` tool. +```bash +nextflow run nf-core/rnafusion \ + -profile \ + --all \ + --input \ + --genomes_base \ + --outdir +``` -* `--fusioncatcher_opt` - * Parameter for specifying additional parameters. For more info, please refer to the [documentation](https://github.com/ndaniel/fusioncatcher/blob/master/doc/manual.md) of the tool. - * **Has to be specified in custom configuration file. Will not work from a command line.** +To run only a specific detection tool use: `--tool`: -### `--ericscript` +```bash +nextflow run nf-core/rnafusion \ + -profile \ + -- -- ... \ + --input \ + --genomes_base \ + --outdir +``` -If enabled, executes `Ericscript` tool. +If you are not covered by the research COSMIC license and want to avoid using COSMIC, you can provide the additional option `--no_cosmic`. -### `--pizzly` +> **IMPORTANT: Either `--all` or `--`** is necessary to run detection tools -If enabled, executes `Pizzly` tool. 
+`--genomes_base` should be the path to the directory containing the folder `references/` that was built with `--references_only`. -### `--squid` +Note that the pipeline will create the following files in your working directory: -If enabled, executes `Squid` tool. +```bash +work # Directory containing the nextflow working files + # Finished results in specified location (defined with --outdir) +.nextflow_log # Log file from Nextflow +# Other nextflow hidden files, eg. history of pipeline runs and old logs. +``` -### `--fusion_report_opt` +If you wish to repeatedly use the same parameters for multiple runs, rather than specifying each flag in the command, you can specify these in a params file. -* Parameter for specifying additional parameters. For more info, please refer to the fusion-report [documentation](https://matq007.github.io/fusion-report/usage.html). -* **Has to be specified in custom configuration file. Will not work from a command line.** +Pipeline settings can be provided in a `yaml` or `json` file via `-params-file `. -### `--debug` +:::warning +Do not use `-c ` to specify parameters as this will result in errors. Custom config files specified with `-c` must only be used for [tuning process resource specifications](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources), other infrastructural tweaks (such as output directories), or module arguments (args). +::: -To run only a specific tool (testing freshly implemented tool) just add `--debug` parameter. This parameter only works on **fusion tools only**! 
+The above pipeline run specified with a params file in yaml format: ```bash -nextflow run nf-core/rnafusion --reads '*_R{1,2}.fastq.gz' --genome GRCh38 -profile docker --star_fusion --test +nextflow run nf-core/rnafusion -profile docker -params-file params.yaml ``` -## Visualization flags +with: -### `--fusion_inspector` +```yaml title="params.yaml" +input: './samplesheet.csv' +outdir: './results/' +<...> +``` -If enabled, executes `Fusion-Inspector` tool. +You can also generate such `YAML`/`JSON` files via [nf-core/launch](https://nf-co.re/launch). -## Reference genomes +:::warning +Conda is not currently supported. +Supported genome is currently only GRCh38. +::: -The pipeline config files come bundled with paths to the illumina iGenomes reference index files. If running with docker or AWS, the configuration is set up to use the [AWS-iGenomes](https://ewels.github.io/AWS-iGenomes/) resource. +### Options -### `--fasta` +#### Trimming -If you prefer, you can specify the full path to your reference genome when you run the pipeline: +When the flag `--fastp_trim` is used, `fastp` is used to provide all tools with trimmed reads. Quality and adapter trimming by default. In addition, tail trimming and adapter_fastq specification are possible. Example usage: ```bash ---fasta '[path to Fasta reference]' +nextflow run nf-core/rnafusion \ +-profile \ +-- -- ... \ +--input \ +--genomes_base \ +--outdir \ +--fastp_trim \ +--trim_tail (optional) \ +--adapter_fastq (optional) ``` -### `--gtf` - -If you prefer, you can specify the full path to your annotation when you run the pipeline: +#### Filter fusions detected by 2 or more tools ```bash ---gtf '[path to GTF annotation]' +nextflow run nf-core/rnafusion \ + -profile \ + -- -- ... \ + --input \ + --genomes_base \ + --outdir + --tools_cutoff ``` -### `--star_index` +`--tools_cutoff INT` will discard fusions detected by less than INT tools both for display in fusionreport html index and to consider in fusioninspector. 
Default = 1, no filtering. -If you prefer, you can specify the full path for `STAR` index when you run the pipeline. If not specified, the pipeline will build the index using for reads with length 100bp (can be adjusted with parameter `--read_length`). +#### Adding custom fusions to consider as well as the detected set: whitelist ```bash ---star_index '[path to STAR index]' +nextflow run nf-core/rnafusion \ + -profile \ + -- -- ... \ + --input \ + --genomes_base \ + --outdir + --whitelist ``` -### `--star_fusion_ref` +The custom fusion file should have the following format: -Required reference in order to run `STAR-Fusion`. - -```bash ---star_fusion_ref '[path to STAR-Fusion reference]' +``` +GENE1--GENE2 +GENE3--GENE4 ``` -### `--fusioncatcher_ref` +#### Running FusionInspector only -Required reference in order to run `Fusioncatcher`. +FusionInspector can be run as a standalone with: ```bash ---fusioncatcher_ref '[path to Fusioncatcher reference]' +nextflow run nf-core/rnafusion \ +-profile \ +--fusioninspector_only \ +--fusioninspector_fusions \ +--input \ +--outdir ``` -### `--ericscript_ref` +The custom fusion file should have the following format: -Required reference in order to run `Ericscript`. - -```bash ---ericscript_ref '[path to Ericscript reference]' +``` +GENE1--GENE2 +GENE3--GENE4 ``` -### `--pizzly_fasta` - -Required reference in order to run `Pizzly`. +#### Skipping QC ```bash ---pizzly_fasta '[path to Pizzly Fasta reference]' +nextflow run nf-core/rnafusion \ +-profile \ +--skip_qc \ +--all OR <--tool> +--input \ +--genomes_base \ +--outdir ``` -### `--pizzly_gtf` +This will skip all QC-related processes (picard metrics collection) -Required reference in order to run `Pizzly`. 
+#### Skipping visualisation ```bash ---pizzly_gtf '[path to Pizzly GTF annotation]' +nextflow run nf-core/rnafusion \ +-profile \ +--skip_vis \ +--all OR <--tool> +--input \ +--genomes_base \ +--outdir ``` -### `--genome` (using iGenomes) - -There are 31 different species supported in the iGenomes references. To run the pipeline, you must specify which to use with the `--genome` flag. -You can find the keys to specify the genomes in the [iGenomes config file](../conf/igenomes.config). Common genomes that are supported are: - -* Human - * `--genome GRCh38` (recommended) - -> **TL;DR:** The pipeline only supports Homo Sapiens. We recommend using fasta nad gtf from Ensembl database and build custom STAR-Fusion reference. Most of the tools references are based on Ensembl. +This will skip all visualisation processes, including `fusion-report`, `FusionInspector` and `Arriba` visualisation. -Note that you can use the same configuration setup to save sets of reference files for your own use, even if they are not part of the iGenomes resource. See the [Nextflow documentation](https://www.nextflow.io/docs/latest/config.html) for instructions on where to save such a file. - -The syntax for this reference configuration is as follows: - -```nextflow -params { - genomes { - 'GRCh38' { - fasta = '' // Used if no star index given - } - // Any number of additional genomes, key is used with --genome - } -} -``` +#### Optional manual feed-in of fusion files -### `--igenomes` +It is possible to give the output of each tool manually using the argument: `--_fusions PATH/TO/FUSION/FILE`: this feature needs more testing, don't hesitate to open an issue if you encounter problems. -Load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`.
+#### Set different `--limitSjdbInsertNsj` parameter -## Job resources +There are two parameters to increase the `--limitSjdbInsertNsj` parameter if necessary: -### Automatic resubmission +- `--fusioncatcher_limitSjdbInsertNsj`, default: 2000000 +- `--fusioninspector_limitSjdbInsertNsj`, default: 1000000 -Each step in the pipeline has a default set of requirements for number of CPUs, memory and time. For most of the steps in the pipeline, if the job exits with an error code of `143` (exceeded requested resources) it will automatically resubmit with higher requests (2 x original, then 3 x original). If it still fails after three times then the pipeline is stopped. +Use the parameter `--cram` to compress the BAM files to CRAM for specific tools. Options: arriba, starfusion. Leave no space between options: -### Custom resource requests +- `--cram arriba,starfusion`, default: [] +- `--cram arriba` -Wherever process-specific requirements are set in the pipeline, the default value can be changed by creating a custom config file. See the files hosted at [`nf-core/configs`](https://github.com/nf-core/configs/tree/master/conf) for examples. +### Troubleshooting -If you are likely to be running `nf-core` pipelines regularly it may be a good idea to request that your custom config file is uploaded to the `nf-core/configs` git repository. Before you do this please can you test that the config file works with your pipeline of choice using the `-c` parameter (see definition below). You can then create a pull request to the `nf-core/configs` repository with the addition of your config file, associated documentation file (see examples in [`nf-core/configs/docs`](https://github.com/nf-core/configs/tree/master/docs)), and amending [`nfcore_custom.config`](https://github.com/nf-core/configs/blob/master/nfcore_custom.config) to include your custom profile. 
+#### GstrandBit issues -If you have any questions or issues please send us a message on [Slack](https://nf-core-invite.herokuapp.com/). +The issue below sometimes occurs: -## AWS Batch specific parameters - -Running the pipeline on AWS Batch requires a couple of specific parameters to be set according to your AWS Batch configuration. Please use the `-awsbatch` profile and then specify all of the following parameters. +``` +EXITING because of FATAL ERROR: cannot insert sequence on the fly because of strand GstrandBit problem +SOLUTION: please contact STAR author at https://groups.google.com/forum/#!forum/rna-star +``` -### `--awsqueue` +As the error message suggests, it is a STAR-related error and your best luck in solving it will be the forum. -The JobQueue that you intend to use on AWS Batch. +### Updating the pipeline -### `--awsregion` +When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline: -The AWS region to run your job in. Default is set to `eu-west-1` but can be adjusted to your needs. -Please make sure to also set the `-w/--work-dir` and `--outdir` parameters to a S3 storage bucket of your choice - you'll get an error message notifying you if you didn't. +```bash +nextflow pull nf-core/rnafusion +``` -## Other command line parameters +### Reproducibility -### `--read_length` +It is a good idea to specify a pipeline version when running the pipeline on your data. This ensures that a specific version of the pipeline code and software are used when you run your pipeline. If you keep using the same tag, you'll be running the same version of the pipeline, even if there have been changes to the code since. 
-Length is used to build a STAR index. Default is 100bp (Illumina). +First, go to the [nf-core/rnafusion releases page](https://github.com/nf-core/rnafusion/releases) and find the latest pipeline version - numeric only (eg. `1.3.1`). Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 1.3.1`. Of course, you can switch to another version by changing the number after the `-r` flag. -### `--outdir` +This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. For example, at the bottom of the MultiQC reports. -The output directory where the results will be saved. +To further assist in reproducibility, you can share and re-use [parameter files](#running-the-pipeline) to repeat pipeline runs with the same settings without having to write out a command with every single parameter. -### `--email` +:::tip +If you wish to share such a profile (for example, by uploading it as supplementary material for academic publications), make sure NOT to include cluster-specific paths to files or institution-specific profiles. +::: -Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run. +## Core Nextflow arguments -### `-name` +:::note +These options are part of Nextflow and use a _single_ hyphen (pipeline parameters use a double-hyphen). +::: -Name for the pipeline run. If not specified, Nextflow will automatically generate a random mnemonic. -This is used in the MultiQC report (if not default) and in the summary HTML / e-mail (always). +### `-profile` -**NB:** Single hyphen (core Nextflow option) +Use this parameter to choose a configuration profile. Profiles can give configuration presets for different compute environments.
+ +Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Apptainer, Conda) - see below. + +:::info +We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported. +::: + +The pipeline also dynamically loads configurations from [https://github.com/nf-core/configs](https://github.com/nf-core/configs) when it runs, making multiple config profiles for various institutional clusters available at run time. For more information and to see if your system is available in these configs please see the [nf-core/configs documentation](https://github.com/nf-core/configs#documentation). + +Note that multiple profiles can be loaded, for example: `-profile test,docker` - the order of arguments is important! +They are loaded in sequence, so later profiles can overwrite earlier profiles. + +If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. This is _not_ recommended, since it can lead to different results on different machines depending on the computer environment.
+ +- `test` + - A profile with a complete configuration for automated testing + - Includes links to test data so needs no other parameters +- `docker` + - A generic configuration profile to be used with [Docker](https://docker.com/) +- `singularity` + - A generic configuration profile to be used with [Singularity](https://sylabs.io/docs/) +- `podman` + - A generic configuration profile to be used with [Podman](https://podman.io/) +- `shifter` + - A generic configuration profile to be used with [Shifter](https://nersc.gitlab.io/development/shifter/how-to-use/) +- `charliecloud` + - A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/) +- `apptainer` + - A generic configuration profile to be used with [Apptainer](https://apptainer.org/) +- `wave` + - A generic configuration profile to enable [Wave](https://seqera.io/wave/) containers. Use together with one of the above (requires Nextflow ` 24.03.0-edge` or later). +- `conda` + - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter, Charliecloud, or Apptainer. +- `test` + - A profile with a complete configuration for automated testing + - Includes links to test data so needs no other parameters + - !!!! Run with `-stub` as all references need to be downloaded otherwise !!!! ### `-resume` -Specify this when restarting a pipeline. Nextflow will used cached results from any pipeline steps where the inputs are the same, continuing from where it got to previously. -You can also supply a run name to resume a specific run: `-resume [run-name]`. Use the `nextflow log` command to show previous run names. +Specify this when restarting a pipeline. Nextflow will use cached results from any pipeline steps where the inputs are the same, continuing from where it got to previously. 
For input to be considered the same, not only the names must be identical but the files' contents as well. For more info about this parameter, see [this blog post](https://www.nextflow.io/blog/2019/demystifying-nextflow-resume.html). -**NB:** Single hyphen (core Nextflow option) +You can also supply a run name to resume a specific run: `-resume [run-name]`. Use the `nextflow log` command to show previous run names. ### `-c` -Specify the path to a specific config file (this is a core NextFlow command). +Specify the path to a specific config file (this is a core Nextflow command). See the [nf-core website documentation](https://nf-co.re/usage/configuration) for more information. -**NB:** Single hyphen (core Nextflow option) +## Custom configuration -Note - you can use this to override pipeline defaults. +### Resource requests -### `--custom_config_version` +Whilst the default requirements set within the pipeline will hopefully work for most people and with most input data, you may find that you want to customise the compute resources that the pipeline requests. Each step in the pipeline has a default set of requirements for number of CPUs, memory and time. For most of the steps in the pipeline, if the job exits with any of the error codes specified [here](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/conf/base.config#L18) it will automatically be resubmitted with higher requests (2 x original, then 3 x original). If it still fails after the third attempt then the pipeline execution is stopped. -Provide git commit id for custom Institutional configs hosted at `nf-core/configs`. This was implemented for reproducibility purposes. Default is set to `master`. +To change the resource requests, please see the [max resources](https://nf-co.re/docs/usage/configuration#max-resources) and [tuning workflow resources](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources) section of the nf-core website. 
-```bash -## Download and use config file with following git commid id ---custom_config_version d52db660777c4bf36546ddb188ec530c3ada1b96 -``` +### Custom Containers -### `--custom_config_base` +In some cases you may wish to change which container or conda environment a step of the pipeline uses for a particular tool. By default, nf-core pipelines use containers and software from the [biocontainers](https://biocontainers.pro/) or [bioconda](https://bioconda.github.io/) projects. However, in some cases the pipeline-specified version may be out of date. -If you're running offline, nextflow will not be able to fetch the institutional config files -from the internet. If you don't need them, then this is not a problem. If you do need them, -you should download the files from the repo and tell nextflow where to find them with the -`custom_config_base` option. For example: +To use a different container from the default container or conda environment specified in a pipeline, please see the [updating tool versions](https://nf-co.re/docs/usage/configuration#updating-tool-versions) section of the nf-core website. -```bash -## Download and unzip the config files -cd /path/to/my/configs -wget https://github.com/nf-core/configs/archive/master.zip -unzip master.zip - -## Run the pipeline -cd /path/to/my/data -nextflow run /path/to/pipeline/ --custom_config_base /path/to/my/configs/configs-master/ -``` +### Custom Tool Arguments -> Note that the nf-core/tools helper package has a `download` command to download all required pipeline -> files + singularity containers + institutional configs in one go for you, to make this process easier. +A pipeline might not always support every possible argument or option of a particular tool used in the pipeline. Fortunately, nf-core pipelines provide some freedom to users to insert additional parameters that the pipeline does not include by default.
-### `--max_memory` +To learn how to provide additional arguments to a particular tool of the pipeline, please see the [customising tool arguments](https://nf-co.re/docs/usage/configuration#customising-tool-arguments) section of the nf-core website. -Use to set a top-limit for the default memory requirement for each process. -Should be a string in the format integer-unit. eg. `--max_memory '8.GB'` +### nf-core/configs -### `--max_time` +In most cases, you will only need to create a custom config as a one-off but if you and others within your organisation are likely to be running nf-core pipelines regularly and need to use the same settings regularly it may be a good idea to request that your custom config file is uploaded to the `nf-core/configs` git repository. Before you do this please can you test that the config file works with your pipeline of choice using the `-c` parameter. You can then create a pull request to the `nf-core/configs` repository with the addition of your config file, associated documentation file (see examples in [`nf-core/configs/docs`](https://github.com/nf-core/configs/tree/master/docs)), and amending [`nfcore_custom.config`](https://github.com/nf-core/configs/blob/master/nfcore_custom.config) to include your custom profile. -Use to set a top-limit for the default time requirement for each process. -Should be a string in the format integer-unit. eg. `--max_time '2.h'` +See the main [Nextflow documentation](https://www.nextflow.io/docs/latest/config.html) for more information about creating your own configuration files. -### `--max_cpus` +If you have any questions or issues please send us a message on [Slack](https://nf-co.re/join/slack) on the [`#configs` channel](https://nfcore.slack.com/channels/configs). --> -Use to set a top-limit for the default CPU requirement for each process. -Should be a string in the format integer-unit. eg. 
`--max_cpus 1` +## Running in the background -### `--plaintext_email` +Nextflow handles job submissions and supervises the running jobs. The Nextflow process must run until the pipeline is finished. -Set to receive plain-text e-mails instead of HTML formatted. +The Nextflow `-bg` flag launches Nextflow in the background, detached from your terminal so that the workflow does not stop if you log out of your session. The logs are saved to a file. -### `--monochrome_logs` +Alternatively, you can use `screen` / `tmux` or a similar tool to create a detached session which you can log back into at a later time. +Some HPC setups also allow you to run nextflow within a cluster job submitted to your job scheduler (from where it submits more jobs). -Set to disable colourful command line output and live life in monochrome. +## Nextflow memory requirements -### `--multiqc_config` +In some cases, the Nextflow Java virtual machines can start to request a large amount of memory. +We recommend adding the following line to your environment to limit this (typically in `~/.bashrc` or `~/.bash_profile`): -Specify a path to a custom MultiQC configuration file. +```bash +NXF_OPTS='-Xms1g -Xmx4g' +``` diff --git a/download-references.nf b/download-references.nf deleted file mode 100644 index 3fd65458..00000000 --- a/download-references.nf +++ /dev/null @@ -1,276 +0,0 @@ -#!/usr/bin/env nextflow -/* -======================================================================================== - nf-core/rnafusion -======================================================================================== - nf-core/rnafusion Analysis Pipeline.
- #### Homepage / Documentation - https://github.com/nf-core/rnafusion ----------------------------------------------------------------------------------------- -*/ - -def helpMessage() { - log.info nfcoreHeader() - log.info""" - Usage: - - The typical command for downloading references is as follows: - - nextflow run nf-core/rnafusion/download-references.nf -profile [PROFILE] [OPTIONS] --outdir /path/to/output - - Mandatory arguments: - --outdir Output directory for downloading - -profile Configuration profile to use. Can use multiple (comma separated) - Available: standard, conda, docker, singularity, awsbatch, test - - Options: - --arriba Download Arriba references - --star_fusion Download STAR-Fusion references [NCBI version by default] - --star_fusion_ensembl Download STAR-Fusion references [Ensebml, have to build manually] - --fusioncatcher Download Fusioncatcher references - --ericscript Download Ericscript references - --pizzly Download pizzly references - --fusion_report Download databases for fusion-report - --cosmic_usr [Required] COSMIC username - --cosmic_passwd [Required] COSMIC password - --igenomes Download iGenome Homo Sapiens version NCBI/GRCh38. - """.stripIndent() -} - -/* - * SET UP CONFIGURATION VARIABLES - */ - -// Show help emssage -if (params.help){ - helpMessage() - exit 0 -} - -params.running_tools = [] -if (!params.outdir) { - exit 1, "Output directory not specified!" 
-} -if (params.arriba) { - params.running_tools.add("Arriba") -} -if (params.igenomes) { - params.running_tools.add("iGenome") -} -if (params.star_fusion) { - params.running_tools.add("STAR-Fusion NCBI version") -} -if (params.star_fusion_ensembl) { - params.running_tools.add("STAR-Fusion Ensembl version") -} -if (params.fusioncatcher) { - params.running_tools.add("Fusioncatcher") -} -if (params.ericscript) { - params.running_tools.add("Ericscript") -} -if (params.pizzly) { - params.running_tools.add("Pizzly") -} -if (params.fusion_report) { - if (!params.cosmic_usr || !params.cosmic_passwd) { - exit 1, "Database credentials are required parameter!" - } - params.running_tools.add('fusion-report') -} - -// Header log info -log.info nfcoreHeader() -def summary = [:] -summary['Pipeline Name'] = 'nf-core/rnafusion/download-references.nf' -summary['Pipeline Version'] = workflow.manifest.version -summary['References'] = params.running_tools.size() == 0 ? 'None' : params.running_tools.join(", ") -summary['Max Resources'] = "$params.max_memory memory, $params.max_cpus cpus, $params.max_time time per job" -summary['Output dir'] = params.outdir -summary['Working dir'] = workflow.workDir -summary['Launch dir'] = workflow.launchDir -summary['Working dir'] = workflow.workDir -summary['Script dir'] = workflow.projectDir -summary['User'] = workflow.userName -summary['Config Profile'] = workflow.profile -if(params.config_profile_description) summary['Config Description'] = params.config_profile_description -if(params.config_profile_contact) summary['Config Contact'] = params.config_profile_contact -if(params.config_profile_url) summary['Config URL'] = params.config_profile_url -if(workflow.profile == 'awsbatch'){ - summary['AWS Region'] = params.awsregion - summary['AWS Queue'] = params.awsqueue -} -log.info summary.collect { k,v -> "${k.padRight(18)}: $v" }.join("\n") -log.info "\033[2m----------------------------------------------------\033[0m" - -process download_arriba { - 
publishDir "${params.outdir}/arriba_ref", mode: 'copy' - - when: - params.arriba - - output: - file '*' - - script: - """ - wget -N https://github.com/suhrig/arriba/releases/download/v1.1.0/arriba_v1.1.0.tar.gz -O arriba_v1.1.0.tar.gz - tar -xvzf arriba_v1.1.0.tar.gz && mv arriba_v1.1.0/database/* . && gunzip * && rm -rf arriba_* - """ -} - -process download_star_fusion { - publishDir "${params.outdir}/star_fusion_ref", mode: 'copy' - - when: - params.star_fusion - - output: - file '*' - - script: - """ - wget -N https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/GRCh38_gencode_v29_CTAT_lib_Mar272019.plug-n-play.tar.gz -O GRCh38_gencode_v29_CTAT_lib_Mar272019.plug-n-play.tar.gz - tar -xvzf GRCh38_gencode_v29_CTAT_lib_Mar272019.plug-n-play.tar.gz && rm GRCh38_gencode_v29_CTAT_lib_Mar272019.plug-n-play.tar.gz - """ -} - -process download_star_fusion_ensembl { - publishDir "${params.outdir}/star_fusion_ensembl_ref", mode: 'copy' - - when: - params.star_fusion_ensembl - - output: - file '*' - - script: - """ - wget -N ftp://ftp.ensembl.org/pub/release-77/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.chromosome.{1..22}.fa.gz - wget -N ftp://ftp.ensembl.org/pub/release-77/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.chromosome.{MT,X,Y}.fa.gz - gunzip -c Homo_sapiens.GRCh38.dna.chromosome.* > Homo_sapiens.GRCh38_r77.all.fa - wget -N ftp://ftp.ensembl.org/pub/release-77/gtf/homo_sapiens/Homo_sapiens.GRCh38.77.chr.gtf.gz - wget -N ftp://ftp.ebi.ac.uk/pub/databases/Pfam/current_release/Pfam-A.hmm.gz - gunzip Pfam-A.hmm.gz && hmmpress Pfam-A.hmm - prep_genome_lib.pl \\ - --genome_fa Homo_sapiens.GRCh38_r77.all.fa \\ - --gtf Homo_sapiens.GRCh38.77.chr.gtf \\ - --pfam_db Pfam-A.hmm \\ - --CPU 10 - """ -} - -process download_fusioncatcher { - publishDir "${params.outdir}/fusioncatcher_ref", mode: 'copy' - - when: - params.fusioncatcher - - output: - file '*' - - script: - """ - wget -N http://sourceforge.net/projects/fusioncatcher/files/data/human_v90.tar.gz.aa - wget 
-N http://sourceforge.net/projects/fusioncatcher/files/data/human_v90.tar.gz.ab - wget -N http://sourceforge.net/projects/fusioncatcher/files/data/human_v90.tar.gz.ac - wget -N http://sourceforge.net/projects/fusioncatcher/files/data/human_v90.tar.gz.ad - cat human_v90.tar.gz.* | tar xz - rm human_v90.tar* - """ -} - -process download_ericscript { - publishDir "${params.outdir}/ericscript_ref", mode: 'copy' - - when: - params.ericscript - - output: - file '*' - - script: - """ - wget -N https://raw.githubusercontent.com/circulosmeos/gdown.pl/dfd6dc910a38a42d550397bb5c2335be2c4bcf54/gdown.pl - chmod +x gdown.pl - ./gdown.pl "https://drive.google.com/uc?export=download&confirm=qgOc&id=0B9s__vuJPvIiUGt1SnFMZFg4TlE" ericscript_db_homosapiens_ensembl84.tar.bz2 - tar jxf ericscript_db_homosapiens_ensembl84.tar.bz2 - rm gdown.pl ericscript_db_homosapiens_ensembl84.tar.bz2 - """ -} - -process download_pizzly { - publishDir "${params.outdir}/pizzly_ref", mode: 'copy' - - when: - params.pizzly - - output: - file '*' - - script: - """ - wget -N ftp://ftp.ensembl.org/pub/release-94/fasta/homo_sapiens/cdna/Homo_sapiens.GRCh38.cdna.all.fa.gz - wget -N ftp://ftp.ensembl.org/pub/release-94/gtf/homo_sapiens/Homo_sapiens.GRCh38.94.gtf.gz && gunzip Homo_sapiens.GRCh38.94.gtf.gz - """ -} - -process download_databases { - publishDir "${params.outdir}/databases", mode: 'copy' - - when: - params.fusion_report - - output: - file '*' - - script: - """ - fusion_report download --cosmic_usr "${params.cosmic_usr}" --cosmic_passwd "${params.cosmic_passwd}" . - """ -} - -process download_igenome { - publishDir "${params.outdir}/igenome", mode: 'copy' - - when: - params.igenomes - - output: - file '*' - - script: - """ - aws s3 --no-sign-request --region eu-west-1 sync s3://ngi-igenomes/igenomes/Homo_sapiens/NCBI/GRCh38/ . 
- """ -} - -/* - * Completion - */ -workflow.onComplete { - log.info "[nf-core/rnafusion] Pipeline Complete" -} - -def nfcoreHeader(){ - // Log colors ANSI codes - c_reset = params.monochrome_logs ? '' : "\033[0m"; - c_dim = params.monochrome_logs ? '' : "\033[2m"; - c_black = params.monochrome_logs ? '' : "\033[0;30m"; - c_green = params.monochrome_logs ? '' : "\033[0;32m"; - c_yellow = params.monochrome_logs ? '' : "\033[0;33m"; - c_blue = params.monochrome_logs ? '' : "\033[0;34m"; - c_purple = params.monochrome_logs ? '' : "\033[0;35m"; - c_cyan = params.monochrome_logs ? '' : "\033[0;36m"; - c_white = params.monochrome_logs ? '' : "\033[0;37m"; - - return """ ${c_dim}----------------------------------------------------${c_reset} - ${c_green},--.${c_black}/${c_green},-.${c_reset} - ${c_blue} ___ __ __ __ ___ ${c_green}/,-._.--~\'${c_reset} - ${c_blue} |\\ | |__ __ / ` / \\ |__) |__ ${c_yellow}} {${c_reset} - ${c_blue} | \\| | \\__, \\__/ | \\ |___ ${c_green}\\`-._,-`-,${c_reset} - ${c_green}`._,._,\'${c_reset} - ${c_purple} nf-core/rnafusion v${workflow.manifest.version}${c_reset} - ${c_dim}----------------------------------------------------${c_reset} - """.stripIndent() -} \ No newline at end of file diff --git a/download-singularity-img.nf b/download-singularity-img.nf deleted file mode 100644 index 42fa588c..00000000 --- a/download-singularity-img.nf +++ /dev/null @@ -1,253 +0,0 @@ -#!/usr/bin/env nextflow -/* -======================================================================================== - nf-core/rnafusion -======================================================================================== - nf-core/rnafusion Analysis Pipeline. 
- #### Homepage / Documentation - https://github.com/nf-core/rnafusion ----------------------------------------------------------------------------------------- -*/ - -def helpMessage() { - log.info nfcoreHeader() - log.info""" - - Usage: - - The typical command for downloading singularity images is as follows: - - nextflow run nf-core/rnafusion/download-singularity-img.nf -profile [PROFILE] [OPTIONS] --outdir /path/to/output - - By default main image is downloaded. - - Mandatory arguments: - --outdir Output directory for downloading - -profile Configuration profile to use. Can use multiple (comma separated) - Available: standard, conda, docker, singularity, awsbatch, test - - Options: - --download_all Download all images - --arriba Download Arriba image - --ericscript Download Ericscript image - --fusioncatcher Download Fusioncatcher image - --fusion_inspector Download Fusion-Inspector image - --pizzly Download Pizzly image - --squid Download Squid image - --star_fusion Download STAR-Fusion image - - """.stripIndent() -} - -/* - * SET UP CONFIGURATION VARIABLES - */ - -// Show help emssage -if (params.help){ - helpMessage() - exit 0 -} - -params.running_tools = ["nf-core/rnafusion/${workflow.manifest.version}"] -if (!params.outdir) { - exit 1, "Output directory not specified!" 
-} -if (params.download_all) { - params.running_tools.add("All") -} -if (params.arriba) { - params.running_tools.add("Arriba") -} -if (params.star_fusion) { - params.running_tools.add("STAR-Fusion") -} -if (params.fusioncatcher) { - params.running_tools.add("Fusioncatcher") -} -if (params.ericscript) { - params.running_tools.add("Ericscript") -} -if (params.pizzly) { - params.running_tools.add("Pizzly") -} -if (params.squid) { - params.running_tools.add("Squid") -} -if (params.fusion_inspector) { - params.running_tools.add("Fusion-Inspector") -} - -// Header log info -log.info nfcoreHeader() -def summary = [:] -summary['Pipeline Name'] = 'nf-core/rnafusion/download-singularity-img.nf' -summary['Pipeline Version'] = workflow.manifest.version -summary['Tool images'] = params.running_tools.size() == 0 ? 'None' : params.running_tools.join(", ") -summary['Max Resources'] = "$params.max_memory memory, $params.max_cpus cpus, $params.max_time time per job" -summary['Output dir'] = params.outdir -summary['Working dir'] = workflow.workDir -summary['Launch dir'] = workflow.launchDir -summary['Working dir'] = workflow.workDir -summary['Script dir'] = workflow.projectDir -summary['User'] = workflow.userName -summary['Config Profile'] = workflow.profile -if(params.config_profile_description) summary['Config Description'] = params.config_profile_description -if(params.config_profile_contact) summary['Config Contact'] = params.config_profile_contact -if(params.config_profile_url) summary['Config URL'] = params.config_profile_url -if(workflow.profile == 'awsbatch'){ - summary['AWS Region'] = params.awsregion - summary['AWS Queue'] = params.awsqueue -} -log.info summary.collect { k,v -> "${k.padRight(18)}: $v" }.join("\n") -log.info "\033[2m----------------------------------------------------\033[0m" - -process download_base_image { - publishDir "${params.outdir}", mode: 'copy' - - when: - params.download_all - - output: - file "rnafusion_v${workflow.manifest.version}.img" - - 
script: - """ - singularity pull --name "rnafusion_v${workflow.manifest.version}.img" docker://nfcore/rnafusion:${workflow.manifest.version} - """ -} - -process download_arriba { - publishDir "${params.outdir}", mode: 'copy' - - when: - params.arriba || params.download_all - - output: - file "rnafusion_arriba_v${params.arriba_version}.img" - - script: - """ - singularity pull --name "rnafusion_arriba_v${params.arriba_version}.img" docker://nfcore/rnafusion:arriba_v${params.arriba_version} - """ -} - -process download_ericscript { - publishDir "${params.outdir}", mode: 'copy' - - when: - params.ericscript || params.download_all - - output: - file "rnafusion_ericscript_v${params.ericscript_version}.img" - - script: - """ - singularity pull --name "rnafusion_ericscript_v${params.ericscript_version}.img" docker://nfcore/rnafusion:ericscript_v${params.ericscript_version} - """ -} - -process download_fusioncatcher { - publishDir "${params.outdir}", mode: 'copy' - - when: - params.fusioncatcher || params.download_all - - output: - file "rnafusion_fusioncatcher_v${params.fusioncatcher_version}.img" - - script: - """ - singularity pull --name "rnafusion_fusioncatcher_v${params.fusioncatcher_version}.img" docker://nfcore/rnafusion:fusioncatcher_v${params.fusioncatcher_version} - """ -} - -process download_fusion_inspector { - publishDir "${params.outdir}", mode: 'copy' - - when: - params.fusion_inspector || params.download_all - - output: - file "rnafusion_fusion-inspector_v${params.fusion_inspector_version}.img" - - script: - """ - singularity pull --name "rnafusion_fusion-inspector_v${params.fusion_inspector_version}.img" docker://nfcore/rnafusion:fusion-inspector_v${params.fusion_inspector_version} - """ -} - -process download_pizzly { - publishDir "${params.outdir}", mode: 'copy' - - when: - params.pizzly || params.download_all - - output: - file "rnafusion_pizzly_v${params.pizzly_version}.img" - - script: - """ - singularity pull --name 
"rnafusion_pizzly_v${params.pizzly_version}.img" docker://nfcore/rnafusion:pizzly_v${params.pizzly_version} - """ -} - -process download_squid { - publishDir "${params.outdir}", mode: 'copy' - - when: - params.squid || params.download_all - - output: - file "rnafusion_squid_v${params.squid_version}.img" - - script: - """ - singularity pull --name "rnafusion_squid_v${params.squid_version}.img" docker://nfcore/rnafusion:squid_v${params.squid_version} - """ -} - -process download_star_fusion { - publishDir "${params.outdir}", mode: 'copy' - - when: - params.star_fusion || params.download_all - - output: - file "rnafusion_star-fusion_v${params.star_fusion_version}.img" - - script: - """ - singularity pull --name "rnafusion_star-fusion_v${params.star_fusion_version}.img" docker://nfcore/rnafusion:star-fusion_v${params.star_fusion_version} - """ -} - -/* - * Completion - */ -workflow.onComplete { - log.info "[nf-core/rnafusion] Pipeline Complete" -} - -def nfcoreHeader(){ - // Log colors ANSI codes - c_reset = params.monochrome_logs ? '' : "\033[0m"; - c_dim = params.monochrome_logs ? '' : "\033[2m"; - c_black = params.monochrome_logs ? '' : "\033[0;30m"; - c_green = params.monochrome_logs ? '' : "\033[0;32m"; - c_yellow = params.monochrome_logs ? '' : "\033[0;33m"; - c_blue = params.monochrome_logs ? '' : "\033[0;34m"; - c_purple = params.monochrome_logs ? '' : "\033[0;35m"; - c_cyan = params.monochrome_logs ? '' : "\033[0;36m"; - c_white = params.monochrome_logs ? 
'' : "\033[0;37m"; - - return """ ${c_dim}----------------------------------------------------${c_reset} - ${c_green},--.${c_black}/${c_green},-.${c_reset} - ${c_blue} ___ __ __ __ ___ ${c_green}/,-._.--~\'${c_reset} - ${c_blue} |\\ | |__ __ / ` / \\ |__) |__ ${c_yellow}} {${c_reset} - ${c_blue} | \\| | \\__, \\__/ | \\ |___ ${c_green}\\`-._,-`-,${c_reset} - ${c_green}`._,._,\'${c_reset} - ${c_purple} nf-core/rnafusion v${workflow.manifest.version}${c_reset} - ${c_dim}----------------------------------------------------${c_reset} - """.stripIndent() -} \ No newline at end of file diff --git a/environment.yml b/environment.yml deleted file mode 100644 index ebb0984c..00000000 --- a/environment.yml +++ /dev/null @@ -1,17 +0,0 @@ -# You can use this file to create a conda environment for this pipeline: -# conda env create -f environment.yml -name: nf-core-rnafusion-1.0.2 -channels: - - conda-forge - - bioconda - - defaults -dependencies: - - anaconda::openjdk=8.0.152 # Needed for FastQC - conda build hangs without this - - bioconda::star=2.7.0f # update STAR-Fusion and Fusion-Inspector - - conda-forge::r-data.table=1.12.0 - - conda-forge::r-gplots=3.0.1.1 - - bioconda::bioconductor-edger=3.24.1 - - conda-forge::r-markdown=0.9 - - bioconda::fusion-report=1.0.0 - - bioconda::fastqc=0.11.8 - - bioconda::multiqc=1.7 diff --git a/example/custom-docker.config b/example/custom-docker.config deleted file mode 100644 index 5eedfdc4..00000000 --- a/example/custom-docker.config +++ /dev/null @@ -1,37 +0,0 @@ -/* - * ------------------------------------------------- - * Nextflow config file for Munin Singularity - * ------------------------------------------------- - */ - -params { - max_cpus = 24 - max_memory = 256.GB - max_time = 72.h - container_version = '1.0.1' - - // Default tool versions - arriba_version = '1.1.0' - star_fusion_version = '1.5.0' - fusioncatcher_version = '1.00' - ericscript_version = '0.5.5' - pizzly_version = '0.37.3' - squid_version = '1.5' - 
fusion_inspector_version = '1.3.1' - - // Extra parameters for fusion tools - arriba_opt = '' - star_fusion_opt = '' - fusioncatcher_opt = '' - fusion_report_opt = '' - - reference_base = '/path/to/references' - databases = "${params.reference_base}/databases/" - - arriba_ref = "${params.reference_base}/arriba_ref" - fusioncatcher_ref = "${params.reference_base}/fusioncatcher_ref/human_v90" - star_fusion_ref = "${params.reference_base}/star_fusion_ensembl_ref/ctat_genome_lib_build_dir" - ericscript_ref = "${params.reference_base}/ericscript_ref/ericscript_db_homosapiens_ensembl84" - pizzly_fasta = "${params.reference_base}/pizzly_ref/Homo_sapiens.GRCh38.cdna.all.fa.gz" - pizzly_gtf = "${params.reference_base}/pizzly_ref/Homo_sapiens.GRCh38.94.gtf" -} \ No newline at end of file diff --git a/example/custom-singularity.config b/example/custom-singularity.config deleted file mode 100644 index 55691122..00000000 --- a/example/custom-singularity.config +++ /dev/null @@ -1,68 +0,0 @@ -/* - * ------------------------------------------------- - * Nextflow config file for Munin Singularity - * ------------------------------------------------- - */ - -params { - max_cpus = 24 - max_memory = 256.GB - max_time = 72.h - container_version = '1.0.1' - - // Default tool versions - arriba_version = '1.1.0' - star_fusion_version = '1.5.0' - fusioncatcher_version = '1.00' - ericscript_version = '0.5.5' - pizzly_version = '0.37.3' - squid_version = '1.5' - fusion_inspector_version = '1.3.1' - - // Extra parameters for fusion tools - arriba_opt = '' - star_fusion_opt = '' - fusioncatcher_opt = '' - fusion_report_opt = '' - - reference_base = '/path/to/references' - containerPath = "file:///path/to/containers/" - databases = "${params.reference_base}/databases/" - - arriba_ref = "${params.reference_base}/arriba_ref" - fusioncatcher_ref = "${params.reference_base}/fusioncatcher_ref/human_v90" - star_fusion_ref = "${params.reference_base}/star_fusion_ensembl_ref/ctat_genome_lib_build_dir" 
- ericscript_ref = "${params.reference_base}/ericscript_ref/ericscript_db_homosapiens_ensembl84" - pizzly_fasta = "${params.reference_base}/pizzly_ref/Homo_sapiens.GRCh38.cdna.all.fa.gz" - pizzly_gtf = "${params.reference_base}/pizzly_ref/Homo_sapiens.GRCh38.94.gtf" -} - -process { - container = "${params.containerPath}/rnafusion_v${params.container_version}.img" - - withName:arriba { - container = "${params.containerPath}/rnafusion_arriba_v${params.arriba_version}.img" - } - withName:star_fusion { - container = "${params.containerPath}/rnafusion_star-fusion_v${params.star_fusion_version}.img" - } - withName:fusioncatcher { - container = "${params.containerPath}/rnafusion_fusioncatcher_v${params.fusioncatcher_version}.img" - } - withName:fusion_inspector { - container = "${params.containerPath}/rnafusion_fusion-inspector_v${params.fusion_inspector_version}.img" - } - withName:ericscript { - container = "${params.containerPath}/rnafusion_ericscript_v${params.ericscript_version}.img" - } - withName:pizzly { - container = "${params.containerPath}/rnafusion_pizzly_v${params.pizzly_version}.img" - } - withName:squid { - container = "${params.containerPath}/rnafusion_squid_v${params.squid_version}.img" - } - // Download references - withName:download_star_fusion_ensembl { - container = "${params.containerPath}/rnafusion_star-fusion_v${params.star_fusion_version}.img" - } -} diff --git a/main.nf b/main.nf index 04b7f2f0..78781bed 100644 --- a/main.nf +++ b/main.nf @@ -1,973 +1,100 @@ #!/usr/bin/env nextflow /* -======================================================================================== - nf-core/rnafusion -======================================================================================== - nf-core/rnafusion Analysis Pipeline. 
- #### Homepage / Documentation - https://github.com/nf-core/rnafusion +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + nf-core/rnafusion +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Github : https://github.com/nf-core/rnafusion + Website: https://nf-co.re/rnafusion + Slack : https://nfcore.slack.com/channels/rnafusion ---------------------------------------------------------------------------------------- */ -def helpMessage() { - log.info nfcoreHeader() - log.info""" - - Usage: - - The typical command for running the pipeline is as follows: - - nextflow run nf-core/rnafusion --reads '*_R{1,2}.fastq.gz' -profile docker - - Mandatory arguments: - --reads Path to input data (must be surrounded with quotes) - -profile Configuration profile to use. Can use multiple (comma separated) - Available: conda, docker, singularity, awsbatch, test and more. - - Tool flags: - --arriba Run Arriba - --arriba_opt Extra parameter for Arriba - --star_fusion Run STAR-Fusion - --star_fusion_opt Extra parameter for STAR-Fusion - --fusioncatcher Run FusionCatcher - --fusioncatcher_opt Extra parameters for FusionCatcher - --ericscript Run Ericscript - --pizzly Run Pizzly - --squid Run Squid - --debug Flag to run only specific fusion tool/s and not the whole pipeline. Only works on tool flags. - --databases Database path for fusion-report - --fusion_report_opt fusion-report extra parameters - - Visualization flags: - --arriba_vis Generate a PDF visualization per detected fusion - --fusion_inspector Run Fusion-Inspector - - References If not specified in the configuration file or you wish to overwrite any of the references. 
- --fasta Path to Fasta reference - --gtf Path to GTF annotation - --star_index Path to STAR-Index reference - --star_fusion_ref Path to STAR-Fusion reference - --fusioncatcher_ref Path to Fusioncatcher reference - --ericscript_ref Path to Ericscript reference - --pizzly_fasta Path to Pizzly FASTA reference - --pizzly_gtf Path to Pizzly GTF annotation - - Options: - --genome Name of iGenomes reference - --read_length Length of the reads. Default: 100 - --singleEnd Specifies that the input is single end reads - - Other Options: - --outdir The output directory where the results will be saved - --email Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits - --maxMultiqcEmailFileSize Theshold size for MultiQC report to be attached in notification email. If file generated by pipeline exceeds the threshold, it will not be attached (Default: 25MB) - -name Name for the pipeline run. If not specified, Nextflow will automatically generate a random mnemonic. - - AWSBatch options: - --awsqueue The AWSBatch JobQueue that needs to be set when running on AWSBatch - --awsregion The AWS Region for your AWS Batch job to run on - """.stripIndent() -} - -/* - * SET UP CONFIGURATION VARIABLES - */ - -params.running_tools = [] -params.visualization_tools = [] - -// Show help emssage -if (params.help){ - helpMessage() - exit 0 -} - -// Check if genome exists in the config file -if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { - exit 1, "The provided genome '${params.genome}' is not available in the iGenomes file. Currently the available genomes are ${params.genomes.keySet().join(", ")}" -} - -// Configurable reference genomes -params.fasta = params.genome ? params.genomes[ params.genome ].fasta ?: false : false -params.gtf = params.genome ? 
params.genomes[ params.genome ].gtf ?: false : false - -Channel - .fromPath(params.fasta) - .ifEmpty { exit 1, "Fasta file not found: ${params.fasta}" } - .into { fasta; fasta_arriba } - -Channel - .fromPath(params.gtf) - .ifEmpty { exit 1, "GTF annotation file not found: ${params.gtf}" } - .into { gtf; gtf_arriba; gtf_arriba_vis; gtf_squid } - -if (!params.star_index && (!params.fasta && !params.gtf)) { - exit 1, "Either specify STAR-INDEX or fasta and gtf file!" -} - -if (!params.databases) { - exit 1, "Database path for fusion-report has to be specified!" -} - -arriba_ref = false -if (params.arriba) { - params.running_tools.add("Arriba") - params.visualization_tools.add("Arriba") - arriba_ref = Channel - .fromPath(params.arriba_ref) - .ifEmpty { exit 1, "Arriba reference directory not found!" } -} - -arriba_vis_ref = false -if (params.arriba_vis) { - params.running_tools.add("ArribaVisualization") - arriba_vis_ref = Channel - .fromPath(params.arriba_ref) - .ifEmpty { exit 1, "Arriba reference directory not found!" } -} - -star_fusion_ref = false -if (params.star_fusion) { - params.running_tools.add("STAR-Fusion") - if (!params.star_fusion_ref) { - exit 1, "Star-Fusion reference not specified!" - } else { - star_fusion_ref = Channel - .fromPath(params.star_fusion_ref) - .ifEmpty { exit 1, "Star-Fusion reference directory not found!" } - } -} - -fusioncatcher_ref = false -if (params.fusioncatcher) { - params.running_tools.add("Fusioncatcher") - if (!params.fusioncatcher_ref) { - exit 1, "Fusioncatcher data directory not specified!" - } else { - fusioncatcher_ref = Channel - .fromPath(params.fusioncatcher_ref) - .ifEmpty { exit 1, "Fusioncatcher data directory not found!" } - } -} - -ericscript_ref = false -if (params.ericscript) { - params.running_tools.add("Ericscript") - if (!params.ericscript_ref) { - exit 1, "Reference not specified!" 
- } else { - ericscript_ref = Channel - .fromPath(params.ericscript_ref) - .ifEmpty { exit 1, "Ericscript reference not found" } - } -} - -pizzly_fasta = false -pizzly_gtf = false -if (params.pizzly) { - params.running_tools.add("Pizzly") - if (params.pizzly_fasta) { - pizzly_fasta = Channel - .fromPath(params.pizzly_fasta) - .ifEmpty { exit 1, "Pizzly FASTA file not found!" } - } - - if (params.pizzly_gtf) { - pizzly_gtf = Channel - .fromPath(params.pizzly_gtf) - .ifEmpty { exit 1, "Pizzly GTF file not found!" } - } -} - -if (params.squid) { - params.running_tools.add("Squid") - if (!gtf_squid) { - exit 1, "Missing GTF annotation file for squid!" - } -} - -fusion_inspector_ref = false -if (params.fusion_inspector) { - params.visualization_tools.add("FusionInspector") - if (!params.star_fusion_ref) { - exit 1, "Reference not specified (using star-fusion reference path)!" - } else { - fusion_inspector_ref = Channel - .fromPath(params.star_fusion_ref) - .ifEmpty { exit 1, "Fusion-Inspector reference not found" } - } -} - -// Has the run name been specified by the user? -// this has the bonus effect of catching both -name and --name -custom_runName = params.name -if( !(workflow.runName ==~ /[a-z]+_[a-z]+/) ){ - custom_runName = workflow.runName -} - -if( workflow.profile == 'awsbatch') { - // AWSBatch sanity checking - if (!params.awsqueue || !params.awsregion) exit 1, "Specify correct --awsqueue and --awsregion parameters on AWSBatch!" - // Check outdir paths to be S3 buckets if running on AWSBatch - // related: https://github.com/nextflow-io/nextflow/issues/813 - if (!params.outdir.startsWith('s3:')) exit 1, "Outdir not on S3 - specify S3 Bucket to run on AWSBatch!" - // Prevent trace files to be stored on S3 since S3 does not support rolling files. - if (workflow.tracedir.startsWith('s3:')) exit 1, "Specify a local tracedir or run without trace! S3 cannot be used for tracefiles." 
-} - -// Stage config files -ch_multiqc_config = Channel.fromPath(params.multiqc_config) -ch_output_docs = Channel.fromPath("$baseDir/docs/output.md") - -/* - * Create a channel for input read files - */ -if(params.readPaths){ - if(params.singleEnd){ - Channel - .from(params.readPaths) - .map { row -> [ row[0], [file(row[1][0])]] } - .ifEmpty { exit 1, "params.readPaths was empty - no input files supplied" } - .into { read_files_fastqc; read_files_summary; read_files_multiqc; read_files_star_fusion; read_files_fusioncatcher; - read_files_gfusion; read_files_fusion_inspector; read_files_ericscript; read_files_pizzly; read_files_squid; read_files_arriba } - } else { - Channel - .from(params.readPaths) - .map { row -> [ row[0], [file(row[1][0]), file(row[1][1])]] } - .ifEmpty { exit 1, "params.readPaths was empty - no input files supplied" } - .into { read_files_fastqc; read_files_summary; read_files_multiqc; read_files_star_fusion; read_files_fusioncatcher; - read_files_gfusion; read_files_fusion_inspector; read_files_ericscript; read_files_pizzly; read_files_squid; read_files_arriba } - } -} else { - Channel - .fromFilePairs( params.reads, size: params.singleEnd ? 1 : 2 ) - .ifEmpty { exit 1, "Cannot find any reads matching: ${params.reads}\nNB: Path needs to be enclosed in quotes!\nIf this is single-end data, please specify --singleEnd on the command line." } - .into { read_files_fastqc; read_files_summary; read_files_multiqc; read_files_star_fusion; read_files_fusioncatcher; - read_files_gfusion; read_files_fusion_inspector; read_files_ericscript; read_files_pizzly; read_files_squid; read_files_arriba } -} - -// Header log info -log.info nfcoreHeader() -def summary = [:] -if(workflow.revision) summary['Pipeline Release'] = workflow.revision -summary['Run Name'] = custom_runName ?: workflow.runName -summary['Reads'] = params.reads -summary['Fasta Ref'] = params.fasta -summary['GTF Ref'] = params.gtf -summary['STAR Index'] = params.star_index ? 
params.star_index : 'Not specified, building' -summary['Fusion tools'] = params.running_tools.size() == 0 ? 'None' : params.running_tools.join(", ") -summary['Visualization tools'] = params.visualization_tools.size() == 0 ? 'None': params.visualization_tools.join(", ") -summary['Data Type'] = params.singleEnd ? 'Single-End' : 'Paired-End' -summary['Max Resources'] = "$params.max_memory memory, $params.max_cpus cpus, $params.max_time time per job" -if(workflow.containerEngine) summary['Container'] = "$workflow.containerEngine - $workflow.container" -summary['Output dir'] = params.outdir -summary['Launch dir'] = workflow.launchDir -summary['Working dir'] = workflow.workDir -summary['Script dir'] = workflow.projectDir -summary['User'] = workflow.userName -if(workflow.profile == 'awsbatch'){ - summary['AWS Region'] = params.awsregion - summary['AWS Queue'] = params.awsqueue -} -summary['Config Profile'] = workflow.profile -if(params.config_profile_description) summary['Config Description'] = params.config_profile_description -if(params.config_profile_contact) summary['Config Contact'] = params.config_profile_contact -if(params.config_profile_url) summary['Config URL'] = params.config_profile_url -if(params.email) { - summary['E-mail Address'] = params.email - summary['MultiQC maxsize'] = params.maxMultiqcEmailFileSize -} -log.info summary.collect { k,v -> "${k.padRight(18)}: $v" }.join("\n") -log.info "\033[2m----------------------------------------------------\033[0m" - -// Check the hostnames against configured profiles -checkHostname() - -def create_workflow_summary(summary) { - def yaml_file = workDir.resolve('workflow_summary_mqc.yaml') - yaml_file.text = """ - id: 'nf-core-rnafusion-summary' - description: " - this information is collected when the pipeline is started." - section_name: 'nf-core/rnafusion Workflow Summary' - section_href: 'https://github.com/nf-core/rnafusion' - plot_type: 'html' - data: | -
-${summary.collect { k,v -> "
$k
${v ?: 'N/A'}
" }.join("\n")} -
- """.stripIndent() - - return yaml_file -} - -/************************************************************* - * PREPROCESSING - ************************************************************/ - -/* - * Build STAR index - */ -if (params.star_index) { - Channel - .fromPath(params.star_index) - .ifEmpty { exit 1, "STAR index not found: ${params.star_index}" } - .into { star_index_squid; star_index_star_fusion; star_index_arriba } -} else { - process build_star_index { - tag "$fasta" - publishDir "${params.outdir}/star_index", mode: 'copy' - - input: - file fasta - file gtf - - output: - file "star" into star_index_squid, star_index_star_fusion, star_index_arriba - - script: - def avail_mem = task.memory ? "--limitGenomeGenerateRAM ${task.memory.toBytes() - 100000000}" : '' - """ - mkdir star - STAR \\ - --runMode genomeGenerate \\ - --runThreadN ${task.cpus} \\ - --sjdbGTFfile ${gtf} \\ - --sjdbOverhang ${params.read_length - 1} \\ - --genomeDir star/ \\ - --genomeFastaFiles ${fasta} \\ - $avail_mem - """ - } -} - -/************************************************************* - * Fusion pipeline - ************************************************************/ - -/* - * Arriba - */ -process arriba { - tag "$name" - publishDir "${params.outdir}/tools/Arriba", mode: 'copy' - - when: - params.arriba && (!params.singleEnd || params.debug) - - input: - set val(name), file(reads) from read_files_arriba - file reference from arriba_ref - file star_index_arriba - file fasta_arriba - file gtf_arriba - - output: - file 'fusions.tsv' optional true into arriba_fusions1, arriba_fusions2 - file 'Aligned.out.bam' optional true into arriba_bam - file '*.{tsv,txt}' into arriba_output - - script: - def extra_params = params.arriba_opt ? 
"${params.arriba_opt}" : '' - """ - STAR \\ - --genomeDir ${star_index_arriba} \\ - --runThreadN ${task.cpus} \\ - --readFilesIn ${reads} \\ - --outStd BAM_Unsorted \\ - --outSAMtype BAM Unsorted \\ - --outSAMunmapped Within \\ - --outBAMcompression 0 \\ - --outFilterMultimapNmax 1 \\ - --outFilterMismatchNmax 3 \\ - --chimSegmentMin 10 \\ - --chimOutType WithinBAM SoftClip \\ - --chimJunctionOverhangMin 10 \\ - --chimScoreMin 1 \\ - --chimScoreDropMax 30 \\ - --chimScoreJunctionNonGTAG 0 \\ - --chimScoreSeparation 1 \\ - --alignSJstitchMismatchNmax 5 -1 5 5 \\ - --chimSegmentReadGapMax 3 \\ - --readFilesCommand zcat \\ - --sjdbOverhang ${params.read_length - 1} | - - tee Aligned.out.bam | - - arriba \\ - -x /dev/stdin \\ - -a ${fasta_arriba} \\ - -g ${gtf_arriba} \\ - -b ${reference}/blacklist_hg38_GRCh38_2018-11-04.tsv \\ - -o fusions.tsv -O fusions.discarded.tsv \\ - -T -P \\ - ${extra_params} - """ -} - -/* - * STAR-Fusion - */ -process star_fusion { - tag "$name" - publishDir "${params.outdir}/tools/StarFusion", mode: 'copy' - - when: - params.star_fusion || (params.star_fusion && params.debug) - - input: - set val(name), file(reads) from read_files_star_fusion - file star_index_star_fusion - file reference from star_fusion_ref - - output: - file '*fusion_predictions.tsv' optional true into star_fusion_fusions - file '*.{tsv,txt}' into star_fusion_output - - script: - def avail_mem = task.memory ? "--limitBAMsortRAM ${task.memory.toBytes() - 100000000}" : '' - option = params.singleEnd ? "--left_fq ${reads[0]}" : "--left_fq ${reads[0]} --right_fq ${reads[1]}" - def extra_params = params.star_fusion_opt ? 
"${params.star_fusion_opt}" : '' - """ - STAR \\ - --genomeDir ${star_index_star_fusion} \\ - --readFilesIn ${reads} \\ - --twopassMode Basic \\ - --outReadsUnmapped None \\ - --chimSegmentMin 12 \\ - --chimJunctionOverhangMin 12 \\ - --alignSJDBoverhangMin 10 \\ - --alignMatesGapMax 100000 \\ - --alignIntronMax 100000 \\ - --chimSegmentReadGapMax 3 \\ - --alignSJstitchMismatchNmax 5 -1 5 5 \\ - --runThreadN ${task.cpus} \\ - --outSAMstrandField intronMotif ${avail_mem} \\ - --outSAMunmapped Within \\ - --outSAMtype BAM Unsorted \\ - --outSAMattrRGline ID:GRPundef \\ - --chimMultimapScoreRange 10 \\ - --chimMultimapNmax 10 \\ - --chimNonchimScoreDropMin 10 \\ - --peOverlapNbasesMin 12 \\ - --peOverlapMMp 0.1 \\ - --readFilesCommand zcat \\ - --sjdbOverhang ${params.read_length - 1} \\ - --chimOutJunctionFormat 1 - - STAR-Fusion \\ - --genome_lib_dir ${reference} \\ - -J Chimeric.out.junction \\ - ${option} \\ - --CPU ${task.cpus} \\ - --examine_coding_effect \\ - --output_dir . ${extra_params} - """ -} - -/* - * Fusioncatcher - */ -process fusioncatcher { - tag "$name" - publishDir "${params.outdir}/tools/Fusioncatcher", mode: 'copy' - - when: - params.fusioncatcher || (params.fusioncatcher && params.debug) - - input: - set val(name), file(reads) from read_files_fusioncatcher - file data_dir from fusioncatcher_ref - - output: - file 'final-list_candidate-fusion-genes.txt' optional true into fusioncatcher_fusions - file '*.{txt,zip,log}' into fusioncatcher_output - - script: - option = params.singleEnd ? reads[0] : "${reads[0]},${reads[1]}" - def extra_params = params.fusioncatcher_opt ? "${params.fusioncatcher_opt}" : '' - """ - fusioncatcher \\ - -d ${data_dir} \\ - -i ${option} \\ - --threads ${task.cpus} \\ - -o . 
\\ - --skip-blat ${extra_params} - """ -} - -/* - * Ericscript - */ -process ericscript { - tag "$name" - publishDir "${params.outdir}/tools/Ericscript", mode: 'copy' - - when: - params.ericscript && (!params.singleEnd || params.debug) - - input: - set val(name), file(reads) from read_files_ericscript - file reference from ericscript_ref - - output: - file './tmp/fusions.results.filtered.tsv' optional true into ericscript_fusions - file './tmp/fusions.results.total.tsv' optional true into ericscript_output - - script: - """ - ericscript.pl \\ - -db ${reference} \\ - -name fusions \\ - -p ${task.cpus} \\ - -o ./tmp \\ - ${reads[0]} \\ - ${reads[1]} - """ -} - -/* - * Pizzly - */ -process pizzly { - tag "$name" - publishDir "${params.outdir}/tools/Pizzly", mode: 'copy' - - when: - params.pizzly && (!params.singleEnd || params.debug) - - input: - set val(name), file(reads) from read_files_pizzly - file fasta from pizzly_fasta - file gtf from pizzly_gtf - - output: - file 'pizzly_fusions.txt' optional true into pizzly_fusions - file '*.{json,txt}' into pizzly_output - - script: - """ - kallisto index -i index.idx -k ${params.pizzly_k} ${fasta} - kallisto quant -t ${task.cpus} -i index.idx --fusion -o output ${reads[0]} ${reads[1]} - pizzly -k ${params.pizzly_k} \\ - --gtf ${gtf} \\ - --cache index.cache.txt \\ - --align-score 2 \\ - --insert-size 400 \\ - --fasta ${fasta} \\ - --output pizzly_fusions output/fusion.txt - pizzly_flatten_json.py pizzly_fusions.json pizzly_fusions.txt - """ -} - -/* - * Squid - */ -process squid { - tag "$name" - publishDir "${params.outdir}/tools/Squid", mode: 'copy' - - when: - params.squid && (!params.singleEnd || params.debug) - - input: - set val(name), file(reads) from read_files_squid - file star_index_squid - file gtf from gtf_squid - - output: - file '*_annotated.txt' optional true into squid_fusions - file '*.txt' into squid_output - - script: - def avail_mem = task.memory ? 
"--limitBAMsortRAM ${task.memory.toBytes() - 100000000}" : '' - """ - STAR \\ - --genomeDir ${star_index_squid} \\ - --sjdbGTFfile ${gtf} \\ - --runThreadN ${task.cpus} \\ - --readFilesIn ${reads[0]} ${reads[1]} \\ - --twopassMode Basic \\ - --chimOutType SeparateSAMold --chimSegmentMin 20 --chimJunctionOverhangMin 12 --alignSJDBoverhangMin 10 --outReadsUnmapped Fastx --outSAMstrandField intronMotif \\ - --outSAMtype BAM SortedByCoordinate ${avail_mem} \\ - --readFilesCommand zcat - mv Aligned.sortedByCoord.out.bam ${name}Aligned.sortedByCoord.out.bam - samtools view -bS Chimeric.out.sam > ${name}Chimeric.out.bam - squid -b ${name}Aligned.sortedByCoord.out.bam -c ${name}Chimeric.out.bam -o fusions - AnnotateSQUIDOutput.py ${gtf} fusions_sv.txt fusions_annotated.txt - """ -} - -/************************************************************* - * Summarizing results from tools - ************************************************************/ -process summary { - tag "$name" - publishDir "${params.outdir}/Report-${name}", mode: 'copy' - - when: - !params.debug && (params.arriba || params.fusioncatcher || params.star_fusion || params.ericscript || params.pizzly || params.squid) - - input: - set val(name), file(reads) from read_files_summary - file arriba from arriba_fusions1.ifEmpty('') - file fusioncatcher from fusioncatcher_fusions.ifEmpty('') - file starfusion from star_fusion_fusions.ifEmpty('') - file ericscript from ericscript_fusions.ifEmpty('') - file pizzly from pizzly_fusions.ifEmpty('') - file squid from squid_fusions.ifEmpty('') - - output: - file 'fusions_list.txt' into fusion_inspector_input_list - file 'fusion_genes_mqc.json' into summary_fusions_mq - file '*' into report - - script: - def extra_params = params.fusion_report_opt ? "${params.fusion_report_opt}" : '' - def tools = !arriba.empty() ? "--arriba ${arriba} " : '' - tools += !fusioncatcher.empty() ? "--fusioncatcher ${fusioncatcher} " : '' - tools += !starfusion.empty() ? 
"--starfusion ${starfusion} " : '' - tools += !ericscript.empty() ? "--ericscript ${ericscript} " : '' - tools += !pizzly.empty() ? "--pizzly ${pizzly} " : '' - tools += !squid.empty() ? "--squid ${squid} " : '' - """ - fusion_report run ${name} . ${params.databases} \\ - ${tools} ${extra_params} - """ -} - -/************************************************************* - * Visualization - ************************************************************/ - -/* - * Arriba Visualization - */ -process arriba_visualization { - tag "$name" - publishDir "${params.outdir}/tools/Arriba", mode: 'copy' - - when: - params.arriba_vis && (!params.singleEnd || params.debug) - - input: - file reference from arriba_vis_ref - file fusions from arriba_fusions2 - file bam from arriba_bam - file gtf from gtf_arriba_vis - - output: - file 'visualization.pdf' optional true into arriba_visualization_output - - script: - def suff_mem = ("${(task.memory.toBytes() - 6000000000) / task.cpus}" > 2000000000) ? 'true' : 'false' - def avail_mem = (task.memory && suff_mem) ? 
"-m" + "${(task.memory.toBytes() - 6000000000) / task.cpus}" : '' - """ - samtools sort -@ ${task.cpus} ${avail_mem} -O bam ${bam} > Aligned.sortedByCoord.out.bam - samtools index Aligned.sortedByCoord.out.bam - draw_fusions.R \\ - --fusions=${fusions} \\ - --alignments=Aligned.sortedByCoord.out.bam \\ - --output=visualization.pdf \\ - --annotation=${gtf} \\ - --cytobands=${reference}/cytobands_hg38_GRCh38_2018-02-23.tsv \\ - --proteinDomains=${reference}/protein_domains_hg38_GRCh38_2018-03-06.gff3 - """ -} - /* - * Fusion Inspector - */ -process fusion_inspector { - tag "$name" - publishDir "${params.outdir}/tools/FusionInspector", mode: 'copy' - - when: - params.fusion_inspector && (!params.singleEnd || params.debug) - - input: - set val(name), file(reads) from read_files_fusion_inspector - file reference from fusion_inspector_ref - file fusion_inspector_input_list - - output: - file '*.{fa,gtf,bed,bam,bai,txt}' into fusion_inspector_output - - script: - """ - FusionInspector \\ - --fusions ${fusion_inspector_input_list} \\ - --genome_lib ${reference} \\ - --left_fq ${reads[0]} \\ - --right_fq ${reads[1]} \\ - --CPU ${task.cpus} \\ - --out_dir . 
\\ - --out_prefix finspector \\ - --prep_for_IGV - """ -} - -/************************************************************* - * Quality check & software verions - ************************************************************/ - -/* - * Parse software version numbers - */ -process get_software_versions { - publishDir "${params.outdir}/pipeline_info", mode: 'copy', - saveAs: {filename -> - if (filename.indexOf(".csv") > 0) filename - else null - } - - when: - !params.debug +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ - output: - file 'software_versions_mqc.yaml' into software_versions_yaml - file "software_versions.csv" +include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_rnafusion_pipeline' +include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_rnafusion_pipeline' +include { getGenomeAttribute } from './subworkflows/local/utils_nfcore_rnafusion_pipeline' +include { RNAFUSION } from './workflows/rnafusion' - script: - """ - echo $workflow.manifest.version > v_pipeline.txt - echo $workflow.nextflow.version > v_nextflow.txt - fastqc --version > v_fastqc.txt - multiqc --version > v_multiqc.txt - cat $baseDir/tools/arriba/environment.yml > v_arriba.txt - cat $baseDir/tools/fusioncatcher/environment.yml > v_fusioncatcher.txt - cat $baseDir/tools/fusion-inspector/environment.yml > v_fusion_inspector.txt - cat $baseDir/tools/star-fusion/environment.yml > v_star_fusion.txt - cat $baseDir/tools/ericscript/environment.yml > v_ericscript.txt - cat $baseDir/tools/pizzly/environment.yml > v_pizzly.txt - cat $baseDir/tools/squid/environment.yml > v_squid.txt - cat $baseDir/environment.yml > v_fusion_report.txt - scrape_software_versions.py &> software_versions_mqc.yaml - """ -} /* - * FastQC - */ -process fastqc { - tag "$name" - publishDir 
"${params.outdir}/fastqc", mode: 'copy', - saveAs: {filename -> filename.indexOf(".zip") > 0 ? "zips/$filename" : "$filename"} +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + GENOME PARAMETER VALUES +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ - when: - !params.debug - input: - set val(name), file(reads) from read_files_fastqc - output: - file "*_fastqc.{zip,html}" into fastqc_results - script: - """ - fastqc -q $reads - """ -} /* - * MultiQC - */ -process multiqc { - tag "$name" - publishDir "${params.outdir}/MultiQC", mode: 'copy' - - when: - !params.debug - - input: - file multiqc_config from ch_multiqc_config - file ('fastqc/*') from fastqc_results.collect().ifEmpty([]) - file ('software_versions/*') from software_versions_yaml.collect() - file workflow_summary from create_workflow_summary(summary) - file fusions_mq from summary_fusions_mq.ifEmpty('') - - output: - file "*multiqc_report.html" into multiqc_report - file "*_data" - file "multiqc_plots" - - script: - rtitle = custom_runName ? "--title \"$custom_runName\"" : '' - rfilename = custom_runName ? "--filename " + custom_runName.replaceAll('\\W','_').replaceAll('_+','_') + "_multiqc_report" : '' - """ - multiqc -f $rtitle $rfilename --config $multiqc_config . 
- """ -} +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + NAMED WORKFLOWS FOR PIPELINE +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ -/* - * Output Description HTML - */ -process output_documentation { - publishDir "${params.outdir}/pipeline_info", mode: 'copy' +// +// WORKFLOW: Run main analysis pipeline depending on type of input +// +workflow NFCORE_RNAFUSION { + take: + samplesheet - when: - !params.debug + main: - input: - file output_docs from ch_output_docs + // + // WORKFLOW: Run pipeline + // - output: - file "results_description.html" + RNAFUSION(samplesheet) - script: - """ - markdown_to_html.r $output_docs results_description.html - """ + emit: + multiqc_report = RNAFUSION.out.multiqc_report } - /* - * Completion e-mail notification - */ -workflow.onComplete { - - // Set up the e-mail variables - def subject = "[nf-core/rnafusion] Successful: $workflow.runName" - if(!workflow.success){ - subject = "[nf-core/rnafusion] FAILED: $workflow.runName" - } - def email_fields = [:] - email_fields['version'] = workflow.manifest.version - email_fields['runName'] = custom_runName ?: workflow.runName - email_fields['success'] = workflow.success - email_fields['dateComplete'] = workflow.complete - email_fields['duration'] = workflow.duration - email_fields['exitStatus'] = workflow.exitStatus - email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') - email_fields['errorReport'] = (workflow.errorReport ?: 'None') - email_fields['commandLine'] = workflow.commandLine - email_fields['projectDir'] = workflow.projectDir - email_fields['summary'] = summary - email_fields['summary']['Date Started'] = workflow.start - email_fields['summary']['Date Completed'] = workflow.complete - email_fields['summary']['Pipeline script file path'] = workflow.scriptFile - email_fields['summary']['Pipeline script hash ID'] = workflow.scriptId - if(workflow.repository) 
email_fields['summary']['Pipeline repository Git URL'] = workflow.repository - if(workflow.commitId) email_fields['summary']['Pipeline repository Git Commit'] = workflow.commitId - if(workflow.revision) email_fields['summary']['Pipeline Git branch/tag'] = workflow.revision - if(workflow.container) email_fields['summary']['Docker image'] = workflow.container - email_fields['summary']['Nextflow Version'] = workflow.nextflow.version - email_fields['summary']['Nextflow Build'] = workflow.nextflow.build - email_fields['summary']['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp - - // On success try attach the multiqc report - def mqc_report = null - try { - if (workflow.success) { - mqc_report = multiqc_report.getVal() - if (mqc_report.getClass() == ArrayList){ - log.warn "[nf-core/rnafusion] Found multiple reports from process 'multiqc', will use only one" - mqc_report = mqc_report[0] - } - } - } catch (all) { - log.warn "[nf-core/rnafusion] Could not attach MultiQC report to summary email" - } - - // Render the TXT template - def engine = new groovy.text.GStringTemplateEngine() - def tf = new File("$baseDir/assets/email_template.txt") - def txt_template = engine.createTemplate(tf).make(email_fields) - def email_txt = txt_template.toString() - - // Render the HTML template - def hf = new File("$baseDir/assets/email_template.html") - def html_template = engine.createTemplate(hf).make(email_fields) - def email_html = html_template.toString() - - // Render the sendmail template - def smail_fields = [ email: params.email, subject: subject, email_txt: email_txt, email_html: email_html, baseDir: "$baseDir", mqcFile: mqc_report, mqcMaxSize: params.maxMultiqcEmailFileSize.toBytes() ] - def sf = new File("$baseDir/assets/sendmail_template.txt") - def sendmail_template = engine.createTemplate(sf).make(smail_fields) - def sendmail_html = sendmail_template.toString() - - // Send the HTML e-mail - if (params.email) { - try { - if( params.plaintext_email ){ throw 
GroovyException('Send plaintext e-mail, not HTML') } - // Try to send HTML e-mail using sendmail - [ 'sendmail', '-t' ].execute() << sendmail_html - log.info "[nf-core/rnafusion] Sent summary e-mail to $params.email (sendmail)" - } catch (all) { - // Catch failures and try with plaintext - [ 'mail', '-s', subject, params.email ].execute() << email_txt - log.info "[nf-core/rnafusion] Sent summary e-mail to $params.email (mail)" - } - } - - // Write summary e-mail HTML to a file - def output_d = new File( "${params.outdir}/pipeline_info/" ) - if( !output_d.exists() ) { - output_d.mkdirs() - } - def output_hf = new File( output_d, "pipeline_report.html" ) - output_hf.withWriter { w -> w << email_html } - def output_tf = new File( output_d, "pipeline_report.txt" ) - output_tf.withWriter { w -> w << email_txt } - - c_reset = params.monochrome_logs ? '' : "\033[0m"; - c_purple = params.monochrome_logs ? '' : "\033[0;35m"; - c_green = params.monochrome_logs ? '' : "\033[0;32m"; - c_red = params.monochrome_logs ? '' : "\033[0;31m"; - - if (workflow.stats.ignoredCountFmt > 0 && workflow.success) { - log.info "${c_purple}Warning, pipeline completed, but with errored process(es) ${c_reset}" - log.info "${c_red}Number of ignored errored process(es) : ${workflow.stats.ignoredCountFmt} ${c_reset}" - log.info "${c_green}Number of successfully ran process(es) : ${workflow.stats.succeedCountFmt} ${c_reset}" - } - - if(workflow.success){ - log.info "${c_purple}[nf-core/rnafusion]${c_green} Pipeline completed successfully${c_reset}" - } else { - checkHostname() - log.info "${c_purple}[nf-core/rnafusion]${c_red} Pipeline completed with errors${c_reset}" - } - -} - -def nfcoreHeader(){ - // Log colors ANSI codes - c_reset = params.monochrome_logs ? '' : "\033[0m"; - c_dim = params.monochrome_logs ? '' : "\033[2m"; - c_black = params.monochrome_logs ? '' : "\033[0;30m"; - c_green = params.monochrome_logs ? '' : "\033[0;32m"; - c_yellow = params.monochrome_logs ? 
'' : "\033[0;33m"; - c_blue = params.monochrome_logs ? '' : "\033[0;34m"; - c_purple = params.monochrome_logs ? '' : "\033[0;35m"; - c_cyan = params.monochrome_logs ? '' : "\033[0;36m"; - c_white = params.monochrome_logs ? '' : "\033[0;37m"; - - return """ ${c_dim}----------------------------------------------------${c_reset} - ${c_green},--.${c_black}/${c_green},-.${c_reset} - ${c_blue} ___ __ __ __ ___ ${c_green}/,-._.--~\'${c_reset} - ${c_blue} |\\ | |__ __ / ` / \\ |__) |__ ${c_yellow}} {${c_reset} - ${c_blue} | \\| | \\__, \\__/ | \\ |___ ${c_green}\\`-._,-`-,${c_reset} - ${c_green}`._,._,\'${c_reset} - ${c_purple} nf-core/rnafusion v${workflow.manifest.version}${c_reset} - ${c_dim}----------------------------------------------------${c_reset} - """.stripIndent() -} +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + RUN MAIN WORKFLOW +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ -def checkHostname(){ - def c_reset = params.monochrome_logs ? '' : "\033[0m" - def c_white = params.monochrome_logs ? '' : "\033[0;37m" - def c_red = params.monochrome_logs ? '' : "\033[1;91m" - def c_yellow_bold = params.monochrome_logs ? 
'' : "\033[1;93m" - if(params.hostnames){ - def hostname = "hostname".execute().text.trim() - params.hostnames.each { prof, hnames -> - hnames.each { hname -> - if(hostname.contains(hname) && !workflow.profile.contains(prof)){ - log.error "====================================================\n" + - " ${c_red}WARNING!${c_reset} You are running with `-profile $workflow.profile`\n" + - " but your machine hostname is ${c_white}'$hostname'${c_reset}\n" + - " ${c_yellow_bold}It's highly recommended that you use `-profile $prof${c_reset}`\n" + - "============================================================" - } - } - } - } -} +workflow { + + main: + // + // SUBWORKFLOW: Run initialisation tasks + // + PIPELINE_INITIALISATION ( + params.version, + params.validate_params, + args, + params.outdir, + ) + + // + // WORKFLOW: Run main workflow + // + NFCORE_RNAFUSION (PIPELINE_INITIALISATION.out.samplesheet) + + // + // SUBWORKFLOW: Run completion tasks + // + PIPELINE_COMPLETION ( + params.email, + params.email_on_fail, + params.plaintext_email, + params.outdir, + params.monochrome_logs, + params.hook_url, + NFCORE_RNAFUSION.out.multiqc_report, + ) +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + THE END +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ diff --git a/modules.json b/modules.json new file mode 100644 index 00000000..b41d5fa5 --- /dev/null +++ b/modules.json @@ -0,0 +1,172 @@ +{ + "name": "nf-core/rnafusion", + "homePage": "https://github.com/nf-core/rnafusion", + "repos": { + "https://github.com/nf-core/modules.git": { + "modules": { + "nf-core": { + "agat/convertspgff2tsv": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "arriba/arriba": { + "branch": "master", + "git_sha": "7741dfc830e77a8ead2fcb50b01461ee09d0cdfe", + "installed_by": ["modules"] + }, + "arriba/download": { + "branch": "master", + 
"git_sha": "467c202a876d26af544fa8c4b22a050a535462a7", + "installed_by": ["modules"] + }, + "bedops/convert2bed": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "cat/cat": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "cat/fastq": { + "branch": "master", + "git_sha": "a1abf90966a2a4016d3c3e41e228bfcbd4811ccc", + "installed_by": ["modules"] + }, + "fastp": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "fastqc": { + "branch": "master", + "git_sha": "dc94b6ee04a05ddb9f7ae050712ff30a13149164", + "installed_by": ["modules"] + }, + "gatk4/bedtointervallist": { + "branch": "master", + "git_sha": "1999eff2c530b2b185a25cc42117a1686f09b685", + "installed_by": ["modules"] + }, + "gatk4/createsequencedictionary": { + "branch": "master", + "git_sha": "1999eff2c530b2b185a25cc42117a1686f09b685", + "installed_by": ["modules"] + }, + "gatk4/markduplicates": { + "branch": "master", + "git_sha": "1999eff2c530b2b185a25cc42117a1686f09b685", + "installed_by": ["modules"] + }, + "gffread": { + "branch": "master", + "git_sha": "bd5f75ccaf2345269810e66e85de8a70e4de8764", + "installed_by": ["modules"] + }, + "multiqc": { + "branch": "master", + "git_sha": "cf17ca47590cc578dfb47db1c2a44ef86f89976d", + "installed_by": ["modules"] + }, + "picard/collectinsertsizemetrics": { + "branch": "master", + "git_sha": "49f4e50534fe4b64101e62ea41d5dc43b1324358", + "installed_by": ["modules"] + }, + "picard/collectrnaseqmetrics": { + "branch": "master", + "git_sha": "49f4e50534fe4b64101e62ea41d5dc43b1324358", + "installed_by": ["modules"] + }, + "picard/collectwgsmetrics": { + "branch": "master", + "git_sha": "49f4e50534fe4b64101e62ea41d5dc43b1324358", + "installed_by": ["modules"] + }, + "rrnatranscripts": { + "branch": "master", + "git_sha": "812edf8cf702de42d2d8c7314d6f03b97e20abeb", + 
"installed_by": ["modules"], + "patch": "modules/nf-core/rrnatranscripts/rrnatranscripts.diff" + }, + "salmon/index": { + "branch": "master", + "git_sha": "49f4e50534fe4b64101e62ea41d5dc43b1324358", + "installed_by": ["modules"] + }, + "salmon/quant": { + "branch": "master", + "git_sha": "49f4e50534fe4b64101e62ea41d5dc43b1324358", + "installed_by": ["modules"] + }, + "samtools/faidx": { + "branch": "master", + "git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208", + "installed_by": ["modules"] + }, + "samtools/index": { + "branch": "master", + "git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208", + "installed_by": ["modules"] + }, + "samtools/sort": { + "branch": "master", + "git_sha": "b7800db9b069ed505db3f9d91b8c72faea9be17b", + "installed_by": ["modules"] + }, + "samtools/view": { + "branch": "master", + "git_sha": "2d20463181b1c38981a02e90d3084b5f9fa8d540", + "installed_by": ["modules"] + }, + "star/align": { + "branch": "master", + "git_sha": "a5ad53288c79fa52c5ae708c317e09ec2dd149ab", + "installed_by": ["modules"] + }, + "star/genomegenerate": { + "branch": "master", + "git_sha": "a5ad53288c79fa52c5ae708c317e09ec2dd149ab", + "installed_by": ["modules"] + }, + "stringtie/merge": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "stringtie/stringtie": { + "branch": "master", + "git_sha": "49f4e50534fe4b64101e62ea41d5dc43b1324358", + "installed_by": ["modules"] + }, + "ucsc/gtftogenepred": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + } + } + }, + "subworkflows": { + "nf-core": { + "utils_nextflow_pipeline": { + "branch": "master", + "git_sha": "c2b22d85f30a706a3073387f30380704fcae013b", + "installed_by": ["subworkflows"] + }, + "utils_nfcore_pipeline": { + "branch": "master", + "git_sha": "51ae5406a030d4da1e49e4dab49756844fdd6c7a", + "installed_by": ["subworkflows"] + }, + "utils_nfschema_plugin": { + "branch": "master", + 
"git_sha": "2fd2cd6d0e7b273747f32e465fdc6bcc3ae0814e", + "installed_by": ["subworkflows"] + } + } + } + } + } +} diff --git a/modules/local/arriba/visualisation/main.nf b/modules/local/arriba/visualisation/main.nf new file mode 100644 index 00000000..f1aa097b --- /dev/null +++ b/modules/local/arriba/visualisation/main.nf @@ -0,0 +1,53 @@ +process ARRIBA_VISUALISATION { + tag "$meta.id" + label 'process_medium' + + conda "bioconda::arriba=2.4.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/arriba:2.4.0--h0033a41_2' : + 'biocontainers/arriba:2.4.0--h0033a41_2' }" + + input: + tuple val(meta), path(bam), path(bai), path(fusions) + tuple val(meta2), path(gtf) + tuple val(meta3), path(protein_domains) + tuple val(meta4), path(cytobands) + + output: + tuple val(meta), path("*.pdf") , emit: pdf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def arg_cytobands = cytobands ? " --cytobands=$cytobands" : "" + def arg_protein_domains = protein_domains ? 
"--proteinDomains=$protein_domains" : "" + def prefix = task.ext.prefix ?: "${meta.id}" + """ + draw_fusions.R \\ + --fusions=$fusions \\ + --alignments=$bam \\ + --output=${prefix}.pdf \\ + --annotation=${gtf} \\ + $arg_cytobands \\ + $arg_protein_domains \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + arriba: \$(arriba -h | grep 'Version:' 2>&1 | sed 's/Version:\s//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.pdf + cat <<-END_VERSIONS > versions.yml + "${task.process}": + arriba: \$(arriba -h | grep 'Version:' 2>&1 | sed 's/Version:\s//') + END_VERSIONS + """ +} diff --git a/modules/local/arriba/visualisation/meta.yml b/modules/local/arriba/visualisation/meta.yml new file mode 100644 index 00000000..a7418ca2 --- /dev/null +++ b/modules/local/arriba/visualisation/meta.yml @@ -0,0 +1,54 @@ +name: arriba_visualisation +description: Arriba is a command-line tool for the detection of gene fusions from RNA-Seq data. +keywords: + - visualisation + - arriba +tools: + - arriba: + description: Fast and accurate gene fusion detection from RNA-Seq data + homepage: https://github.com/suhrig/arriba + documentation: https://arriba.readthedocs.io/en/latest/ + tool_dev_url: https://github.com/suhrig/arriba + doi: "10.1101/gr.257246.119" + licence: ["MIT"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - bai: + type: file + description: BAMindex file + pattern: "*.{bai}" + - fusions: + type: file + description: Arriba fusions file + pattern: "*.{tsv}" + - gtf: + type: file + description: Annotation GTF file + pattern: "*.{gtf}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - pdf: + type: file + description: File contains fusions visualisation + pattern: "*.{pdf}" + +authors: + - "@rannick" diff --git a/modules/local/ctatsplicing/startocancerintrons/main.nf b/modules/local/ctatsplicing/startocancerintrons/main.nf new file mode 100644 index 00000000..a8d683ec --- /dev/null +++ b/modules/local/ctatsplicing/startocancerintrons/main.nf @@ -0,0 +1,72 @@ +process CTATSPLICING_STARTOCANCERINTRONS { + tag "$meta.id" + label 'process_single' + + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://data.broadinstitute.org/Trinity/CTAT_SINGULARITY/CTAT-SPLICING/ctat_splicing.v0.0.2.simg' : + 'docker.io/trinityctat/ctat_splicing:0.0.2' }" + + input: + tuple val(meta), path(split_junction), path(junction), path(bam), path(bai) + tuple val(meta2), path(genome_lib) + + output: + tuple val(meta), path("*.cancer_intron_reads.sorted.bam") , emit: cancer_introns_sorted_bam + tuple val(meta), path("*.cancer_intron_reads.sorted.bam.bai") , emit: cancer_introns_sorted_bai + tuple val(meta), path("*.gene_reads.sorted.sifted.bam") , emit: gene_reads_sorted_bam + tuple val(meta), path("*.gene_reads.sorted.sifted.bam.bai") , emit: gene_reads_sorted_bai + tuple val(meta), path("*.cancer.introns") , emit: cancer_introns + tuple val(meta), path("*.cancer.introns.prelim") , emit: cancer_introns_prelim + tuple val(meta), path("*${prefix}.introns") , emit: introns + tuple val(meta), path("*.introns.for_IGV.bed") , emit: introns_igv_bed, optional: true + tuple val(meta), path("*.ctat-splicing.igv.html") , emit: igv_html, optional: true + tuple val(meta), path("*.igv.tracks") , emit: igv_tracks, optional: true + tuple val(meta), path("*.chckpts") , emit: chckpts + path "versions.yml" , emit: versions + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix 
?: "${meta.id}" + def bam_arg = bam ? "--bam_file ${bam}" : "" + def VERSION = '0.0.2' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + def create_index = bam && !bai ? "samtools index ${bam}" : "" + """ + ${create_index} + + /usr/local/src/CTAT-SPLICING/STAR_to_cancer_introns.py \\ + --SJ_tab_file ${split_junction} \\ + --chimJ_file ${junction} \\ + ${bam_arg} \\ + --output_prefix ${prefix} \\ + --ctat_genome_lib ${genome_lib} \\ + ${args} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ctat-splicing: $VERSION + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '0.0.2' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + def create_igv_files = args.contains("--vis") ? "touch ${prefix}.introns.for_IGV.bed && touch ${prefix}.ctat-splicing.igv.html && touch ${prefix}.igv.tracks" : "" + """ + ${create_igv_files} + touch ${prefix}.cancer_intron_reads.sorted.bam + touch ${prefix}.cancer_intron_reads.sorted.bam.bai + touch ${prefix}.gene_reads.sorted.sifted.bam + touch ${prefix}.gene_reads.sorted.sifted.bam.bai + touch ${prefix}.cancer.introns + touch ${prefix}.cancer.introns.prelim + touch ${prefix}.introns + touch ${prefix}.chckpts + + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ctat-splicing: $VERSION + END_VERSIONS + """ +} diff --git a/modules/local/ctatsplicing/startocancerintrons/tests/main.nf.test b/modules/local/ctatsplicing/startocancerintrons/tests/main.nf.test new file mode 100644 index 00000000..dad961c4 --- /dev/null +++ b/modules/local/ctatsplicing/startocancerintrons/tests/main.nf.test @@ -0,0 +1,69 @@ +nextflow_process { + + name "Test Process CTATSPLICING_STARTOCANCERINTRONS" + script "../main.nf" + process "CTATSPLICING_STARTOCANCERINTRONS" + options "-stub" + + test("test without BAM") { + + when { + params { + outdir 
= "tests/results" + } + process { + """ + input[0] = [ + [id:"test"], + file("test.SJ.out.tab"), + file("test.Chimeric.out.junctions"), + [], + [] + ] + input[1] = [ + [id:"reference"], + file("ctat_genome_lib") + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.findAll { key, value -> !key.isNumber() }).match() } + ) + } + } + + test("test with BAM") { + + when { + params { + outdir = "tests/results" + } + process { + """ + input[0] = [ + [id:"test"], + file("test.SJ.out.tab"), + file("test.Chimeric.out.junctions"), + file("test.Aligned.sortedByCoord.out.bam"), + [] + ] + input[1] = [ + [id:"reference"], + file("ctat_genome_lib") + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.findAll { key, value -> !key.isNumber() }).match() } + ) + } + } +} diff --git a/modules/local/ctatsplicing/startocancerintrons/tests/main.nf.test.snap b/modules/local/ctatsplicing/startocancerintrons/tests/main.nf.test.snap new file mode 100644 index 00000000..b0ee3416 --- /dev/null +++ b/modules/local/ctatsplicing/startocancerintrons/tests/main.nf.test.snap @@ -0,0 +1,191 @@ +{ + "test without BAM": { + "content": [ + { + "cancer_introns": [ + [ + { + "id": "test" + }, + "test.cancer.introns:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "cancer_introns_prelim": [ + [ + { + "id": "test" + }, + "test.cancer.introns.prelim:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "cancer_introns_sorted_bai": [ + [ + { + "id": "test" + }, + "test.cancer_intron_reads.sorted.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "cancer_introns_sorted_bam": [ + [ + { + "id": "test" + }, + "test.cancer_intron_reads.sorted.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "chckpts": [ + [ + { + "id": "test" + }, + "test.chckpts:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "gene_reads_sorted_bai": [ + [ + { + "id": "test" + }, + 
"test.gene_reads.sorted.sifted.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "gene_reads_sorted_bam": [ + [ + { + "id": "test" + }, + "test.gene_reads.sorted.sifted.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "igv_html": [ + + ], + "igv_tracks": [ + + ], + "introns": [ + [ + { + "id": "test" + }, + "test.introns:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "introns_igv_bed": [ + + ], + "versions": [ + "versions.yml:md5,fcf861a15f9951342a874b6bc476a37e" + ] + } + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.1" + }, + "timestamp": "2024-12-17T13:35:13.723215847" + }, + "test with BAM": { + "content": [ + { + "cancer_introns": [ + [ + { + "id": "test" + }, + "test.cancer.introns:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "cancer_introns_prelim": [ + [ + { + "id": "test" + }, + "test.cancer.introns.prelim:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "cancer_introns_sorted_bai": [ + [ + { + "id": "test" + }, + "test.cancer_intron_reads.sorted.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "cancer_introns_sorted_bam": [ + [ + { + "id": "test" + }, + "test.cancer_intron_reads.sorted.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "chckpts": [ + [ + { + "id": "test" + }, + "test.chckpts:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "gene_reads_sorted_bai": [ + [ + { + "id": "test" + }, + "test.gene_reads.sorted.sifted.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "gene_reads_sorted_bam": [ + [ + { + "id": "test" + }, + "test.gene_reads.sorted.sifted.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "igv_html": [ + [ + { + "id": "test" + }, + "test.ctat-splicing.igv.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "igv_tracks": [ + [ + { + "id": "test" + }, + "test.igv.tracks:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "introns": [ + [ + { + "id": "test" + }, + "test.introns:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "introns_igv_bed": [ + [ + { + "id": "test" + }, + 
"test.introns.for_IGV.bed:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,fcf861a15f9951342a874b6bc476a37e" + ] + } + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.1" + }, + "timestamp": "2024-12-17T13:33:27.36677449" + } +} \ No newline at end of file diff --git a/modules/local/fusioncatcher/build/main.nf b/modules/local/fusioncatcher/build/main.nf new file mode 100644 index 00000000..4aaaf504 --- /dev/null +++ b/modules/local/fusioncatcher/build/main.nf @@ -0,0 +1,41 @@ +process FUSIONCATCHER_BUILD { + tag "fusioncatcher_build" + label 'process_medium' + + container "docker.io/rannickscilifelab/fusioncatcher:1.34" + + input: + val genome_gencode_version + + output: + path "human_v${genome_gencode_version}" , emit: reference + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + + def args = task.ext.args ?: '' + """ + fusioncatcher-build.py \\ + -g homo_sapiens \\ + -o human_v${genome_gencode_version} \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fusioncatcher: \$(echo \$(fusioncatcher --version 2>&1)) + END_VERSIONS + """ + + stub: + """ + mkdir human_v${genome_gencode_version} + touch human_v${genome_gencode_version}/ensembl_fully_overlapping_genes.txt + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fusioncatcher: \$(echo \$(fusioncatcher --version 2>&1)) + END_VERSIONS + """ +} diff --git a/modules/local/fusioncatcher/build/meta.yml b/modules/local/fusioncatcher/build/meta.yml new file mode 100644 index 00000000..202be7e1 --- /dev/null +++ b/modules/local/fusioncatcher/build/meta.yml @@ -0,0 +1,24 @@ +name: fusioncatcher_build +description: Build genome for fusioncatcher +keywords: + - sort +tools: + - fusioncatcher: + description: Build genome for fusioncatcher + homepage: https://github.com/ndaniel/fusioncatcher/ + documentation: https://github.com/ndaniel/fusioncatcher/blob/master/doc/manual.md + tool_dev_url: 
https://github.com/ndaniel/fusioncatcher/ + doi: "10.1101/011650" + licence: ["GPL v3"] + +output: + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - reference: + type: directory + description: Path to fusioncatcher references + +authors: + - "@praveenraj2018, @rannick" diff --git a/modules/local/fusioncatcher/detect/environment.yml b/modules/local/fusioncatcher/detect/environment.yml new file mode 100644 index 00000000..31299585 --- /dev/null +++ b/modules/local/fusioncatcher/detect/environment.yml @@ -0,0 +1,4 @@ +channels: + - bioconda +dependencies: + - bioconda::fusioncatcher=1.33 diff --git a/modules/local/fusioncatcher/detect/main.nf b/modules/local/fusioncatcher/detect/main.nf new file mode 100644 index 00000000..d2c6b929 --- /dev/null +++ b/modules/local/fusioncatcher/detect/main.nf @@ -0,0 +1,57 @@ +process FUSIONCATCHER { + tag "$meta.id" + label 'process_high' + + container "docker.io/rannickscilifelab/fusioncatcher:1.34" + + input: + tuple val(meta), path(fasta) + path reference + + output: + tuple val(meta), path("*.fusioncatcher.fusion-genes.txt") , optional:true , emit: fusions + tuple val(meta), path("*.fusioncatcher.summary.txt") , optional:true , emit: summary + tuple val(meta), path("*.fusioncatcher.log") , emit: log + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def reads = fasta.toString().replace(" ", ",") + def single_end = meta.single_end ? "--single-end" : "" + """ + fusioncatcher.py \\ + -d $reference \\ + -i $reads \\ + -p $task.cpus \\ + -o . 
\\ + --skip-blat \\ + $single_end \\ + $args + + mv final-list_candidate-fusion-genes.txt ${prefix}.fusioncatcher.fusion-genes.txt + mv summary_candidate_fusions.txt ${prefix}.fusioncatcher.summary.txt + mv fusioncatcher.log ${prefix}.fusioncatcher.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fusioncatcher: \$(echo \$(fusioncatcher.py --version 2>&1)| sed 's/fusioncatcher.py //') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + touch ${prefix}.fusioncatcher.fusion-genes.txt + touch ${prefix}.fusioncatcher.summary.txt + touch ${prefix}.fusioncatcher.log + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fusioncatcher: \$(echo \$(fusioncatcher.py --version 2>&1)| sed 's/fusioncatcher.py //') + END_VERSIONS + """ +} diff --git a/modules/local/fusioncatcher/detect/meta.yml b/modules/local/fusioncatcher/detect/meta.yml new file mode 100644 index 00000000..7c8ee425 --- /dev/null +++ b/modules/local/fusioncatcher/detect/meta.yml @@ -0,0 +1,53 @@ +name: fusioncatcher +description: FusionCatcher searches for novel/known somatic fusion genes, translocations, and chimeras in RNA-seq data +keywords: + - fusioncatcher +tools: + - fusioncatcher: + description: FusionCatcher searches for novel/known somatic fusion genes, translocations, and chimeras in RNA-seq data + homepage: https://github.com/ndaniel/fusioncatcher + documentation: https://github.com/ndaniel/fusioncatcher/wiki + tool_dev_url: https://github.com/ndaniel/fusioncatcher + doi: "10.1101/011650v1" + licence: ["GPL v3"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: FASTQ file + pattern: "*.{fastq}" + - reference: + type: directory + description: Path to fusioncatcher references + pattern: "*" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - fusions: + type: file + description: Final list of candidate fusion genes + pattern: "*.fusioncatcher.fusion-genes.txt" + - summary: + type: file + description: Summary of fusion results + pattern: "*.fusioncatcher.summary.txt" + - log: + type: file + description: Log of fusion results + pattern: "*.fusioncatcher.log" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@praveenraj2018, @rannick" diff --git a/modules/local/fusioncatcher/download/environment.yml b/modules/local/fusioncatcher/download/environment.yml new file mode 100644 index 00000000..31299585 --- /dev/null +++ b/modules/local/fusioncatcher/download/environment.yml @@ -0,0 +1,4 @@ +channels: + - bioconda +dependencies: + - bioconda::fusioncatcher=1.33 diff --git a/modules/local/fusioncatcher/download/main.nf b/modules/local/fusioncatcher/download/main.nf new file mode 100644 index 00000000..c7ea28ec --- /dev/null +++ b/modules/local/fusioncatcher/download/main.nf @@ -0,0 +1,48 @@ +process FUSIONCATCHER_DOWNLOAD { + tag "fusioncatcher_download" + label 'process_medium' + + container "docker.io/rannickscilifelab/fusioncatcher:1.34" + + + input: + val genome_gencode_version + + + output: + path "human_v${genome_gencode_version}" , emit: reference + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + + def args = task.ext.args ?: '' + // TODO: move to S3 + + // def url = + """ + wget $args http://sourceforge.net/projects/fusioncatcher/files/data/human_${genome_gencode_version}.tar.gz.aa + wget $args http://sourceforge.net/projects/fusioncatcher/files/data/human_${genome_gencode_version}.tar.gz.ab + wget $args http://sourceforge.net/projects/fusioncatcher/files/data/human_${genome_gencode_version}.tar.gz.ac + wget $args http://sourceforge.net/projects/fusioncatcher/files/data/human_${genome_gencode_version}.tar.gz.ad + cat
human_${genome_gencode_version}.tar.gz.* | tar xz + rm human_${genome_gencode_version}.tar* + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fusioncatcher: \$(echo \$(fusioncatcher --version 2>&1)) + END_VERSIONS + """ + + stub: + """ + mkdir human_v${genome_gencode_version} + touch human_v${genome_gencode_version}/ensembl_fully_overlapping_genes.txt + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fusioncatcher: \$(echo \$(fusioncatcher --version 2>&1)) + END_VERSIONS + """ +} diff --git a/modules/local/fusioncatcher/download/meta.yml b/modules/local/fusioncatcher/download/meta.yml new file mode 100644 index 00000000..40421a4e --- /dev/null +++ b/modules/local/fusioncatcher/download/meta.yml @@ -0,0 +1,25 @@ +name: fusioncatcher_download +description: Download and extract the human reference archives for fusioncatcher +keywords: + - sort +tools: + - fusioncatcher: + description: Build genome for fusioncatcher + homepage: https://github.com/ndaniel/fusioncatcher/ + documentation: https://github.com/ndaniel/fusioncatcher/blob/master/doc/manual.md + tool_dev_url: https://github.com/ndaniel/fusioncatcher/ + doi: "10.1101/011650" + licence: ["GPL v3"] + +output: + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - reference: + type: directory + description: Path to fusioncatcher references + pattern: "*" + +authors: + - "@praveenraj2018, @rannick" diff --git a/modules/local/fusioninspector/environment.yml b/modules/local/fusioninspector/environment.yml new file mode 100644 index 00000000..ef7f9316 --- /dev/null +++ b/modules/local/fusioninspector/environment.yml @@ -0,0 +1,8 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::dfam=3.7 + - bioconda::hmmer=3.4 + - bioconda::minimap2=2.28 + - bioconda::star-fusion=1.14.0 diff --git a/modules/local/fusioninspector/main.nf b/modules/local/fusioninspector/main.nf new file mode 100644 index 00000000..ade7c174 --- /dev/null +++ b/modules/local/fusioninspector/main.nf
@@ -0,0 +1,58 @@ +process FUSIONINSPECTOR { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/be/bed86145102fdf7e381e1a506a4723676f98b4bbe1db5085d02213cef18525c9/data' : + 'community.wave.seqera.io/library/dfam_hmmer_minimap2_star-fusion:aa3a8e3951498552'}" + + input: + tuple val(meta), path(reads), path(fusion_list) + path reference + + output: + tuple val(meta), path("*FusionInspector.fusions.tsv") , emit: tsv + tuple val(meta), path("*.coding_effect") , optional:true, emit: tsv_coding_effect + tuple val(meta), path("*.gtf") , optional:true, emit: out_gtf + path "*" , emit: output + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + def fasta = meta.single_end ? "--left_fq ${reads[0]}" : "--left_fq ${reads[0]} --right_fq ${reads[1]}" + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + """ + FusionInspector \\ + --fusions $fusion_list \\ + --genome_lib ${reference} \\ + $fasta \\ + --CPU ${task.cpus} \\ + -O . 
\\ + --out_prefix $prefix \\ + --vis $args $args2 + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + STAR-Fusion: \$(STAR-Fusion --version 2>&1 | grep -i 'version' | sed 's/STAR-Fusion version: //') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.FusionInspector.log + touch ${prefix}.FusionInspector.fusions.tsv + touch ${prefix}.FusionInspector.fusions.tsv.annotated.coding_effect + touch ${prefix}.gtf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + STAR-Fusion: \$(STAR-Fusion --version 2>&1 | grep -i 'version' | sed 's/STAR-Fusion version: //') + END_VERSIONS + """ +} diff --git a/modules/local/fusioninspector/meta.yml b/modules/local/fusioninspector/meta.yml new file mode 100644 index 00000000..cc03239b --- /dev/null +++ b/modules/local/fusioninspector/meta.yml @@ -0,0 +1,40 @@ +name: fusioninspector +description: Validation of Fusion Transcript Predictions +keywords: + - fusioninspector +tools: + - fusioninspector: + description: Validation of Fusion Transcript Predictions + homepage: https://github.com/FusionInspector/FusionInspector + documentation: https://github.com/FusionInspector/FusionInspector/wiki + tool_dev_url: https://github.com/FusionInspector/FusionInspector + doi: "10.1101/2021.08.02.454639" + licence: https://github.com/FusionInspector/FusionInspector/blob/master/LICENSE.txt + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - reads: + type: file + description: FASTQ file + pattern: "*.{fastq*}" + - reference: + type: directory + description: Path to ctat references + pattern: "*" + +output: + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - reference: + type: directory + description: Genome resource path + pattern: "*" + +authors: + - "@rannick" diff --git a/modules/local/fusionreport/detect/main.nf b/modules/local/fusionreport/detect/main.nf new file mode 100644 index 00000000..56a29ab4 --- /dev/null +++ b/modules/local/fusionreport/detect/main.nf @@ -0,0 +1,63 @@ +process FUSIONREPORT { + tag "$meta.id" + label 'process_medium' + + container "docker.io/clinicalgenomics/fusion-report:3.1.0" + + + input: + tuple val(meta), path(reads), path(arriba_fusions), path(starfusion_fusions), path(fusioncatcher_fusions) + tuple val(meta2), path(fusionreport_ref) + val(tools_cutoff) + + output: + tuple val(meta), path("*fusionreport.tsv") , emit: fusion_list + tuple val(meta), path("*fusionreport_filtered.tsv") , emit: fusion_list_filtered + tuple val(meta), path("*index.html") , emit: report + tuple val(meta), path("*_*.html") , optional:true, emit: html + tuple val(meta), path("*.csv") , optional:true, emit: csv + tuple val(meta), path("*.json") , optional:true, emit: json + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def tools = params.arriba || params.all ? "--arriba ${arriba_fusions} " : '' + tools += params.starfusion || params.all ? "--starfusion ${starfusion_fusions} " : '' + tools += params.fusioncatcher || params.all ? "--fusioncatcher ${fusioncatcher_fusions} " : '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + fusion_report run $meta.id . 
$fusionreport_ref $tools --allow-multiple-gene-symbols --tool-cutoff $tools_cutoff $args $args2 + + mv fusion_list.tsv ${prefix}.fusionreport.tsv + mv fusion_list_filtered.tsv ${prefix}.fusionreport_filtered.tsv + mv index.html ${prefix}_fusionreport_index.html + [ ! -f fusions.csv ] || mv fusions.csv ${prefix}.fusions.csv + [ ! -f fusions.json ] || mv fusions.json ${prefix}.fusions.json + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fusion_report: \$(fusion_report --version | sed 's/fusion-report //') + fusion_report DB retrieval: \$(cat $fusionreport_ref/DB-timestamp.txt) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.fusionreport_filtered.tsv + touch ${prefix}.fusionreport.tsv + touch ${prefix}_fusionreport_index.html + touch AAA_BBB.html + touch ${prefix}.fusions.csv + touch ${prefix}.fusions.json + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fusion_report: \$(fusion_report --version | sed 's/fusion-report //') + END_VERSIONS + """ +} diff --git a/modules/local/fusionreport/detect/meta.yml b/modules/local/fusionreport/detect/meta.yml new file mode 100644 index 00000000..ae3601dc --- /dev/null +++ b/modules/local/fusionreport/detect/meta.yml @@ -0,0 +1,51 @@ +name: fusionreport +description: fusionreport +keywords: + - sort +tools: + - fusionreport: + description: Tool for parsing outputs from fusion detection tools + homepage: https://github.com/Clinical-Genomics/fusion-report + documentation: https://matq007.github.io/fusion-report/#/ + doi: "10.1101/011650" + licence: ["GPL v3"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - reference: + type: path + description: Path to fusionreport references + pattern: "*" + - arriba_fusions: + type: path + description: File + pattern: "*.fusions.tsv" + - starfusion_fusions: + type: path + description: File containing fusions from STARfusion + pattern: "*.starfusion.fusion_predictions.tsv" + - fusioncatcher_fusions: + type: path + description: File containing fusions from fusioncatcher + pattern: "*.fusions.tsv" + +output: + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - fusion_list: + type: file + description: File containing the summary of all fusions fed-in + pattern: "*.tsv" + - report: + type: file + description: HTML files + pattern: "*.html" + +authors: + - "@praveenraj2018, @rannick" diff --git a/modules/local/fusionreport/detect/tests/main.nf.test b/modules/local/fusionreport/detect/tests/main.nf.test new file mode 100644 index 00000000..9fc89540 --- /dev/null +++ b/modules/local/fusionreport/detect/tests/main.nf.test @@ -0,0 +1,116 @@ +nextflow_process { + + name "Test Process FUSIONREPORT" + script "../main.nf" + process "FUSIONREPORT" + tag "modules" + tag "modules_local" + tag "fusionreport" + + + test("FUSIONREPORT - arriba - starfusion - fusioncatcher") { + config './nextflow.config' + + setup { + run("FUSIONREPORT_DOWNLOAD") { + script "../../../fusionreport/download/main.nf" + process { + """ + """ + } + } + } + + when { + process { + """ + + input[0] = [ + [ id:'test_sample' ], // meta map + file("https://github.com/nf-core/test-datasets/raw/rnafusion/testdata/human/reads_1.fq.gz"), + file("https://github.com/Clinical-Genomics/fusion-report/raw/master/tests/test_data/arriba.tsv"), + file("https://github.com/Clinical-Genomics/fusion-report/raw/master/tests/test_data/starfusion.tsv"), + file("https://github.com/Clinical-Genomics/fusion-report/raw/master/tests/test_data/fusioncatcher.txt") + ] + + input[1] = 
FUSIONREPORT_DOWNLOAD.out.fusionreport_ref + input[2] = 1 + """ + } + params { + arriba = true + starfusion = true + fusioncatcher = true + no_cosmic = true + outdir = "$outputDir" + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.fusion_list, + process.out.fusion_list_filtered, + process.out.csv, + file(process.out.report[0][1]).name, + process.out.html[0][1].collect { file(it).name }, + process.out.json, + process.out.versions.flatten().first().toString().split('/')[-1] // md5sum not stable as versions contains DB timestamp + ).match() } + ) + } + + } + + test("FUSIONREPORT - arriba - starfusion - fusioncatcher - stub") { + config './nextflow.config' + + setup { + run("FUSIONREPORT_DOWNLOAD") { + script "../../../fusionreport/download/main.nf" + process { + """ + """ + } + } + } + + options "-stub" + + when { + process { + """ + + input[0] = [ + [ id:'test_sample' ], // meta map + file("https://github.com/nf-core/test-datasets/raw/rnafusion/testdata/human/reads_1.fq.gz"), + file("https://github.com/Clinical-Genomics/fusion-report/raw/master/tests/test_data/arriba.tsv"), + file("https://github.com/Clinical-Genomics/fusion-report/raw/master/tests/test_data/starfusion.tsv"), + file("https://github.com/Clinical-Genomics/fusion-report/raw/master/tests/test_data/fusioncatcher.txt") + ] + + input[1] = FUSIONREPORT_DOWNLOAD.out.fusionreport_ref + + input[2] = 1 + """ + } + params { + arriba = true + starfusion = true + fusioncatcher = true + no_cosmic = true + outdir = "$outputDir" + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + +} diff --git a/modules/local/fusionreport/detect/tests/main.nf.test.snap b/modules/local/fusionreport/detect/tests/main.nf.test.snap new file mode 100644 index 00000000..c20e3ec4 --- /dev/null +++ b/modules/local/fusionreport/detect/tests/main.nf.test.snap @@ -0,0 +1,186 @@ +{ + "FUSIONREPORT - arriba - starfusion - 
fusioncatcher": { + "content": [ + [ + [ + { + "id": "test_sample" + }, + "test_sample.fusionreport.tsv:md5,3593b7021f26cc5427fdc96f0d1c72f0" + ] + ], + [ + [ + { + "id": "test_sample" + }, + "test_sample.fusionreport_filtered.tsv:md5,3593b7021f26cc5427fdc96f0d1c72f0" + ] + ], + [ + [ + { + "id": "test_sample" + }, + "test_sample.fusions.csv:md5,49f378c2112d7e0b3b17d9095c79e6bd" + ] + ], + "test_sample_fusionreport_index.html", + [ + "AKAP9_BRAF.html", + "BRD4-1_NUTM1.html", + "BRD4_NUTM1.html", + "CD74_AL132671.2.html", + "CD74_ROS1.html", + "CIC_DUX4.html", + "DUX4_IGH@.html", + "EML4_ALK.html", + "ETV6_NTRK3.html", + "EWSR1_ATF1.html", + "EWSR1_FLI1.html", + "FGFR3_TACC3.html", + "FIP1L1_PDGFRA.html", + "GOPC_ROS1.html", + "HOOK3_RET.html", + "IGH@_CRLF2.html", + "MALT1_IGH@.html", + "NPM1_ALK.html", + "TMPRSS2_ETV1.html", + "test_sample_fusionreport_index.html" + ], + [ + [ + { + "id": "test_sample" + }, + "fusion_genes_mqc.json:md5,b196dc8d064a47d17fd3a032b8fbed4f" + ] + ], + "versions.yml" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-06T14:23:44.59690452" + }, + "FUSIONREPORT - arriba - starfusion - fusioncatcher - stub": { + "content": [ + { + "0": [ + "versions.yml:md5,6bd28f2526774f519a7627a30c6a7f2f" + ], + "1": [ + [ + { + "id": "test_sample" + }, + "test_sample.fusionreport.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test_sample" + }, + "test_sample.fusionreport_filtered.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test_sample" + }, + "test_sample_fusionreport_index.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "test_sample" + }, + [ + "AAA_BBB.html:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_sample_fusionreport_index.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "5": [ + [ + { + "id": "test_sample" + }, + "test_sample.fusions.csv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + [ + { + 
"id": "test_sample" + }, + "test_sample.fusions.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "csv": [ + [ + { + "id": "test_sample" + }, + "test_sample.fusions.csv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "fusion_list": [ + [ + { + "id": "test_sample" + }, + "test_sample.fusionreport.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "fusion_list_filtered": [ + [ + { + "id": "test_sample" + }, + "test_sample.fusionreport_filtered.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "html": [ + [ + { + "id": "test_sample" + }, + [ + "AAA_BBB.html:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_sample_fusionreport_index.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "json": [ + [ + { + "id": "test_sample" + }, + "test_sample.fusions.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "report": [ + [ + { + "id": "test_sample" + }, + "test_sample_fusionreport_index.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,6bd28f2526774f519a7627a30c6a7f2f" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-10T15:13:58.414161" + } +} \ No newline at end of file diff --git a/modules/local/fusionreport/detect/tests/nextflow.config b/modules/local/fusionreport/detect/tests/nextflow.config new file mode 100644 index 00000000..a1c32707 --- /dev/null +++ b/modules/local/fusionreport/detect/tests/nextflow.config @@ -0,0 +1,10 @@ +process { + withName: 'FUSIONREPORT_DOWNLOAD' { + ext.args = "--no-cosmic" + } + + withName: 'FUSIONREPORT' { + ext.args = { {params.no_cosmic} ? 
"--no-cosmic" : "" } + ext.args2 = "--export csv" + } +} diff --git a/modules/local/fusionreport/download/main.nf b/modules/local/fusionreport/download/main.nf new file mode 100644 index 00000000..89f631db --- /dev/null +++ b/modules/local/fusionreport/download/main.nf @@ -0,0 +1,42 @@ +process FUSIONREPORT_DOWNLOAD { + tag 'fusionreport' + label 'process_medium' + + conda "bioconda::star=2.7.9a" + container "docker.io/clinicalgenomics/fusion-report:3.1.0" + + output: + tuple val(meta), path("fusion_report_db"), emit: fusionreport_ref + path "versions.yml" , emit: versions + + script: + meta = [id: 'fusion_report_db'] + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + """ + fusion_report download $args ./ + mkdir fusion_report_db + mv *.txt *.log *.db fusion_report_db/ + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fusion_report: \$(fusion_report --version | sed 's/fusion-report //') + END_VERSIONS + """ + + stub: + meta = [id: 'fusion_report_db'] + """ + mkdir fusion_report_db + touch fusion_report_db/cosmic.db + touch fusion_report_db/fusiongdb2.db + touch fusion_report_db/mitelman.db + touch fusion_report_db/DB-timestamp.txt + touch fusion_report_db/fusion_report.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fusion_report: \$(fusion_report --version | sed 's/fusion-report //') + END_VERSIONS + """ +} diff --git a/modules/local/fusionreport/download/meta.yml b/modules/local/fusionreport/download/meta.yml new file mode 100644 index 00000000..21a15a89 --- /dev/null +++ b/modules/local/fusionreport/download/meta.yml @@ -0,0 +1,35 @@ +name: fusionreport_download +description: Build DB for fusionreport +keywords: + - sort +tools: + - fusioncatcher: + description: Build DB for fusionreport + homepage: https://github.com/ndaniel/fusioncatcher/ + documentation: https://github.com/ndaniel/fusioncatcher/blob/master/doc/manual.md + tool_dev_url: https://github.com/ndaniel/fusioncatcher/ + doi: "10.1101/011650" + licence: 
["GPL v3"] + +input: + - username: + type: value + description: Organism for which the data is downloaded from Ensembl database and built + pattern: "*" + - passwd: + type: value + description: Organism for which the data is downloaded from Ensembl database and built + pattern: "*" + +output: + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - reference: + type: directory + description: directory containing the genome resource files required for fusioncatcher + pattern: "fusioncatcher-genome" + +authors: + - "@praveenraj2018" diff --git a/modules/local/fusionreport/download/tests/main.nf.test b/modules/local/fusionreport/download/tests/main.nf.test new file mode 100644 index 00000000..35af2a52 --- /dev/null +++ b/modules/local/fusionreport/download/tests/main.nf.test @@ -0,0 +1,51 @@ +nextflow_process { + + name "Test Process FUSIONREPORT_DOWNLOAD" + script "../main.nf" + process "FUSIONREPORT_DOWNLOAD" + + test("Download fusionreport databases") { + config './nextflow.config' + + when { + process { + """ + // This process doesn't have any inputs + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + path(process.out.fusionreport_ref[0][1]).resolve("fusiongdb2.db"), + path(process.out.fusionreport_ref[0][1]).resolve("mitelman.db"), + path(process.out.fusionreport_ref[0][1]).resolve("DB-timestamp.txt").exists(), + path(process.out.fusionreport_ref[0][1]).resolve("fusion_report.log").exists(), + process.out.versions + ).match() } + ) + } + } + + test("Create stub files") { + + options "-stub" + + when { + process { + """ + // This process doesn't have any inputs + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/local/fusionreport/download/tests/main.nf.test.snap b/modules/local/fusionreport/download/tests/main.nf.test.snap new file mode 100644 index 00000000..722d8c68 --- /dev/null 
+++ b/modules/local/fusionreport/download/tests/main.nf.test.snap @@ -0,0 +1,63 @@ +{ + "Download fusionreport databases": { + "content": [ + "fusiongdb2.db:md5,e1ac123a744e515d3e5f85b8344d526a", + "mitelman.db:md5,1363795c97f77c641065ecd9ad0e484a", + true, + true, + [ + "versions.yml:md5,fa5f13c563f431912048c1802b5a0c74" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-05T19:27:38.99855171" + }, + "Create stub files": { + "content": [ + { + "0": [ + [ + { + "id": "fusion_report_db" + }, + [ + "DB-timestamp.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "cosmic.db:md5,d41d8cd98f00b204e9800998ecf8427e", + "fusion_report.log:md5,d41d8cd98f00b204e9800998ecf8427e", + "fusiongdb2.db:md5,d41d8cd98f00b204e9800998ecf8427e", + "mitelman.db:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + "versions.yml:md5,fa5f13c563f431912048c1802b5a0c74" + ], + "fusionreport_ref": [ + [ + { + "id": "fusion_report_db" + }, + [ + "DB-timestamp.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "cosmic.db:md5,d41d8cd98f00b204e9800998ecf8427e", + "fusion_report.log:md5,d41d8cd98f00b204e9800998ecf8427e", + "fusiongdb2.db:md5,d41d8cd98f00b204e9800998ecf8427e", + "mitelman.db:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,fa5f13c563f431912048c1802b5a0c74" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-10T15:05:22.781845" + } +} \ No newline at end of file diff --git a/modules/local/fusionreport/download/tests/nextflow.config b/modules/local/fusionreport/download/tests/nextflow.config new file mode 100644 index 00000000..7c1ffb50 --- /dev/null +++ b/modules/local/fusionreport/download/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: 'FUSIONREPORT_DOWNLOAD' { + ext.args = "--no-cosmic" + } +} diff --git a/modules/local/gencode_download/main.nf b/modules/local/gencode_download/main.nf new file mode 100644 index 00000000..1f466d26 --- /dev/null 
+++ b/modules/local/gencode_download/main.nf @@ -0,0 +1,51 @@ +// Downloads the GENCODE genome FASTA and annotation GTF for the given release and genome build, and records the wget version used. +process GENCODE_DOWNLOAD { + tag "gencode_download" + label 'process_low' + + conda "bioconda::gnu-wget=1.18" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gnu-wget:1.18--h5bf99c6_5' : + 'quay.io/biocontainers/gnu-wget:1.18--h5bf99c6_5' }" + + input: + val genome_gencode_version + val genome + + output: + path "*.fa" , emit: fasta + path "*.gtf" , emit: gtf + path "versions.yml", emit: versions + + + when: + task.ext.when == null || task.ext.when + + script: + // Non-GRCh38 builds are fetched from the "<genome>_mapping/" sub-folder (trailing slash keeps the URL well-formed) and use the GRCh37 lift-over GTF name ("lift37"). + def folder_gencode = genome.contains("38") ? "" : "${genome}_mapping/" + def gtf_file_name = genome.contains("38") ? "gencode.v${genome_gencode_version}.primary_assembly.annotation.gtf.gz" : "gencode.v${genome_gencode_version}lift37.annotation.gtf.gz" + """ + wget ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_${genome_gencode_version}/${folder_gencode}${genome}.primary_assembly.genome.fa.gz -O Homo_sapiens_${genome}_${genome_gencode_version}_dna_primary_assembly.fa.gz + gunzip Homo_sapiens_${genome}_${genome_gencode_version}_dna_primary_assembly.fa.gz + wget ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_${genome_gencode_version}/${folder_gencode}${gtf_file_name} -O Homo_sapiens_${genome}_${genome_gencode_version}.gtf.gz + gunzip Homo_sapiens_${genome}_${genome_gencode_version}.gtf.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + wget: \$(wget -V 2>&1 | grep "GNU Wget" | cut -d" " -f3) + END_VERSIONS + """ + + stub: + """ + touch Homo_sapiens.${genome}.${genome_gencode_version}_dna_primary_assembly.fa + touch Homo_sapiens.${genome}.${genome_gencode_version}.gtf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + wget: \$(wget -V 2>&1 | grep "GNU Wget" | cut -d" " -f3) + END_VERSIONS + """ + +} diff --git 
a/modules/local/get_rrna_transcript/environment.yml b/modules/local/get_rrna_transcript/environment.yml new file mode 100644 index 00000000..66b65c3a --- /dev/null +++ b/modules/local/get_rrna_transcript/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::pirate=1.0.5 + - bioconda::perl-bioperl=1.7.8 diff --git a/modules/local/get_rrna_transcript/main.nf b/modules/local/get_rrna_transcript/main.nf new file mode 100644 index 00000000..5331f534 --- /dev/null +++ b/modules/local/get_rrna_transcript/main.nf @@ -0,0 +1,43 @@ +process GET_RRNA_TRANSCRIPTS { + tag 'get_rrna_bed' + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/pirate:1.0.5--hdfd78af_0' : + 'biocontainers/pirate:1.0.5--hdfd78af_0' }" + + input: + tuple val(meta), path(gtf) + + output: + tuple val(meta), path('rrna.gtf') , emit: rrnagtf + tuple val(meta), path('rrna.bed') , emit: bed + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + """ + $baseDir/bin/get_rrna_transcripts.py --gtf ${gtf} --output rrna.gtf + + $baseDir/bin/gtf2bed rrna.gtf > rrna.bed + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + get_rrna_transcripts: v1.0 + END_VERSIONS + """ + + stub: + """ + touch rrna.gtf + touch rrna.bed + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + get_rrna_transcripts: v1.0 + END_VERSIONS + """ +} diff --git a/modules/local/hgnc/main.nf b/modules/local/hgnc/main.nf new file mode 100644 index 00000000..aa5c077c --- /dev/null +++ b/modules/local/hgnc/main.nf @@ -0,0 +1,38 @@ +process HGNC_DOWNLOAD { + tag "hgnc" + label 'process_low' + + conda "bioconda::gnu-wget=1.18" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/gnu-wget:1.18--h5bf99c6_5' : + 'biocontainers/gnu-wget:1.18--h5bf99c6_5' }" + + output: + path "hgnc_complete_set.txt" , emit: hgnc_ref + path "HGNC-DB-timestamp.txt" , emit: hgnc_date + path "versions.yml" , emit: versions + + + script: + """ + wget https://storage.googleapis.com/public-download-files/hgnc/tsv/tsv/hgnc_complete_set.txt + date +%Y-%m-%d/%H:%M > HGNC-DB-timestamp.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + wget: \$(echo wget -V 2>&1 | grep "GNU Wget" | cut -d" " -f3) + END_VERSIONS + """ + + stub: + """ + touch "hgnc_complete_set.txt" + touch "HGNC-DB-timestamp.txt" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + wget: \$(echo wget -V 2>&1 | grep "GNU Wget" | cut -d" " -f3) + END_VERSIONS + """ + +} diff --git a/modules/local/hgnc/tests/main.nf.test b/modules/local/hgnc/tests/main.nf.test new file mode 100644 index 00000000..cecc4302 --- /dev/null +++ b/modules/local/hgnc/tests/main.nf.test @@ -0,0 +1,52 @@ +nextflow_process { + + name "Test Process HGNC_DOWNLOAD" + script "../main.nf" + process "HGNC_DOWNLOAD" + + test("Should download HGNC files") { + + when { + process { + """ + // This process doesn't have any inputs + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.hgnc_ref, + file(process.out.hgnc_date[0]).name, + process.out.versions + ).match() } + ) + } + } + + test("Should create stub files") { + + options "-stub" + + when { + process { + """ + // This process doesn't have any inputs + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.hgnc_ref, + file(process.out.hgnc_date[0]).name, + process.out.versions + ).match() } + ) + } + } +} diff --git a/modules/local/hgnc/tests/main.nf.test.snap b/modules/local/hgnc/tests/main.nf.test.snap new file mode 100644 index 00000000..8512dcf6 --- /dev/null +++ b/modules/local/hgnc/tests/main.nf.test.snap @@ -0,0 +1,34 @@ 
+{ + "Should create stub files": { + "content": [ + [ + "hgnc_complete_set.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "HGNC-DB-timestamp.txt", + [ + "versions.yml:md5,a7b7ccbd9eda8036baf548cdf1cb6867" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-26T17:42:04.47884487" + }, + "Should download HGNC files": { + "content": [ + [ + "hgnc_complete_set.txt:md5,29571d88d1648e8764b70791df6a5d2d" + ], + "HGNC-DB-timestamp.txt", + [ + "versions.yml:md5,a7b7ccbd9eda8036baf548cdf1cb6867" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-26T17:41:49.965512273" + } +} \ No newline at end of file diff --git a/modules/local/starfusion/build/environment.yml b/modules/local/starfusion/build/environment.yml new file mode 100644 index 00000000..ef7f9316 --- /dev/null +++ b/modules/local/starfusion/build/environment.yml @@ -0,0 +1,8 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::dfam=3.7 + - bioconda::hmmer=3.4 + - bioconda::minimap2=2.28 + - bioconda::star-fusion=1.14.0 diff --git a/modules/local/starfusion/build/main.nf b/modules/local/starfusion/build/main.nf new file mode 100644 index 00000000..fcd9cf80 --- /dev/null +++ b/modules/local/starfusion/build/main.nf @@ -0,0 +1,129 @@ +process STARFUSION_BUILD { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/be/bed86145102fdf7e381e1a506a4723676f98b4bbe1db5085d02213cef18525c9/data' : + 'community.wave.seqera.io/library/dfam_hmmer_minimap2_star-fusion:aa3a8e3951498552'}" + + input: + tuple val(meta), path(fasta) + tuple val(meta2), path(gtf) + path fusion_annot_lib + val dfam_species + + output: + tuple val(meta), path("ctat_genome_lib_build_dir"), emit: reference + path "versions.yml" , emit: versions + + script: + def args = task.ext.args ?: '' + """ + prep_genome_lib.pl \\ + --genome_fa $fasta \\ + --gtf $gtf \\ + --dfam_db ${dfam_species} \\ + --pfam_db current \\ + --fusion_annot_lib $fusion_annot_lib \\ + --CPU $task.cpus \\ + ${args} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + STAR-Fusion: \$(STAR-Fusion --version 2>&1 | grep -i 'version' | sed 's/STAR-Fusion version: //') + END_VERSIONS + """ + + stub: + """ + mkdir -p ctat_genome_lib_build_dir + + touch ctat_genome_lib_build_dir/AnnotFilterRule.pm + gzip -c /dev/null > ctat_genome_lib_build_dir/blast_pairs.dat.gz + touch ctat_genome_lib_build_dir/blast_pairs.idx + + mkdir -p ctat_genome_lib_build_dir/__chkpts + touch ctat_genome_lib_build_dir/__chkpts/annotfiltrule_cp.ok + touch ctat_genome_lib_build_dir/__chkpts/blast_pairs.idx.ok + touch ctat_genome_lib_build_dir/__chkpts/cp_gene_blast_pairs.ok + touch ctat_genome_lib_build_dir/__chkpts/cp_pfam_dat.ok + touch ctat_genome_lib_build_dir/__chkpts/cp_ref_annot_cdna.ok + touch ctat_genome_lib_build_dir/__chkpts/fusion_annot_lib.cp.ok + touch ctat_genome_lib_build_dir/__chkpts/_fusion_annot_lib.idx.ok + touch ctat_genome_lib_build_dir/__chkpts/index_pfam_hits.ok + touch ctat_genome_lib_build_dir/__chkpts/index_ref_annot_cdna.ok + touch ctat_genome_lib_build_dir/__chkpts/makeblastdb.ok + touch ctat_genome_lib_build_dir/__chkpts/mm2_genome_idx.ok + touch ctat_genome_lib_build_dir/__chkpts/mm2.splice_bed.ok + touch ctat_genome_lib_build_dir/__chkpts/_prot_info_db.ok + touch 
ctat_genome_lib_build_dir/__chkpts/ref_annot.cdsplus.dfam_masked.fa.cp.ok + touch ctat_genome_lib_build_dir/__chkpts/ref_annot.cdsplus.dfam_masked.fa.idx.ok + touch ctat_genome_lib_build_dir/__chkpts/ref_annot.gtf.gene_spans.ok + touch ctat_genome_lib_build_dir/__chkpts/ref_annot.gtf.mini.sortu.ok + touch ctat_genome_lib_build_dir/__chkpts/ref_annot.gtf.ok + touch ctat_genome_lib_build_dir/__chkpts/ref_genome_fai.ok + touch ctat_genome_lib_build_dir/__chkpts/ref_genome.fa.ok + touch ctat_genome_lib_build_dir/__chkpts/trans.blast.dat.cp.ok + touch ctat_genome_lib_build_dir/__chkpts/trans.blast.dat.index.ok + touch ctat_genome_lib_build_dir/__chkpts/validate_ctat_genome_lib.ok + + gzip -c /dev/null > ctat_genome_lib_build_dir/fusion_annot_lib.gz + touch ctat_genome_lib_build_dir/fusion_annot_lib.idx + touch ctat_genome_lib_build_dir/pfam_domains.dbm + gzip -c /dev/null > ctat_genome_lib_build_dir/PFAM.domtblout.dat.gz + + touch ctat_genome_lib_build_dir/ref_annot.cdna.fa + touch ctat_genome_lib_build_dir/ref_annot.cdna.fa.idx + touch ctat_genome_lib_build_dir/ref_annot.cds + touch ctat_genome_lib_build_dir/ref_annot.cdsplus.fa + touch ctat_genome_lib_build_dir/ref_annot.cdsplus.fa.idx + touch ctat_genome_lib_build_dir/ref_annot.gtf + touch ctat_genome_lib_build_dir/ref_annot.gtf.gene_spans + touch ctat_genome_lib_build_dir/ref_annot.gtf.mini.sortu + touch ctat_genome_lib_build_dir/ref_annot.gtf.mm2.splice.bed + touch ctat_genome_lib_build_dir/ref_annot.pep + touch ctat_genome_lib_build_dir/ref_annot.prot_info.dbm + + touch ctat_genome_lib_build_dir/ref_genome.fa + touch ctat_genome_lib_build_dir/ref_genome.fa.fai + touch ctat_genome_lib_build_dir/ref_genome.fa.mm2 + touch ctat_genome_lib_build_dir/ref_genome.fa.ndb + touch ctat_genome_lib_build_dir/ref_genome.fa.nhr + touch ctat_genome_lib_build_dir/ref_genome.fa.nin + touch ctat_genome_lib_build_dir/ref_genome.fa.njs + touch ctat_genome_lib_build_dir/ref_genome.fa.not + touch 
ctat_genome_lib_build_dir/ref_genome.fa.nsq + touch ctat_genome_lib_build_dir/ref_genome.fa.ntf + touch ctat_genome_lib_build_dir/ref_genome.fa.nto + + mkdir -p ctat_genome_lib_build_dir/ref_genome.fa.star.idx + touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/build.ok + touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/chrLength.txt + touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/chrNameLength.txt + touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/chrName.txt + touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/chrStart.txt + touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/exonGeTrInfo.tab + touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/exonInfo.tab + touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/geneInfo.tab + touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/Genome + touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/genomeParameters.txt + touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/Log.out + touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/SA + touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/SAindex + touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/sjdbInfo.txt + touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/sjdbList.fromGTF.out.tab + touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/sjdbList.out.tab + touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/transcriptInfo.tab + + touch ctat_genome_lib_build_dir/trans.blast.align_coords.align_coords.dat + touch ctat_genome_lib_build_dir/trans.blast.align_coords.align_coords.dbm + gzip -c /dev/null > ctat_genome_lib_build_dir/trans.blast.dat.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + STAR-Fusion: \$(STAR-Fusion --version 2>&1 | grep -i 'version' | sed 's/STAR-Fusion version: //') + END_VERSIONS + """ + +} diff --git a/modules/local/starfusion/build/meta.yml b/modules/local/starfusion/build/meta.yml new file mode 100644 index 00000000..d7f65a0b --- /dev/null +++ 
b/modules/local/starfusion/build/meta.yml @@ -0,0 +1,38 @@ +name: starfusion_build +description: Download STAR-fusion genome resource required to run STAR-Fusion caller +keywords: + - download +tools: + - star-fusion: + description: Fusion calling algorithm for RNAseq data + homepage: https://github.com/STAR-Fusion/ + documentation: https://github.com/STAR-Fusion/STAR-Fusion/wiki/installing-star-fusion + tool_dev_url: https://github.com/STAR-Fusion/STAR-Fusion + doi: "10.1186/s13059-019-1842-9" + licence: ["GPL v3"] + +input: + - fasta: + type: file + description: genome fasta file + pattern: "*.{fasta}" + - gtf: + type: file + description: genome gtf file + pattern: "*.{gtf}" + - fusion_annot_lib: + type: file + description: Fusion annotation library (key/val pairs, tab-delimited). + pattern: "*.dat.gz" + - dfam_species: + type: string + description: DNA transposable element database (Dfam.hmm), required for repeat masking. Only 'human' or 'mouse' are accepted (will automatically pull the resources from dfam). 
+ +output: + - reference: + type: directory + description: Reference dir + pattern: "ctat_genome_lib_build_dir" + +authors: + - "@praveenraj2018" diff --git a/modules/local/starfusion/build/tests/main.nf.test b/modules/local/starfusion/build/tests/main.nf.test new file mode 100644 index 00000000..8eb1b6f6 --- /dev/null +++ b/modules/local/starfusion/build/tests/main.nf.test @@ -0,0 +1,138 @@ +nextflow_process { + + name "Test Process STARFUSION_BUILD" + script "../main.nf" + process "STARFUSION_BUILD" + + test("STARFUSION_BUILD - human - minigenome") { + + when { + process { + """ + input[0] = [ + [ id:'minigenome fasta' ], + file("https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/minigenome.fa") + ] + input[1] = [ + [ id:'minigenome gtf' ], + file("https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/minigenome.gtf") + ] + + input [2] = file("https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/CTAT_HumanFusionLib.mini.dat.gz") + input [3] = "human" + + """ + } + } + + then { + assert snapshot( + path(process.out.reference[0][1]).resolve("AnnotFilterRule.pm"), + path(process.out.reference[0][1]).resolve("blast_pairs.dat.gz").exists(), + path(process.out.reference[0][1]).resolve("blast_pairs.idx").exists(), + path(process.out.reference[0][1]).resolve("__chkpts/annotfiltrule_cp.ok"), + path(process.out.reference[0][1]).resolve("__chkpts/blast_pairs.idx.ok"), + path(process.out.reference[0][1]).resolve("__chkpts/cp_gene_blast_pairs.ok"), + path(process.out.reference[0][1]).resolve("__chkpts/cp_pfam_dat.ok"), + path(process.out.reference[0][1]).resolve("__chkpts/cp_ref_annot_cdna.ok"), + path(process.out.reference[0][1]).resolve("__chkpts/fusion_annot_lib.cp.ok"), + path(process.out.reference[0][1]).resolve("__chkpts/_fusion_annot_lib.idx.ok"), + path(process.out.reference[0][1]).resolve("__chkpts/index_pfam_hits.ok"), + path(process.out.reference[0][1]).resolve("__chkpts/index_ref_annot_cdna.ok"), + 
path(process.out.reference[0][1]).resolve("__chkpts/makeblastdb.ok"), + path(process.out.reference[0][1]).resolve("__chkpts/mm2_genome_idx.ok"), + path(process.out.reference[0][1]).resolve("__chkpts/mm2.splice_bed.ok"), + path(process.out.reference[0][1]).resolve("__chkpts/_prot_info_db.ok"), + path(process.out.reference[0][1]).resolve("__chkpts/ref_annot.cdsplus.dfam_masked.fa.cp.ok"), + path(process.out.reference[0][1]).resolve("__chkpts/ref_annot.cdsplus.dfam_masked.fa.idx.ok"), + path(process.out.reference[0][1]).resolve("__chkpts/ref_annot.gtf.gene_spans.ok"), + path(process.out.reference[0][1]).resolve("__chkpts/ref_annot.gtf.mini.sortu.ok"), + path(process.out.reference[0][1]).resolve("__chkpts/ref_annot.gtf.ok"), + path(process.out.reference[0][1]).resolve("__chkpts/ref_genome_fai.ok"), + path(process.out.reference[0][1]).resolve("__chkpts/ref_genome.fa.ok"), + path(process.out.reference[0][1]).resolve("__chkpts/trans.blast.dat.cp.ok"), + path(process.out.reference[0][1]).resolve("__chkpts/trans.blast.dat.index.ok"), + path(process.out.reference[0][1]).resolve("__chkpts/validate_ctat_genome_lib.ok"), + path(process.out.reference[0][1]).resolve("fusion_annot_lib.gz"), + path(process.out.reference[0][1]).resolve("fusion_annot_lib.idx").exists(), + path(process.out.reference[0][1]).resolve("pfam_domains.dbm").exists(), + path(process.out.reference[0][1]).resolve("PFAM.domtblout.dat.gz").exists(), + path(process.out.reference[0][1]).resolve("ref_annot.cdna.fa").exists(), + path(process.out.reference[0][1]).resolve("ref_annot.cdna.fa.idx").exists(), + path(process.out.reference[0][1]).resolve("ref_annot.cds").exists(), + path(process.out.reference[0][1]).resolve("ref_annot.cdsplus.fa").exists(), + path(process.out.reference[0][1]).resolve("ref_annot.cdsplus.fa.idx").exists(), + path(process.out.reference[0][1]).resolve("ref_annot.gtf"), + path(process.out.reference[0][1]).resolve("ref_annot.gtf.gene_spans").exists(), + 
path(process.out.reference[0][1]).resolve("ref_annot.gtf.mini.sortu"), + path(process.out.reference[0][1]).resolve("ref_annot.gtf.mm2.splice.bed"), + path(process.out.reference[0][1]).resolve("ref_annot.pep").exists(), + path(process.out.reference[0][1]).resolve("ref_annot.prot_info.dbm").exists(), + path(process.out.reference[0][1]).resolve("ref_genome.fa"), + path(process.out.reference[0][1]).resolve("ref_genome.fa.fai"), + path(process.out.reference[0][1]).resolve("ref_genome.fa.mm2"), + path(process.out.reference[0][1]).resolve("ref_genome.fa.ndb"), + path(process.out.reference[0][1]).resolve("ref_genome.fa.nhr"), + path(process.out.reference[0][1]).resolve("ref_genome.fa.nin").exists(), + path(process.out.reference[0][1]).resolve("ref_genome.fa.njs").exists(), + path(process.out.reference[0][1]).resolve("ref_genome.fa.not"), + path(process.out.reference[0][1]).resolve("ref_genome.fa.nsq"), + path(process.out.reference[0][1]).resolve("ref_genome.fa.ntf"), + path(process.out.reference[0][1]).resolve("ref_genome.fa.nto"), + path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx").exists(), + path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/build.ok"), + path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/chrLength.txt"), + path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/chrNameLength.txt"), + path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/chrName.txt"), + path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/chrStart.txt"), + path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/exonGeTrInfo.tab"), + path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/exonInfo.tab"), + path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/geneInfo.tab"), + path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/Genome"), + path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/genomeParameters.txt").exists(), + 
path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/Log.out").exists(), + path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/SA"), + path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/SAindex"), + path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/sjdbInfo.txt"), + path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/sjdbList.fromGTF.out.tab"), + path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/sjdbList.out.tab"), + path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/transcriptInfo.tab"), + path(process.out.reference[0][1]).resolve("trans.blast.align_coords.align_coords.dat"), + path(process.out.reference[0][1]).resolve("trans.blast.align_coords.align_coords.dbm").exists(), + path(process.out.reference[0][1]).resolve("trans.blast.dat.gz"), + process.out.versions + ).match() + } + + } + + test("STARFUSION_BUILD - human - minigenome - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'minigenome fasta' ], + file("https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/minigenome.fa") + ] + input[1] = [ + [ id:'minigenome gtf' ], + file("https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/minigenome.gtf") + ] + + input [2] = file("https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/CTAT_HumanFusionLib.mini.dat.gz") + input [3] = "human" + + """ + } + } + + then { + assert snapshot(process.out).match() + } + + } + +} diff --git a/modules/local/starfusion/build/tests/main.nf.test.snap b/modules/local/starfusion/build/tests/main.nf.test.snap new file mode 100644 index 00000000..b08f2519 --- /dev/null +++ b/modules/local/starfusion/build/tests/main.nf.test.snap @@ -0,0 +1,266 @@ +{ + "STARFUSION_BUILD - human - minigenome": { + "content": [ + "AnnotFilterRule.pm:md5,5391fcc58d9c71cd1f0e45668c5ec597", + true, + true, + "annotfiltrule_cp.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + 
"blast_pairs.idx.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "cp_gene_blast_pairs.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "cp_pfam_dat.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "cp_ref_annot_cdna.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "fusion_annot_lib.cp.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "_fusion_annot_lib.idx.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "index_pfam_hits.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "index_ref_annot_cdna.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "makeblastdb.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "mm2_genome_idx.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "mm2.splice_bed.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "_prot_info_db.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.cdsplus.dfam_masked.fa.cp.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.cdsplus.dfam_masked.fa.idx.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.gtf.gene_spans.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.gtf.mini.sortu.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.gtf.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome_fai.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "trans.blast.dat.cp.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "trans.blast.dat.index.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "validate_ctat_genome_lib.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "fusion_annot_lib.gz:md5,23d82a5da81f91ca4e1ecd6481992a12", + true, + true, + true, + true, + true, + true, + true, + true, + "ref_annot.gtf:md5,5ce8afe99ef3940a877a04caeacf9181", + true, + "ref_annot.gtf.mini.sortu:md5,1d29ccecdbb7b40a99c84a02d6c2c1be", + "ref_annot.gtf.mm2.splice.bed:md5,340585ea1843bf06bf555575ddecf28c", + true, + true, + "ref_genome.fa:md5,ad699c56ed38566c7d3e9579486b1706", + "ref_genome.fa.fai:md5,e3f74a27219b33ae80fd5de5cbeaf32b", + "ref_genome.fa.mm2:md5,ce50979ea284748eb9f84ae88cfd930e", + 
"ref_genome.fa.ndb:md5,6ea574753b557610f62f6e4ab79e19f5", + "ref_genome.fa.nhr:md5,50f28dae71683c4394bfaf94a1ef4392", + true, + true, + "ref_genome.fa.not:md5,1e53e9d08f1d23af0299cfa87478a7bb", + "ref_genome.fa.nsq:md5,d2361e7871ce4cf51181c112a48f191b", + "ref_genome.fa.ntf:md5,de1250813f0c7affc6d12dac9d0fb6bb", + "ref_genome.fa.nto:md5,33cdeccccebe80329f1fdbee7f5874cb", + true, + "build.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrLength.txt:md5,e02cd536b7281b894246863b160d5d06", + "chrNameLength.txt:md5,07a67d7ac441d7d30d80840b0927e717", + "chrName.txt:md5,f4d0d6595f423084e6b9472e40dfe6e8", + "chrStart.txt:md5,e2031239a74fe5ee9051e9364e4f608a", + "exonGeTrInfo.tab:md5,3c35618d07a8e35a0f9108699fcdda42", + "exonInfo.tab:md5,bcbb3f32fa31fe504cc737f337ad341c", + "geneInfo.tab:md5,db5db4b6e003904e9908fce7c05f0125", + "Genome:md5,9e3efdd0901cabb5a2d589664a63b372", + true, + true, + "SA:md5,7dd9083264be9c6a2194d990bc10d237", + "SAindex:md5,ac4711df685109e04356db9e9cb9fb7f", + "sjdbInfo.txt:md5,e4cc1bbf8bd687cfc3d7c2c702e6def7", + "sjdbList.fromGTF.out.tab:md5,8f3e8604b00d4067e4eb80aa476a8113", + "sjdbList.out.tab:md5,5d78dd49d5db24ca2c056b7ebe5c2059", + "transcriptInfo.tab:md5,b758c0ccaddcf0453bab5905b3cec4a2", + "trans.blast.align_coords.align_coords.dat:md5,9f6b7a75aea03a9671190be25ecdd4c2", + true, + "trans.blast.dat.gz:md5,85ba5ea96c566f751ad83a3e4b8ab128", + null + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.3" + }, + "timestamp": "2024-12-19T17:03:12.812884291" + }, + "STARFUSION_BUILD - human - minigenome - stub": { + "content": [ + { + "0": [ + [ + { + "id": "minigenome fasta" + }, + [ + "AnnotFilterRule.pm:md5,d41d8cd98f00b204e9800998ecf8427e", + "PFAM.domtblout.dat.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + [ + "_fusion_annot_lib.idx.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "_prot_info_db.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "annotfiltrule_cp.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + 
"blast_pairs.idx.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "cp_gene_blast_pairs.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "cp_pfam_dat.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "cp_ref_annot_cdna.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "fusion_annot_lib.cp.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "index_pfam_hits.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "index_ref_annot_cdna.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "makeblastdb.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "mm2.splice_bed.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "mm2_genome_idx.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.cdsplus.dfam_masked.fa.cp.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.cdsplus.dfam_masked.fa.idx.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.gtf.gene_spans.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.gtf.mini.sortu.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.gtf.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome_fai.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "trans.blast.dat.cp.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "trans.blast.dat.index.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "validate_ctat_genome_lib.ok:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "blast_pairs.dat.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "blast_pairs.idx:md5,d41d8cd98f00b204e9800998ecf8427e", + "fusion_annot_lib.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "fusion_annot_lib.idx:md5,d41d8cd98f00b204e9800998ecf8427e", + "pfam_domains.dbm:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.cdna.fa:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.cdna.fa.idx:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.cds:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.cdsplus.fa:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.cdsplus.fa.idx:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.gtf:md5,d41d8cd98f00b204e9800998ecf8427e", + 
"ref_annot.gtf.gene_spans:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.gtf.mini.sortu:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.gtf.mm2.splice.bed:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.pep:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.prot_info.dbm:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.fai:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.mm2:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.ndb:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.nhr:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.nin:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.njs:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.not:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.nsq:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.ntf:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.nto:md5,d41d8cd98f00b204e9800998ecf8427e", + [ + "Genome:md5,d41d8cd98f00b204e9800998ecf8427e", + "Log.out:md5,d41d8cd98f00b204e9800998ecf8427e", + "SA:md5,d41d8cd98f00b204e9800998ecf8427e", + "SAindex:md5,d41d8cd98f00b204e9800998ecf8427e", + "build.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrLength.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrName.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrNameLength.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrStart.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "exonGeTrInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "exonInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "geneInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "genomeParameters.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sjdbInfo.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sjdbList.fromGTF.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "sjdbList.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "transcriptInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + 
"trans.blast.align_coords.align_coords.dat:md5,d41d8cd98f00b204e9800998ecf8427e", + "trans.blast.align_coords.align_coords.dbm:md5,d41d8cd98f00b204e9800998ecf8427e", + "trans.blast.dat.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "reference": [ + [ + { + "id": "minigenome fasta" + }, + [ + "AnnotFilterRule.pm:md5,d41d8cd98f00b204e9800998ecf8427e", + "PFAM.domtblout.dat.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + [ + "_fusion_annot_lib.idx.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "_prot_info_db.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "annotfiltrule_cp.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "blast_pairs.idx.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "cp_gene_blast_pairs.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "cp_pfam_dat.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "cp_ref_annot_cdna.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "fusion_annot_lib.cp.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "index_pfam_hits.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "index_ref_annot_cdna.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "makeblastdb.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "mm2.splice_bed.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "mm2_genome_idx.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.cdsplus.dfam_masked.fa.cp.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.cdsplus.dfam_masked.fa.idx.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.gtf.gene_spans.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.gtf.mini.sortu.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.gtf.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome_fai.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "trans.blast.dat.cp.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "trans.blast.dat.index.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "validate_ctat_genome_lib.ok:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "blast_pairs.dat.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + 
"blast_pairs.idx:md5,d41d8cd98f00b204e9800998ecf8427e", + "fusion_annot_lib.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "fusion_annot_lib.idx:md5,d41d8cd98f00b204e9800998ecf8427e", + "pfam_domains.dbm:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.cdna.fa:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.cdna.fa.idx:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.cds:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.cdsplus.fa:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.cdsplus.fa.idx:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.gtf:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.gtf.gene_spans:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.gtf.mini.sortu:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.gtf.mm2.splice.bed:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.pep:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.prot_info.dbm:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.fai:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.mm2:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.ndb:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.nhr:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.nin:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.njs:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.not:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.nsq:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.ntf:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.nto:md5,d41d8cd98f00b204e9800998ecf8427e", + [ + "Genome:md5,d41d8cd98f00b204e9800998ecf8427e", + "Log.out:md5,d41d8cd98f00b204e9800998ecf8427e", + "SA:md5,d41d8cd98f00b204e9800998ecf8427e", + "SAindex:md5,d41d8cd98f00b204e9800998ecf8427e", + "build.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrLength.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrName.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + 
"chrNameLength.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrStart.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "exonGeTrInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "exonInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "geneInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "genomeParameters.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sjdbInfo.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sjdbList.fromGTF.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "sjdbList.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "transcriptInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "trans.blast.align_coords.align_coords.dat:md5,d41d8cd98f00b204e9800998ecf8427e", + "trans.blast.align_coords.align_coords.dbm:md5,d41d8cd98f00b204e9800998ecf8427e", + "trans.blast.dat.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-12T14:57:33.861849482" + } +} \ No newline at end of file diff --git a/modules/local/starfusion/detect/environment.yml b/modules/local/starfusion/detect/environment.yml new file mode 100644 index 00000000..ef7f9316 --- /dev/null +++ b/modules/local/starfusion/detect/environment.yml @@ -0,0 +1,8 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::dfam=3.7 + - bioconda::hmmer=3.4 + - bioconda::minimap2=2.28 + - bioconda::star-fusion=1.14.0 diff --git a/modules/local/starfusion/detect/main.nf b/modules/local/starfusion/detect/main.nf new file mode 100644 index 00000000..00c78efc --- /dev/null +++ b/modules/local/starfusion/detect/main.nf @@ -0,0 +1,58 @@ +process STARFUSION { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/be/bed86145102fdf7e381e1a506a4723676f98b4bbe1db5085d02213cef18525c9/data' : + 'community.wave.seqera.io/library/dfam_hmmer_minimap2_star-fusion:aa3a8e3951498552'}" + + input: + tuple val(meta), path(reads), path(junction) + path reference + + output: + tuple val(meta), path("*.fusion_predictions.tsv") , emit: fusions + tuple val(meta), path("*.abridged.tsv") , emit: abridged + tuple val(meta), path("*.coding_effect.tsv") , optional: true , emit: coding_effect + path "versions.yml" , emit: versions + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + def fastq_arg = reads ? (meta.single_end ? "--left_fq ${reads[0]}" : "--left_fq ${reads[0]} --right_fq ${reads[1]}") : "" + def junction_arg = junction ? "-J ${junction}" : "" + def args = task.ext.args ?: '' + """ + STAR-Fusion \\ + --genome_lib_dir $reference \\ + $fastq_arg \\ + $junction_arg \\ + --CPU $task.cpus \\ + --examine_coding_effect \\ + --output_dir . 
\\ + $args + + mv star-fusion.fusion_predictions.tsv ${prefix}.starfusion.fusion_predictions.tsv + mv star-fusion.fusion_predictions.abridged.tsv ${prefix}.starfusion.abridged.tsv + mv star-fusion.fusion_predictions.abridged.coding_effect.tsv ${prefix}.starfusion.abridged.coding_effect.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + STAR-Fusion: \$(STAR-Fusion --version 2>&1 | grep -i 'version' | sed 's/STAR-Fusion version: //') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.starfusion.fusion_predictions.tsv + touch ${prefix}.starfusion.abridged.tsv + touch ${prefix}.starfusion.abridged.coding_effect.tsv + cat <<-END_VERSIONS > versions.yml + "${task.process}": + STAR-Fusion: \$(STAR-Fusion --version 2>&1 | grep -i 'version' | sed 's/STAR-Fusion version: //') + END_VERSIONS + """ +} + + diff --git a/modules/local/starfusion/detect/meta.yml b/modules/local/starfusion/detect/meta.yml new file mode 100644 index 00000000..7337dad5 --- /dev/null +++ b/modules/local/starfusion/detect/meta.yml @@ -0,0 +1,56 @@ +name: starfusion +description: Fast and Accurate Fusion Transcript Detection from RNA-Seq +keywords: + - Fusion +tools: + - star-fusion: + description: Fast and Accurate Fusion Transcript Detection from RNA-Seq + homepage: https://github.com/STAR-Fusion/STAR-Fusion + documentation: https://github.com/STAR-Fusion/STAR-Fusion/wiki + tool_dev_url: https://github.com/STAR-Fusion/STAR-Fusion/releases + doi: "10.1101/120295v1" + licence: ["GPL v3"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - reads: + type: file + description: Input FASTQ file(s), one for single-end and two for paired-end data; may be empty when a junction file is supplied + - junction: + type: file + description: Chimeric junction output from STAR aligner + pattern: "*.{out.junction}" + - reference: + type: directory + description: Reference dir + pattern: "ctat_genome_lib_build_dir" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - fusions: + type: file + description: Fusion events from STAR-fusion + pattern: "*.{fusion_predictions.tsv}" + - abridged: + type: file + description: Abridged fusion events from STAR-fusion + pattern: "*.abridged.tsv" + - coding_effect: + type: file + description: Abridged fusion events from STAR-fusion with predicted coding effect (optional) + pattern: "*.{coding_effect.tsv}" + +authors: + - "@praveenraj2018" diff --git a/modules/local/starfusion/detect/tests/main.nf.test b/modules/local/starfusion/detect/tests/main.nf.test new file mode 100644 index 00000000..75e0bfdd --- /dev/null +++ b/modules/local/starfusion/detect/tests/main.nf.test @@ -0,0 +1,87 @@ +nextflow_process { + + name "Test Process STARFUSION" + script "../main.nf" + process "STARFUSION" + + setup { + run("STARFUSION_BUILD") { + script "../../../starfusion/build/main.nf" + process { + """ + input[0] = [ + [ id:'minigenome fasta' ], + file("https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/minigenome.fa") + ] + input[1] = [ + [ id:'minigenome gtf' ], + file("https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/minigenome.gtf") + ] + + input [2] = file("https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/CTAT_HumanFusionLib.mini.dat.gz") + input [3] = "human" + """ + } + } + } + + test("Should run without failures") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ +
file("https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/v0.0.1/rnaseq_1.fastq.gz"), + file("https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/v0.0.1/rnaseq_2.fastq.gz") + ], // reads + [] // empty list for junction, as we don't have a pre-computed Chimeric.out.junction file + ] + input[1] = STARFUSION_BUILD.out.reference.map { it[1] } + """ + } + } + + then { + assert process.success + assert snapshot( + process.out.fusions, + process.out.abridged, + process.out.coding_effect, + process.out.versions + ).match() + } + + } + + test("Should create stub files") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test_stub', single_end:false ], + [ + file("https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/v0.0.1/rnaseq_1.fastq.gz"), + file("https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/v0.0.1/rnaseq_2.fastq.gz") + ], + [] // empty list for junction + ] + input[1] = STARFUSION_BUILD.out.reference.map { it[1] } + """ + } + } + + then { + assert process.success + assert snapshot( + process.out + ).match() + } + + } + +} diff --git a/modules/local/starfusion/detect/tests/main.nf.test.snap b/modules/local/starfusion/detect/tests/main.nf.test.snap new file mode 100644 index 00000000..f65f94b0 --- /dev/null +++ b/modules/local/starfusion/detect/tests/main.nf.test.snap @@ -0,0 +1,112 @@ +{ + "Should create stub files": { + "content": [ + { + "0": [ + [ + { + "id": "test_stub", + "single_end": false + }, + "test_stub.starfusion.fusion_predictions.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test_stub", + "single_end": false + }, + "test_stub.starfusion.abridged.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test_stub", + "single_end": false + }, + "test_stub.starfusion.abridged.coding_effect.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,a8fb6344fdf740dde0941048313fc243" + ], + "abridged": [ + [ + { + "id": "test_stub", + "single_end": 
false + }, + "test_stub.starfusion.abridged.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "coding_effect": [ + [ + { + "id": "test_stub", + "single_end": false + }, + "test_stub.starfusion.abridged.coding_effect.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "fusions": [ + [ + { + "id": "test_stub", + "single_end": false + }, + "test_stub.starfusion.fusion_predictions.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,a8fb6344fdf740dde0941048313fc243" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.3" + }, + "timestamp": "2024-12-19T18:42:06.988178092" + }, + "Should run without failures": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.starfusion.fusion_predictions.tsv:md5,82834fffed743afe07da82bd56d50c99" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.starfusion.abridged.tsv:md5,d6d20fdd4b5cba21b9c0ebf8e0ea19ff" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.starfusion.abridged.coding_effect.tsv:md5,95dfce6fdaf3589f23881fe1e855c62b" + ] + ], + [ + "versions.yml:md5,a8fb6344fdf740dde0941048313fc243" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.3" + }, + "timestamp": "2024-12-19T18:41:49.150362156" + } +} \ No newline at end of file diff --git a/modules/local/starfusion/download/environment.yml b/modules/local/starfusion/download/environment.yml new file mode 100644 index 00000000..ef7f9316 --- /dev/null +++ b/modules/local/starfusion/download/environment.yml @@ -0,0 +1,8 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::dfam=3.7 + - bioconda::hmmer=3.4 + - bioconda::minimap2=2.28 + - bioconda::star-fusion=1.14.0 diff --git a/modules/local/starfusion/download/main.nf b/modules/local/starfusion/download/main.nf new file mode 100644 index 00000000..ace18a9e --- /dev/null +++ b/modules/local/starfusion/download/main.nf @@ -0,0 +1,34 @@ +process STARFUSION_DOWNLOAD { + tag 
'star-fusion' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/be/bed86145102fdf7e381e1a506a4723676f98b4bbe1db5085d02213cef18525c9/data' : + 'community.wave.seqera.io/library/dfam_hmmer_minimap2_star-fusion:aa3a8e3951498552'}" + + output: + path "ctat_genome_lib_build_dir/*" , emit: reference + + + // TODO: move to S3 + script: + """ + wget https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/__genome_libs_StarFv1.10/GRCh38_gencode_v37_CTAT_lib_Mar012021.plug-n-play.tar.gz --no-check-certificate + + tar xvf GRCh38_gencode_v37_CTAT_lib_Mar012021.plug-n-play.tar.gz + + rm GRCh38_gencode_v37_CTAT_lib_Mar012021.plug-n-play.tar.gz + + mv */ctat_genome_lib_build_dir . + """ + + stub: + """ + mkdir -p ctat_genome_lib_build_dir + touch ctat_genome_lib_build_dir/ref_annot.cdna.fa + cat <<-END_VERSIONS > versions.yml + "${task.process}": + STAR-Fusion: \$(STAR-Fusion --version 2>&1 | grep -i 'version' | sed 's/STAR-Fusion version: //') + END_VERSIONS + """ +} diff --git a/modules/local/starfusion/download/meta.yml b/modules/local/starfusion/download/meta.yml new file mode 100644 index 00000000..24e84252 --- /dev/null +++ b/modules/local/starfusion/download/meta.yml @@ -0,0 +1,25 @@ +name: starfusion_downloadgenome +description: Download STAR-fusion genome resource required to run STAR-Fusion caller +keywords: + - download +tools: + - star-fusion: + description: Fusion calling algorithm for RNAseq data + homepage: https://github.com/STAR-Fusion/ + documentation: https://github.com/STAR-Fusion/STAR-Fusion/wiki/installing-star-fusion + tool_dev_url: https://github.com/STAR-Fusion/STAR-Fusion + doi: "10.1186/s13059-019-1842-9" + licence: ["GPL v3"] + +output: + - reference: + type: directory + description: Genome resource path + pattern: "ctat_genome_lib_build_dir/*" + - gtf: + type: file + description: genome gtf file + pattern: "*.{gtf}" +
+authors: + - "@praveenraj2018,@rannick" diff --git a/modules/local/uscs/custom_gtftogenepred/main.nf b/modules/local/uscs/custom_gtftogenepred/main.nf new file mode 100644 index 00000000..9cc15765 --- /dev/null +++ b/modules/local/uscs/custom_gtftogenepred/main.nf @@ -0,0 +1,39 @@ +process GTF_TO_REFFLAT { + tag "$meta.id" + label 'process_low' + + conda "bioconda::ucsc-gtftogenepred=377" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ucsc-gtftogenepred:377--ha8a8165_5' : + 'quay.io/biocontainers/ucsc-gtftogenepred:377--ha8a8165_5' }" + + input: + tuple val(meta), path (gtf) + + output: + path('*.refflat') , emit: refflat + path "versions.yml" , emit: versions + + script: + def genepred = gtf + '.genepred' + def refflat = gtf + '.refflat' + """ + gtfToGenePred -genePredExt -geneNameAsName2 ${gtf} ${genepred} + paste ${genepred} ${genepred} | cut -f12,16-25 > ${refflat} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gtfToGenePred: 377 + END_VERSIONS + """ + + stub: + def refflat = gtf + '.refflat' + """ + touch ${refflat} + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gtfToGenePred: 377 + END_VERSIONS + """ +} diff --git a/modules/local/vcf_collect/main.nf b/modules/local/vcf_collect/main.nf new file mode 100644 index 00000000..3a1bf221 --- /dev/null +++ b/modules/local/vcf_collect/main.nf @@ -0,0 +1,45 @@ +process VCF_COLLECT { + tag "$meta.id" + label 'process_single' + + conda "conda-forge::pandas=1.5.2" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/pandas:1.5.2' : + 'biocontainers/pandas:1.5.2' }" + + input: + tuple val(meta), path(fusioninspector_tsv), path(fusioninspector_gtf_tsv), path(fusionreport_report), path(fusionreport_csv) + tuple val(meta2), path(hgnc_ref) + tuple val(meta3), path(hgnc_date) + + output: + path "versions.yml" , emit: versions + tuple val(meta), path("*vcf.gz") , emit: vcf + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + vcf_collect.py --fusioninspector $fusioninspector_tsv --fusionreport $fusionreport_report --fusioninspector_gtf $fusioninspector_gtf_tsv --fusionreport_csv $fusionreport_csv --hgnc $hgnc_ref --sample ${prefix} --out ${prefix}_fusion_data.vcf + gzip ${prefix}_fusion_data.vcf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | sed 's/Python //g') + HGNC DB retrieval: \$(cat $hgnc_date) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + echo "" | gzip > ${prefix}_fusion_data.vcf.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | sed 's/Python //g') + END_VERSIONS + """ +} diff --git a/modules/local/vcf_collect/meta.yml b/modules/local/vcf_collect/meta.yml new file mode 100644 index 00000000..de4667bb --- /dev/null +++ b/modules/local/vcf_collect/meta.yml @@ -0,0 +1,39 @@ +name: vcf_collect +description: vcf_collect +keywords: + - sort +tools: + - fusionreport: + description: Converts RNA fusion files to SV VCF and collects statistics and metrics in a VCF file. + homepage: Adapted from https://github.com/J35P312/MegaFusion + documentation: https://github.com/J35P312/MegaFusion + doi: "" + licence: [""] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g.
[ id:'test', single_end:false ] + - tsv: + type: path + description: Path to FusionInspector tsv output + pattern: "*" + - report: + type: path + description: Path to fusionreport report + pattern: "*.fusions.tsv" + +output: + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - vcf: + type: file + description: File containing the summary of all fusions as compressed vcf file + pattern: "*.vcf.gz" + +authors: + - "@rannick" diff --git a/tools/fusioncatcher/environment.yml b/modules/nf-core/agat/convertspgff2tsv/environment.yml similarity index 50% rename from tools/fusioncatcher/environment.yml rename to modules/nf-core/agat/convertspgff2tsv/environment.yml index ba3f4629..0410ee76 100644 --- a/tools/fusioncatcher/environment.yml +++ b/modules/nf-core/agat/convertspgff2tsv/environment.yml @@ -1,6 +1,5 @@ -name: fusioncatcher channels: - - bioconda - conda-forge + - bioconda dependencies: - - bioconda::fusioncatcher=1.00 + - bioconda::agat=1.4.0 diff --git a/modules/nf-core/agat/convertspgff2tsv/main.nf b/modules/nf-core/agat/convertspgff2tsv/main.nf new file mode 100644 index 00000000..a6f73b6c --- /dev/null +++ b/modules/nf-core/agat/convertspgff2tsv/main.nf @@ -0,0 +1,46 @@ +process AGAT_CONVERTSPGFF2TSV { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/agat:1.4.0--pl5321hdfd78af_0' : + 'biocontainers/agat:1.4.0--pl5321hdfd78af_0' }" + + input: + tuple val(meta), path(gff) + + output: + tuple val(meta), path("*.tsv"), emit: tsv + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + agat_convert_sp_gff2tsv.pl \\ + --gff $gff \\ + --output ${prefix}.tsv \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + agat: \$(agat_convert_sp_gff2tsv.pl --help | sed -n 's/.*(AGAT) - Version: \\(.*\\) .*/\\1/p') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + agat: \$(agat_convert_sp_gff2tsv.pl --help | sed -n 's/.*(AGAT) - Version: \\(.*\\) .*/\\1/p') + END_VERSIONS + """ +} diff --git a/modules/nf-core/agat/convertspgff2tsv/meta.yml b/modules/nf-core/agat/convertspgff2tsv/meta.yml new file mode 100644 index 00000000..31f09868 --- /dev/null +++ b/modules/nf-core/agat/convertspgff2tsv/meta.yml @@ -0,0 +1,48 @@ +name: agat_convertspgff2tsv +description: | + Converts a GFF/GTF file into a TSV file +keywords: + - genome + - gff + - gtf + - conversion + - tsv +tools: + - agat: + description: "AGAT is a toolkit for manipulation and getting information from + GFF/GTF files" + homepage: "https://github.com/NBISweden/AGAT" + documentation: "https://agat.readthedocs.io/" + tool_dev_url: "https://github.com/NBISweden/AGAT" + doi: "10.5281/zenodo.3552717" + licence: ["GPL v3"] + identifier: biotools:AGAT +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - gff: + type: file + description: Annotation file in GFF3/GTF format + pattern: "*.{gff, gtf}" +output: + - tsv: + - meta: + type: file + description: Annotation file in TSV format + pattern: "*.{gtf}" + - "*.tsv": + type: file + description: Annotation file in TSV format + pattern: "*.{gtf}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@rannick" +maintainers: + - "@gallvp" diff --git a/modules/nf-core/agat/convertspgff2tsv/tests/main.nf.test b/modules/nf-core/agat/convertspgff2tsv/tests/main.nf.test new file mode 100644 index 00000000..6a2e8942 --- /dev/null +++ b/modules/nf-core/agat/convertspgff2tsv/tests/main.nf.test @@ -0,0 +1,59 @@ +nextflow_process { + + name "Test Process AGAT_CONVERTSPGFF2TSV" + script "../main.nf" + process "AGAT_CONVERTSPGFF2TSV" + + tag "modules" + tag "modules_nfcore" + tag "agat" + tag "agat/convertspgff2tsv" + + test("sarscov2 - genome [gff3]") { + + when { + process { + """ + input[0] = [ + [ id: 'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - genome [gff3] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id: 'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.tsv.collect { file(it[1]).getName() } + + process.out.versions).match() } + ) + } + + } + +} diff --git a/modules/nf-core/agat/convertspgff2tsv/tests/main.nf.test.snap b/modules/nf-core/agat/convertspgff2tsv/tests/main.nf.test.snap new file mode 100644 index 00000000..71ed6205 --- /dev/null +++ 
b/modules/nf-core/agat/convertspgff2tsv/tests/main.nf.test.snap @@ -0,0 +1,48 @@ +{ + "sarscov2 - genome [gff3] - stub": { + "content": [ + [ + "test.tsv", + "versions.yml:md5,b81565a6ff8911848806128b3bec8508" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-08T10:06:55.853319" + }, + "sarscov2 - genome [gff3]": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.tsv:md5,8373d2035689d23694f87606116cdccd" + ] + ], + "1": [ + "versions.yml:md5,b81565a6ff8911848806128b3bec8508" + ], + "tsv": [ + [ + { + "id": "test" + }, + "test.tsv:md5,8373d2035689d23694f87606116cdccd" + ] + ], + "versions": [ + "versions.yml:md5,b81565a6ff8911848806128b3bec8508" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-08T10:06:51.415395" + } +} \ No newline at end of file diff --git a/modules/nf-core/agat/convertspgff2tsv/tests/tags.yml b/modules/nf-core/agat/convertspgff2tsv/tests/tags.yml new file mode 100644 index 00000000..d2ee3721 --- /dev/null +++ b/modules/nf-core/agat/convertspgff2tsv/tests/tags.yml @@ -0,0 +1,2 @@ +agat/convertspgff2tsv: + - "modules/nf-core/agat/convertspgff2tsv/**" diff --git a/modules/nf-core/arriba/arriba/environment.yml b/modules/nf-core/arriba/arriba/environment.yml new file mode 100644 index 00000000..c3a88ffb --- /dev/null +++ b/modules/nf-core/arriba/arriba/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda + +dependencies: + - bioconda::arriba=2.4.0 diff --git a/modules/nf-core/arriba/arriba/main.nf b/modules/nf-core/arriba/arriba/main.nf new file mode 100644 index 00000000..75dbf93a --- /dev/null +++ b/modules/nf-core/arriba/arriba/main.nf @@ -0,0 +1,65 @@ +process ARRIBA_ARRIBA { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ 
workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/arriba:2.4.0--h0033a41_2' : + 'biocontainers/arriba:2.4.0--h0033a41_2' }" + + input: + tuple val(meta), path(bam) + tuple val(meta2), path(fasta) + tuple val(meta3), path(gtf) + path(blacklist) + path(known_fusions) + path(cytobands) + path(protein_domains) + + output: + tuple val(meta), path("*.fusions.tsv") , emit: fusions + tuple val(meta), path("*.fusions.discarded.tsv"), emit: fusions_fail + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def blacklist = blacklist ? "-b $blacklist" : "-f blacklist" + def known_fusions = known_fusions ? "-k $known_fusions" : "" + def cytobands = cytobands ? "-d $cytobands" : "" + def protein_domains = protein_domains ? "-p $protein_domains" : "" + + """ + arriba \\ + -x $bam \\ + -a $fasta \\ + -g $gtf \\ + -o ${prefix}.fusions.tsv \\ + -O ${prefix}.fusions.discarded.tsv \\ + $blacklist \\ + $known_fusions \\ + $cytobands \\ + $protein_domains \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + arriba: \$(arriba -h | grep 'Version:' 2>&1 | sed 's/Version:\s//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + echo stub > ${prefix}.fusions.tsv + echo stub > ${prefix}.fusions.discarded.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + arriba: \$(arriba -h | grep 'Version:' 2>&1 | sed 's/Version:\s//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/arriba/arriba/meta.yml b/modules/nf-core/arriba/arriba/meta.yml new file mode 100644 index 00000000..91ba2103 --- /dev/null +++ b/modules/nf-core/arriba/arriba/meta.yml @@ -0,0 +1,94 @@ +name: arriba_arriba +description: Arriba is a command-line tool for the detection of gene fusions from + RNA-Seq data. 
+keywords: + - fusion + - arriba + - detection + - RNA-Seq +tools: + - arriba: + description: Fast and accurate gene fusion detection from RNA-Seq data + homepage: https://github.com/suhrig/arriba + documentation: https://arriba.readthedocs.io/en/latest/ + tool_dev_url: https://github.com/suhrig/arriba + doi: "10.1101/gr.257246.119" + licence: ["MIT"] + identifier: biotools:Arriba +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fasta: + type: file + description: Assembly FASTA file + pattern: "*.{fasta}" + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - gtf: + type: file + description: Annotation GTF file + pattern: "*.{gtf}" + - - blacklist: + type: file + description: Blacklist file + pattern: "*.{tsv}" + - - known_fusions: + type: file + description: Known fusions file + pattern: "*.{tsv}" + - - cytobands: + type: file + description: Cytobands file + pattern: "*.{tsv}" + - - protein_domains: + type: file + description: Protein domains file + pattern: "*.{gff3}" +output: + - fusions: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.fusions.tsv": + type: file + description: File contains fusions which pass all of Arriba's filters. + pattern: "*.{fusions.tsv}" + - fusions_fail: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.fusions.discarded.tsv": + type: file + description: File contains fusions that Arriba classified as an artifact or + that are also observed in healthy tissue. 
+ pattern: "*.{fusions.discarded.tsv}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@praveenraj2018" + - "@rannick" +maintainers: + - "@praveenraj2018" + - "@rannick" diff --git a/modules/nf-core/arriba/arriba/tests/main.nf.test b/modules/nf-core/arriba/arriba/tests/main.nf.test new file mode 100644 index 00000000..4cff86e5 --- /dev/null +++ b/modules/nf-core/arriba/arriba/tests/main.nf.test @@ -0,0 +1,107 @@ + +nextflow_process { + + name "Test Process ARRIBA_ARRIBA" + script "../main.nf" + process "ARRIBA_ARRIBA" + + tag "modules" + tag "modules_nfcore" + tag "arriba" + tag "arriba/arriba" + tag "arriba/download" + tag "star/genomegenerate" + tag "star/align" + + setup { + config "./nextflow.config" + options "-stub" + run("ARRIBA_DOWNLOAD") { + script "../../../arriba/download/main.nf" + process { + """ + input[0] = 'GRCh38' + """ + } + } + run("STAR_GENOMEGENERATE") { + script "../../../star/genomegenerate/main.nf" + options "-stub" + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + run("STAR_ALIGN") { + script "../../../star/align/main.nf" + options "-stub" + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 
'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + input[3] = false + input[4] = 'illumina' + input[5] = false + """ + } + } + } + + test("homo_sapiens - paired_end - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.rna.paired_end.sorted.bam', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[2] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + input[3] = [] + + input[4] = [] + + input[5] = [] + + input[6] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/arriba/arriba/tests/main.nf.test.snap b/modules/nf-core/arriba/arriba/tests/main.nf.test.snap new file mode 100644 index 00000000..14d46f6b --- /dev/null +++ b/modules/nf-core/arriba/arriba/tests/main.nf.test.snap @@ -0,0 +1,55 @@ +{ + "homo_sapiens - paired_end - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fusions.tsv:md5,f50b84b1db4b83ba62ec1deacc69c260" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fusions.discarded.tsv:md5,f50b84b1db4b83ba62ec1deacc69c260" + ] + ], + "2": [ + "versions.yml:md5,d323796555db4a58fe4c6bc08d1dec30" + ], + "fusions": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fusions.tsv:md5,f50b84b1db4b83ba62ec1deacc69c260" + ] + ], + "fusions_fail": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fusions.discarded.tsv:md5,f50b84b1db4b83ba62ec1deacc69c260" + ] + ], + "versions": [ + "versions.yml:md5,d323796555db4a58fe4c6bc08d1dec30" + 
] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-08T15:41:23.945072" + } +} \ No newline at end of file diff --git a/modules/nf-core/arriba/arriba/tests/nextflow.config b/modules/nf-core/arriba/arriba/tests/nextflow.config new file mode 100644 index 00000000..d3d5b00f --- /dev/null +++ b/modules/nf-core/arriba/arriba/tests/nextflow.config @@ -0,0 +1,8 @@ +process { + withName: STAR_GENOMEGENERATE { + ext.args = '--genomeSAindexNbases 11' + } + withName: STAR_ALIGN { + ext.args = '--readFilesCommand zcat --outSAMtype BAM Unsorted --outSAMunmapped Within --outBAMcompression 0 --outFilterMultimapNmax 50 --peOverlapNbasesMin 10 --alignSplicedMateMapLminOverLmate 0.5 --alignSJstitchMismatchNmax 5 -1 5 5 --chimSegmentMin 10 --chimOutType WithinBAM HardClip --chimJunctionOverhangMin 10 --chimScoreDropMax 30 --chimScoreJunctionNonGTAG 0 --chimScoreSeparation 1 --chimSegmentReadGapMax 3 --chimMultimapNmax 50' + } +} diff --git a/modules/nf-core/arriba/download/environment.yml b/modules/nf-core/arriba/download/environment.yml new file mode 100644 index 00000000..d0883a0d --- /dev/null +++ b/modules/nf-core/arriba/download/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::arriba=2.4.0 diff --git a/modules/nf-core/arriba/download/main.nf b/modules/nf-core/arriba/download/main.nf new file mode 100644 index 00000000..c45120ad --- /dev/null +++ b/modules/nf-core/arriba/download/main.nf @@ -0,0 +1,49 @@ +process ARRIBA_DOWNLOAD { + tag "arriba" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/arriba:2.4.0--h0033a41_2' : + 'biocontainers/arriba:2.4.0--h0033a41_2' }" + + input: + val(genome) + + output: + path "blacklist*${genome}*.tsv.gz" , emit: blacklist + path "cytobands*${genome}*.tsv" , emit: cytobands + path "protein_domains*${genome}*.gff3" , emit: protein_domains + path "known_fusions*${genome}*.tsv.gz" , emit: known_fusions + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + """ + wget https://github.com/suhrig/arriba/releases/download/v2.4.0/arriba_v2.4.0.tar.gz -O arriba_v2.4.0.tar.gz --no-check-certificate + tar -xzvf arriba_v2.4.0.tar.gz + rm arriba_v2.4.0.tar.gz + mv arriba_v2.4.0/database/* . + rm -r arriba_v2.4.0 + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + arriba_download: \$(arriba -h | grep 'Version:' 2>&1 | sed 's/Version:\s//') + END_VERSIONS + """ + + stub: + """ + touch blacklist_hg38_GRCh38_v2.4.0.tsv.gz + touch protein_domains_hg38_GRCh38_v2.4.0.gff3 + touch cytobands_hg38_GRCh38_v2.4.0.tsv + touch known_fusions_hg38_GRCh38_v2.4.0.tsv.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + arriba_download: \$(arriba -h | grep 'Version:' 2>&1 | sed 's/Version:\s//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/arriba/download/meta.yml b/modules/nf-core/arriba/download/meta.yml new file mode 100644 index 00000000..bdf542eb --- /dev/null +++ b/modules/nf-core/arriba/download/meta.yml @@ -0,0 +1,52 @@ +name: arriba_download +description: Arriba is a command-line tool for the detection of gene fusions from + RNA-Seq data. 
+keywords: + - fusion + - arriba + - reference +tools: + - arriba: + description: Fast and accurate gene fusion detection from RNA-Seq data + homepage: https://github.com/suhrig/arriba + documentation: https://arriba.readthedocs.io/en/latest/ + tool_dev_url: https://github.com/suhrig/arriba + doi: "10.1101/gr.257246.119" + licence: ["MIT"] + identifier: biotools:Arriba + +input: + - - genome: + type: string + description: hg38, hg19, GRCh38, GRCh37 for humans are accepted +output: + - blacklist: + - blacklist*${genome}*.tsv.gz: + type: string + description: The blacklist removes recurrent alignment artifacts and transcripts + which are present in healthy tissue + pattern: ".tsv.gz" + - cytobands: + - cytobands*${genome}*.tsv: + type: file + description: Coordinates of the Giemsa staining bands. This information is used + to draw ideograms + pattern: ".tsv" + - protein_domains: + - protein_domains*${genome}*.gff3: + type: file + description: Protein domain annotations + patter: "*.gff3" + - known_fusions: + - known_fusions*${genome}*.tsv.gz: + type: file + description: Arriba is more sensitive to those fusions to improve the detection + rate of expected or highly relevant events, such as recurrent fusions + patter: "*.tsv.gz" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@peterpru" diff --git a/modules/nf-core/arriba/download/tests/main.nf.test b/modules/nf-core/arriba/download/tests/main.nf.test new file mode 100644 index 00000000..2c32c7a0 --- /dev/null +++ b/modules/nf-core/arriba/download/tests/main.nf.test @@ -0,0 +1,50 @@ + +nextflow_process { + + name "Test Process ARRIBA_DOWNLOAD" + script "../main.nf" + process "ARRIBA_DOWNLOAD" + + tag "modules" + tag "modules_nfcore" + tag "arriba" + tag "arriba/download" + + test("test-arriba-download") { + when { + process { + """ + input[0] = 'GRCh38' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert 
snapshot(process.out).match() } + ) + } + + } + + test("download - stub") { + + options "-stub" + + when { + process { + """ + input[0] = 'GRCh38' + """ + } + } + + then { + assertAll( + { assert process.success } + ) + } + } + +} diff --git a/modules/nf-core/arriba/download/tests/main.nf.test.snap b/modules/nf-core/arriba/download/tests/main.nf.test.snap new file mode 100644 index 00000000..fe9b18b9 --- /dev/null +++ b/modules/nf-core/arriba/download/tests/main.nf.test.snap @@ -0,0 +1,43 @@ +{ + "download": { + "content": [ + { + "0": [ + "blacklist_hg38_GRCh38_v2.4.0.tsv.gz:md5,e3098a4be51aece78aede64b55c39318" + ], + "1": [ + "cytobands_hg38_GRCh38_v2.4.0.tsv:md5,7bd504feefb33fcfc9be0517439a423c" + ], + "2": [ + "protein_domains_hg38_GRCh38_v2.4.0.gff3:md5,43c387a784ebeed71b4147076cebf978" + ], + "3": [ + "known_fusions_hg38_GRCh38_v2.4.0.tsv.gz:md5,4f00f81ccb5f4db283f1a22b8b0da67c" + ], + "4": [ + "versions.yml:md5,98c69df5eaea5caf0b4af7b8d7af4893" + ], + "blacklist": [ + "blacklist_hg38_GRCh38_v2.4.0.tsv.gz:md5,e3098a4be51aece78aede64b55c39318" + ], + "cytobands": [ + "cytobands_hg38_GRCh38_v2.4.0.tsv:md5,7bd504feefb33fcfc9be0517439a423c" + ], + "known_fusions": [ + "known_fusions_hg38_GRCh38_v2.4.0.tsv.gz:md5,4f00f81ccb5f4db283f1a22b8b0da67c" + ], + "protein_domains": [ + "protein_domains_hg38_GRCh38_v2.4.0.gff3:md5,43c387a784ebeed71b4147076cebf978" + ], + "versions": [ + "versions.yml:md5,98c69df5eaea5caf0b4af7b8d7af4893" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-08T11:12:17.010496" + } +} \ No newline at end of file diff --git a/modules/nf-core/arriba/download/tests/tags.yml b/modules/nf-core/arriba/download/tests/tags.yml new file mode 100644 index 00000000..f510bbf1 --- /dev/null +++ b/modules/nf-core/arriba/download/tests/tags.yml @@ -0,0 +1,2 @@ +arriba/download: + - "modules/nf-core/arriba/download/**" diff --git a/modules/nf-core/bedops/convert2bed/environment.yml 
b/modules/nf-core/bedops/convert2bed/environment.yml new file mode 100644 index 00000000..3c13066f --- /dev/null +++ b/modules/nf-core/bedops/convert2bed/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::bedops=2.4.41" diff --git a/modules/nf-core/bedops/convert2bed/main.nf b/modules/nf-core/bedops/convert2bed/main.nf new file mode 100644 index 00000000..a23efbd5 --- /dev/null +++ b/modules/nf-core/bedops/convert2bed/main.nf @@ -0,0 +1,48 @@ +process BEDOPS_CONVERT2BED { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bedops:2.4.41--h4ac6f70_2': + 'biocontainers/bedops:2.4.41--h4ac6f70_2' }" + + input: + tuple val(meta), path(in_file) + + output: + tuple val(meta), path("*.bed"), emit: bed + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def format = in_file.getExtension() + """ + convert2bed \\ + $args \\ + -i $format \\ + < $in_file \\ + > ${prefix}.bed + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bedops/convert2bed: \$(convert2bed --version | grep vers | sed 's/^.*.version: //') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.bed + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bedops/convert2bed: \$(convert2bed --version | grep vers | sed 's/^.*.version: //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bedops/convert2bed/meta.yml b/modules/nf-core/bedops/convert2bed/meta.yml new file mode 100644 index 00000000..6d84c031 --- /dev/null +++ b/modules/nf-core/bedops/convert2bed/meta.yml @@ -0,0 +1,45 @@ +name: "bedops_convert2bed" +description: Convert BAM/GFF/GTF/GVF/PSL 
files to bed +keywords: + - convert + - bed + - genomics +tools: + - "bedops": + description: "High-performance genomic feature operations." + homepage: "https://bedops.readthedocs.io/en/latest/content/reference/file-management/conversion/convert2bed.html#convert2bed" + documentation: "https://bedops.readthedocs.io/en/latest/" + tool_dev_url: "https://github.com/bedops" + doi: "10.1093/bioinformatics/bts277" + licence: ["GNU v2"] + identifier: biotools:bedops + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - in_file: + type: file + description: Input file +output: + - bed: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - "*.bed": + type: file + description: Sorted BED file + pattern: "*.{bed}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@rannick" +maintainers: + - "@rannick" diff --git a/modules/nf-core/bedops/convert2bed/tests/main.nf.test b/modules/nf-core/bedops/convert2bed/tests/main.nf.test new file mode 100644 index 00000000..3f2b193a --- /dev/null +++ b/modules/nf-core/bedops/convert2bed/tests/main.nf.test @@ -0,0 +1,59 @@ +// nf-core modules test bedops/convert2bed +nextflow_process { + + name "Test Process BEDOPS_CONVERT2BED" + script "../main.nf" + process "BEDOPS_CONVERT2BED" + + tag "modules" + tag "modules_nfcore" + tag "bedops" + tag "bedops/convert2bed" + + test("sarscov2 - gtf") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gtf', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - gtf - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ 
id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gtf', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/bedops/convert2bed/tests/main.nf.test.snap b/modules/nf-core/bedops/convert2bed/tests/main.nf.test.snap new file mode 100644 index 00000000..e2482924 --- /dev/null +++ b/modules/nf-core/bedops/convert2bed/tests/main.nf.test.snap @@ -0,0 +1,68 @@ +{ + "sarscov2 - gtf - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bed:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,d3d58dde1d9baba3700ff9fb5a45ce5b" + ], + "bed": [ + [ + { + "id": "test" + }, + "test.bed:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,d3d58dde1d9baba3700ff9fb5a45ce5b" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-11T12:06:14.755423333" + }, + "sarscov2 - gtf": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bed:md5,adfdd36e848a62f4b0ea8a694abe9659" + ] + ], + "1": [ + "versions.yml:md5,d3d58dde1d9baba3700ff9fb5a45ce5b" + ], + "bed": [ + [ + { + "id": "test" + }, + "test.bed:md5,adfdd36e848a62f4b0ea8a694abe9659" + ] + ], + "versions": [ + "versions.yml:md5,d3d58dde1d9baba3700ff9fb5a45ce5b" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-11T12:06:08.876003152" + } +} \ No newline at end of file diff --git a/modules/nf-core/bedops/convert2bed/tests/tags.yml b/modules/nf-core/bedops/convert2bed/tests/tags.yml new file mode 100644 index 00000000..fd2c2993 --- /dev/null +++ b/modules/nf-core/bedops/convert2bed/tests/tags.yml @@ -0,0 +1,2 @@ +bedops/convert2bed: + - "modules/nf-core/bedops/convert2bed/**" diff --git a/modules/nf-core/cat/cat/environment.yml b/modules/nf-core/cat/cat/environment.yml new file mode 
100644 index 00000000..9b01c865 --- /dev/null +++ b/modules/nf-core/cat/cat/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::pigz=2.3.4 diff --git a/modules/nf-core/cat/cat/main.nf b/modules/nf-core/cat/cat/main.nf new file mode 100644 index 00000000..2862c64c --- /dev/null +++ b/modules/nf-core/cat/cat/main.nf @@ -0,0 +1,78 @@ +process CAT_CAT { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/pigz:2.3.4' : + 'biocontainers/pigz:2.3.4' }" + + input: + tuple val(meta), path(files_in) + + output: + tuple val(meta), path("${prefix}"), emit: file_out + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def file_list = files_in.collect { it.toString() } + + // choose appropriate concatenation tool depending on input and output format + + // | input | output | command1 | command2 | + // |-----------|------------|----------|----------| + // | gzipped | gzipped | cat | | + // | ungzipped | ungzipped | cat | | + // | gzipped | ungzipped | zcat | | + // | ungzipped | gzipped | cat | pigz | + + // Use input file ending as default + prefix = task.ext.prefix ?: "${meta.id}${getFileSuffix(file_list[0])}" + out_zip = prefix.endsWith('.gz') + in_zip = file_list[0].endsWith('.gz') + command1 = (in_zip && !out_zip) ? 'zcat' : 'cat' + command2 = (!in_zip && out_zip) ? "| pigz -c -p $task.cpus $args2" : '' + if(file_list.contains(prefix.trim())) { + error "The name of the input file can't be the same as for the output prefix in the " + + "module CAT_CAT (currently `$prefix`). Please choose a different one." 
+ } + """ + $command1 \\ + $args \\ + ${file_list.join(' ')} \\ + $command2 \\ + > ${prefix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ + + stub: + def file_list = files_in.collect { it.toString() } + prefix = task.ext.prefix ?: "${meta.id}${file_list[0].substring(file_list[0].lastIndexOf('.'))}" + if(file_list.contains(prefix.trim())) { + error "The name of the input file can't be the same as for the output prefix in the " + + "module CAT_CAT (currently `$prefix`). Please choose a different one." + } + """ + touch $prefix + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ +} + +// for .gz files also include the second to last extension if it is present. E.g., .fasta.gz +def getFileSuffix(filename) { + def match = filename =~ /^.*?((\.\w{1,5})?(\.\w{1,5}\.gz$))/ + return match ? match[0][1] : filename.substring(filename.lastIndexOf('.')) +} diff --git a/modules/nf-core/cat/cat/meta.yml b/modules/nf-core/cat/cat/meta.yml new file mode 100644 index 00000000..81778a06 --- /dev/null +++ b/modules/nf-core/cat/cat/meta.yml @@ -0,0 +1,43 @@ +name: cat_cat +description: A module for concatenation of gzipped or uncompressed files +keywords: + - concatenate + - gzip + - cat +tools: + - cat: + description: Just concatenation + documentation: https://man7.org/linux/man-pages/man1/cat.1.html + licence: ["GPL-3.0-or-later"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - files_in: + type: file + description: List of compressed / uncompressed files + pattern: "*" +output: + - file_out: + - meta: + type: file + description: Concatenated file. Will be gzipped if file_out ends with ".gz" + pattern: "${file_out}" + - ${prefix}: + type: file + description: Concatenated file. 
Will be gzipped if file_out ends with ".gz" + pattern: "${file_out}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@erikrikarddaniel" + - "@FriederikeHanssen" +maintainers: + - "@erikrikarddaniel" + - "@FriederikeHanssen" diff --git a/modules/nf-core/cat/cat/tests/main.nf.test b/modules/nf-core/cat/cat/tests/main.nf.test new file mode 100644 index 00000000..9cb16178 --- /dev/null +++ b/modules/nf-core/cat/cat/tests/main.nf.test @@ -0,0 +1,191 @@ +nextflow_process { + + name "Test Process CAT_CAT" + script "../main.nf" + process "CAT_CAT" + tag "modules" + tag "modules_nfcore" + tag "cat" + tag "cat/cat" + + test("test_cat_name_conflict") { + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'genome', single_end:true ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.sizes', checkIfExists: true) + ] + ] + """ + } + } + then { + assertAll( + { assert !process.success }, + { assert process.stdout.toString().contains("The name of the input file can't be the same as for the output prefix") }, + { assert snapshot(process.out.versions).match() } + ) + } + } + + test("test_cat_unzipped_unzipped") { + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.sizes', checkIfExists: true) + ] + ] + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + + test("test_cat_zipped_zipped") { + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ 
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true) + ] + ] + """ + } + } + then { + def lines = path(process.out.file_out.get(0).get(1)).linesGzip + assertAll( + { assert process.success }, + { assert snapshot( + lines[0..5], + lines.size(), + process.out.versions + ).match() + } + ) + } + } + + test("test_cat_zipped_unzipped") { + config './nextflow_zipped_unzipped.config' + + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("test_cat_unzipped_zipped") { + config './nextflow_unzipped_zipped.config' + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.sizes', checkIfExists: true) + ] + ] + """ + } + } + then { + def lines = path(process.out.file_out.get(0).get(1)).linesGzip + assertAll( + { assert process.success }, + { assert snapshot( + lines[0..5], + lines.size(), + process.out.versions + ).match() + } + ) + } + } + + test("test_cat_one_file_unzipped_zipped") { + config './nextflow_unzipped_zipped.config' + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.modules_testdata_base_path + 
'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + ] + """ + } + } + then { + def lines = path(process.out.file_out.get(0).get(1)).linesGzip + assertAll( + { assert process.success }, + { assert snapshot( + lines[0..5], + lines.size(), + process.out.versions + ).match() + } + ) + } + } +} diff --git a/modules/nf-core/cat/cat/tests/main.nf.test.snap b/modules/nf-core/cat/cat/tests/main.nf.test.snap new file mode 100644 index 00000000..b7623ee6 --- /dev/null +++ b/modules/nf-core/cat/cat/tests/main.nf.test.snap @@ -0,0 +1,147 @@ +{ + "test_cat_unzipped_unzipped": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fasta:md5,f44b33a0e441ad58b2d3700270e2dbe2" + ] + ], + "1": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ], + "file_out": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fasta:md5,f44b33a0e441ad58b2d3700270e2dbe2" + ] + ], + "versions": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2023-10-16T14:32:18.500464399" + }, + "test_cat_zipped_unzipped": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "cat.txt:md5,c439d3b60e7bc03e8802a451a0d9a5d9" + ] + ], + "1": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ], + "file_out": [ + [ + { + "id": "test", + "single_end": true + }, + "cat.txt:md5,c439d3b60e7bc03e8802a451a0d9a5d9" + ] + ], + "versions": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2023-10-16T14:32:49.642741302" + }, + "test_cat_zipped_zipped": { + "content": [ + [ + "MT192765.1\tGenbank\ttranscript\t259\t29667\t.\t+\t.\tID=unknown_transcript_1;geneID=orf1ab;gene_name=orf1ab", + "MT192765.1\tGenbank\tgene\t259\t21548\t.\t+\t.\tParent=unknown_transcript_1", + 
"MT192765.1\tGenbank\tCDS\t259\t13461\t.\t+\t0\tParent=unknown_transcript_1;exception=\"ribosomal slippage\";gbkey=CDS;gene=orf1ab;note=\"pp1ab;translated=by -1 ribosomal frameshift\";product=\"orf1ab polyprotein\";protein_id=QIK50426.1", + "MT192765.1\tGenbank\tCDS\t13461\t21548\t.\t+\t0\tParent=unknown_transcript_1;exception=\"ribosomal slippage\";gbkey=CDS;gene=orf1ab;note=\"pp1ab;translated=by -1 ribosomal frameshift\";product=\"orf1ab polyprotein\";protein_id=QIK50426.1", + "MT192765.1\tGenbank\tCDS\t21556\t25377\t.\t+\t0\tParent=unknown_transcript_1;gbkey=CDS;gene=S;note=\"structural protein\";product=\"surface glycoprotein\";protein_id=QIK50427.1", + "MT192765.1\tGenbank\tgene\t21556\t25377\t.\t+\t.\tParent=unknown_transcript_1" + ], + 78, + [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:51:46.802978" + }, + "test_cat_name_conflict": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:51:29.45394" + }, + "test_cat_one_file_unzipped_zipped": { + "content": [ + [ + ">MT192765.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/PC00101P/2020, complete genome", + "GTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGT", + "GTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAG", + "TAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGG", + "GTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTT", + "ACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAG" + ], + 374, + [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:52:02.774016" + }, + "test_cat_unzipped_zipped": { + "content": [ + [ + ">MT192765.1 Severe acute 
respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/PC00101P/2020, complete genome", + "GTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGT", + "GTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAG", + "TAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGG", + "GTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTT", + "ACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAG" + ], + 375, + [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:51:57.581523" + } +} \ No newline at end of file diff --git a/modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config b/modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config new file mode 100644 index 00000000..ec26b0fd --- /dev/null +++ b/modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config @@ -0,0 +1,6 @@ + +process { + withName: CAT_CAT { + ext.prefix = 'cat.txt.gz' + } +} diff --git a/modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config b/modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config new file mode 100644 index 00000000..fbc79783 --- /dev/null +++ b/modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config @@ -0,0 +1,8 @@ + +process { + + withName: CAT_CAT { + ext.prefix = 'cat.txt' + } + +} diff --git a/modules/nf-core/cat/cat/tests/tags.yml b/modules/nf-core/cat/cat/tests/tags.yml new file mode 100644 index 00000000..37b578f5 --- /dev/null +++ b/modules/nf-core/cat/cat/tests/tags.yml @@ -0,0 +1,2 @@ +cat/cat: + - modules/nf-core/cat/cat/** diff --git a/modules/nf-core/cat/fastq/environment.yml b/modules/nf-core/cat/fastq/environment.yml new file mode 100644 index 00000000..71e04c3d --- /dev/null +++ b/modules/nf-core/cat/fastq/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - 
conda-forge::coreutils=9.5 diff --git a/modules/nf-core/cat/fastq/main.nf b/modules/nf-core/cat/fastq/main.nf new file mode 100644 index 00000000..4364a389 --- /dev/null +++ b/modules/nf-core/cat/fastq/main.nf @@ -0,0 +1,79 @@ +process CAT_FASTQ { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/c2/c262fc09eca59edb5a724080eeceb00fb06396f510aefb229c2d2c6897e63975/data' : + 'community.wave.seqera.io/library/coreutils:9.5--ae99c88a9b28c264' }" + + input: + tuple val(meta), path(reads, stageAs: "input*/*") + + output: + tuple val(meta), path("*.merged.fastq.gz"), emit: reads + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def readList = reads instanceof List ? reads.collect{ it.toString() } : [reads.toString()] + if (meta.single_end) { + if (readList.size >= 1) { + """ + cat ${readList.join(' ')} > ${prefix}.merged.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//') + END_VERSIONS + """ + } + } else { + if (readList.size >= 2) { + def read1 = [] + def read2 = [] + readList.eachWithIndex{ v, ix -> ( ix & 1 ? read2 : read1 ) << v } + """ + cat ${read1.join(' ')} > ${prefix}_1.merged.fastq.gz + cat ${read2.join(' ')} > ${prefix}_2.merged.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//') + END_VERSIONS + """ + } + } + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def readList = reads instanceof List ? 
reads.collect{ it.toString() } : [reads.toString()] + if (meta.single_end) { + if (readList.size >= 1) { + """ + echo '' | gzip > ${prefix}.merged.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//') + END_VERSIONS + """ + } + } else { + if (readList.size >= 2) { + """ + echo '' | gzip > ${prefix}_1.merged.fastq.gz + echo '' | gzip > ${prefix}_2.merged.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//') + END_VERSIONS + """ + } + } +} diff --git a/modules/nf-core/cat/fastq/meta.yml b/modules/nf-core/cat/fastq/meta.yml new file mode 100644 index 00000000..91ff2fb5 --- /dev/null +++ b/modules/nf-core/cat/fastq/meta.yml @@ -0,0 +1,45 @@ +name: cat_fastq +description: Concatenates fastq files +keywords: + - cat + - fastq + - concatenate +tools: + - cat: + description: | + The cat utility reads files sequentially, writing them to the standard output. + documentation: https://www.gnu.org/software/coreutils/manual/html_node/cat-invocation.html + licence: ["GPL-3.0-or-later"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files to be concatenated. +output: + - reads: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.merged.fastq.gz": + type: file + description: Merged fastq file + pattern: "*.{merged.fastq.gz}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" +maintainers: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/cat/fastq/tests/main.nf.test b/modules/nf-core/cat/fastq/tests/main.nf.test new file mode 100644 index 00000000..f88a78b6 --- /dev/null +++ b/modules/nf-core/cat/fastq/tests/main.nf.test @@ -0,0 +1,248 @@ +// NOTE The version snaps may not be consistent +// https://github.com/nf-core/modules/pull/4087#issuecomment-1767948035 +nextflow_process { + + name "Test Process CAT_FASTQ" + script "../main.nf" + process "CAT_FASTQ" + tag "modules" + tag "modules_nfcore" + tag "cat" + tag "cat/fastq" + + test("test_cat_fastq_single_end") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_cat_fastq_paired_end") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true)] + ])
+ """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_cat_fastq_single_end_same_name") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_cat_fastq_paired_end_same_name") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_cat_fastq_single_end_single_file") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_cat_fastq_single_end - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path 
+ 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_cat_fastq_paired_end - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_cat_fastq_single_end_same_name - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_cat_fastq_paired_end_same_name - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_cat_fastq_single_end_single_file - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/cat/fastq/tests/main.nf.test.snap b/modules/nf-core/cat/fastq/tests/main.nf.test.snap new file mode 100644 index 00000000..f8689a1c --- /dev/null +++ b/modules/nf-core/cat/fastq/tests/main.nf.test.snap @@ -0,0 +1,376 @@ +{ + "test_cat_fastq_single_end": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,ee314a9bd568d06617171b0c85f508da" + ] + ], + "1": [ + "versions.yml:md5,6ef4fd28546a005865b9454bbedbf81a" + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,ee314a9bd568d06617171b0c85f508da" + ] + ], + "versions": [ + "versions.yml:md5,6ef4fd28546a005865b9454bbedbf81a" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-19T20:02:07.519211144" + }, + "test_cat_fastq_single_end_same_name": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22" + ] + ], + "1": [ + "versions.yml:md5,6ef4fd28546a005865b9454bbedbf81a" + ], + "reads": [ + [ + { + "id": "test", + 
"single_end": true + }, + "test.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22" + ] + ], + "versions": [ + "versions.yml:md5,6ef4fd28546a005865b9454bbedbf81a" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-19T20:02:31.618628921" + }, + "test_cat_fastq_single_end_single_file": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "1": [ + "versions.yml:md5,6ef4fd28546a005865b9454bbedbf81a" + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "versions": [ + "versions.yml:md5,6ef4fd28546a005865b9454bbedbf81a" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-19T20:02:57.904149581" + }, + "test_cat_fastq_paired_end_same_name": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22", + "test_2.merged.fastq.gz:md5,a52cab0b840c7178b0ea83df1fdbe8d5" + ] + ] + ], + "1": [ + "versions.yml:md5,6ef4fd28546a005865b9454bbedbf81a" + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22", + "test_2.merged.fastq.gz:md5,a52cab0b840c7178b0ea83df1fdbe8d5" + ] + ] + ], + "versions": [ + "versions.yml:md5,6ef4fd28546a005865b9454bbedbf81a" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-19T20:02:44.577183829" + }, + "test_cat_fastq_single_end - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + "versions.yml:md5,6ef4fd28546a005865b9454bbedbf81a" + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + 
"test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,6ef4fd28546a005865b9454bbedbf81a" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-19T20:03:10.603734777" + }, + "test_cat_fastq_paired_end_same_name - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "1": [ + "versions.yml:md5,6ef4fd28546a005865b9454bbedbf81a" + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "versions": [ + "versions.yml:md5,6ef4fd28546a005865b9454bbedbf81a" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-19T20:03:46.041808828" + }, + "test_cat_fastq_single_end_same_name - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + "versions.yml:md5,6ef4fd28546a005865b9454bbedbf81a" + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,6ef4fd28546a005865b9454bbedbf81a" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-19T20:03:34.13865402" + }, + "test_cat_fastq_paired_end": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22", + "test_2.merged.fastq.gz:md5,a52cab0b840c7178b0ea83df1fdbe8d5" + ] + ] + ], + "1": [ + "versions.yml:md5,6ef4fd28546a005865b9454bbedbf81a" + ], + "reads": [ + [ + { + "id": "test", + "single_end": 
false + }, + [ + "test_1.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22", + "test_2.merged.fastq.gz:md5,a52cab0b840c7178b0ea83df1fdbe8d5" + ] + ] + ], + "versions": [ + "versions.yml:md5,6ef4fd28546a005865b9454bbedbf81a" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-19T20:02:19.64383573" + }, + "test_cat_fastq_paired_end - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "1": [ + "versions.yml:md5,6ef4fd28546a005865b9454bbedbf81a" + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "versions": [ + "versions.yml:md5,6ef4fd28546a005865b9454bbedbf81a" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-19T20:03:22.597246066" + }, + "test_cat_fastq_single_end_single_file - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + "versions.yml:md5,6ef4fd28546a005865b9454bbedbf81a" + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,6ef4fd28546a005865b9454bbedbf81a" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-19T20:03:58.44849001" + } +} \ No newline at end of file diff --git a/modules/nf-core/cat/fastq/tests/tags.yml b/modules/nf-core/cat/fastq/tests/tags.yml new file mode 100644 index 00000000..6ac43614 --- /dev/null +++ b/modules/nf-core/cat/fastq/tests/tags.yml @@ -0,0 +1,2 @@ +cat/fastq: + - modules/nf-core/cat/fastq/** diff 
--git a/modules/nf-core/fastp/environment.yml b/modules/nf-core/fastp/environment.yml new file mode 100644 index 00000000..26d4aca5 --- /dev/null +++ b/modules/nf-core/fastp/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::fastp=0.23.4 diff --git a/modules/nf-core/fastp/main.nf b/modules/nf-core/fastp/main.nf new file mode 100644 index 00000000..e1b9f565 --- /dev/null +++ b/modules/nf-core/fastp/main.nf @@ -0,0 +1,125 @@ +process FASTP { /* Runs fastp on the reads of one sample; emits the trimmed reads (optional), the JSON, HTML and log reports, optional failed/merged reads, and versions.yml. */ + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/fastp:0.23.4--h5f740d0_0' : + 'biocontainers/fastp:0.23.4--h5f740d0_0' }" + + input: + tuple val(meta), path(reads) + path adapter_fasta + val discard_trimmed_pass /* when true, the single-end and paired-end branches pass no --out1/--out2 option; the interleaved branch does not consult this flag */ + val save_trimmed_fail + val save_merged + + output: + tuple val(meta), path('*.fastp.fastq.gz') , optional:true, emit: reads + tuple val(meta), path('*.json') , emit: json + tuple val(meta), path('*.html') , emit: html + tuple val(meta), path('*.log') , emit: log + tuple val(meta), path('*.fail.fastq.gz') , optional:true, emit: reads_fail + tuple val(meta), path('*.merged.fastq.gz'), optional:true, emit: reads_merged + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def adapter_list = adapter_fasta ? "--adapter_fasta ${adapter_fasta}" : "" + def fail_fastq = save_trimmed_fail && meta.single_end ? "--failed_out ${prefix}.fail.fastq.gz" : save_trimmed_fail && !meta.single_end ? "--failed_out ${prefix}.paired.fail.fastq.gz --unpaired1 ${prefix}_1.fail.fastq.gz --unpaired2 ${prefix}_2.fail.fastq.gz" : '' + def out_fq1 = discard_trimmed_pass ?: ( meta.single_end ?
"--out1 ${prefix}.fastp.fastq.gz" : "--out1 ${prefix}_1.fastp.fastq.gz" ) /* NOTE(review): the Elvis operator returns discard_trimmed_pass itself when it is truthy, so out_fq1 is then the boolean true (rendered as the text true in the fastp command) rather than an empty string; the stub section uses an explicit ternary with an empty string instead. */ + def out_fq2 = discard_trimmed_pass ?: "--out2 ${prefix}_2.fastp.fastq.gz" /* same Elvis behaviour as out_fq1 */ + // Added soft-links to original fastqs for consistent naming in MultiQC + // Use single ended for interleaved. Add --interleaved_in in config. + if ( task.ext.args?.contains('--interleaved_in') ) { /* interleaved input: fastp writes to stdout, which is gzipped into a single fastp.fastq.gz file */ + """ + [ ! -f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz + + fastp \\ + --stdout \\ + --in1 ${prefix}.fastq.gz \\ + --thread $task.cpus \\ + --json ${prefix}.fastp.json \\ + --html ${prefix}.fastp.html \\ + $adapter_list \\ + $fail_fastq \\ + $args \\ + 2> >(tee ${prefix}.fastp.log >&2) \\ + | gzip -c > ${prefix}.fastp.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") + END_VERSIONS + """ + } else if (meta.single_end) { + """ + [ ! -f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz + + fastp \\ + --in1 ${prefix}.fastq.gz \\ + $out_fq1 \\ + --thread $task.cpus \\ + --json ${prefix}.fastp.json \\ + --html ${prefix}.fastp.html \\ + $adapter_list \\ + $fail_fastq \\ + $args \\ + 2> >(tee ${prefix}.fastp.log >&2) + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") + END_VERSIONS + """ + } else { /* paired-end: two linked inputs, two outputs, and an optional merged-reads file when save_merged is set */ + def merge_fastq = save_merged ? "-m --merged_out ${prefix}.merged.fastq.gz" : '' + """ + [ ! -f ${prefix}_1.fastq.gz ] && ln -sf ${reads[0]} ${prefix}_1.fastq.gz + [ !
-f ${prefix}_2.fastq.gz ] && ln -sf ${reads[1]} ${prefix}_2.fastq.gz + fastp \\ + --in1 ${prefix}_1.fastq.gz \\ + --in2 ${prefix}_2.fastq.gz \\ + $out_fq1 \\ + $out_fq2 \\ + --json ${prefix}.fastp.json \\ + --html ${prefix}.fastp.html \\ + $adapter_list \\ + $fail_fastq \\ + $merge_fastq \\ + --thread $task.cpus \\ + --detect_adapter_for_pe \\ + $args \\ + 2> >(tee ${prefix}.fastp.log >&2) + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") + END_VERSIONS + """ + } + + stub: /* creates placeholder outputs: gzipped empty-line FASTQ files chosen by the same flags as the script section, plus empty json, html and log files */ + def prefix = task.ext.prefix ?: "${meta.id}" + def is_single_output = task.ext.args?.contains('--interleaved_in') || meta.single_end + def touch_reads = (discard_trimmed_pass) ? "" : (is_single_output) ? "echo '' | gzip > ${prefix}.fastp.fastq.gz" : "echo '' | gzip > ${prefix}_1.fastp.fastq.gz ; echo '' | gzip > ${prefix}_2.fastp.fastq.gz" + def touch_merged = (!is_single_output && save_merged) ? "echo '' | gzip > ${prefix}.merged.fastq.gz" : "" + def touch_fail_fastq = (!save_trimmed_fail) ? "" : meta.single_end ? "echo '' | gzip > ${prefix}.fail.fastq.gz" : "echo '' | gzip > ${prefix}.paired.fail.fastq.gz ; echo '' | gzip > ${prefix}_1.fail.fastq.gz ; echo '' | gzip > ${prefix}_2.fail.fastq.gz" + """ + $touch_reads + $touch_fail_fastq + $touch_merged + touch "${prefix}.fastp.json" + touch "${prefix}.fastp.html" + touch "${prefix}.fastp.log" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/fastp/meta.yml b/modules/nf-core/fastp/meta.yml new file mode 100644 index 00000000..159404d0 --- /dev/null +++ b/modules/nf-core/fastp/meta.yml @@ -0,0 +1,113 @@ +name: fastp +description: Perform adapter/quality trimming on sequencing reads +keywords: + - trimming + - quality control + - fastq +tools: + - fastp: + description: | + A tool designed to provide fast all-in-one preprocessing for FastQ files.
This tool is developed in C++ with multithreading supported to afford high performance. + documentation: https://github.com/OpenGene/fastp + doi: 10.1093/bioinformatics/bty560 + licence: ["MIT"] + identifier: biotools:fastp +input: + - - meta: + type: map + description: | + Groovy Map containing sample information. Use 'single_end: true' to specify single ended or interleaved FASTQs. Use 'single_end: false' for paired-end reads. + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. If you wish to run interleaved paired-end data, supply as single-end data + but with `--interleaved_in` in your `modules.conf`'s `ext.args` for the module. + - - adapter_fasta: + type: file + description: File in FASTA format containing possible adapters to remove. + pattern: "*.{fasta,fna,fas,fa}" + - - discard_trimmed_pass: + type: boolean + description: Specify true to not write any reads that pass trimming thresholds. + This can be used to use fastp for the output report only. + - - save_trimmed_fail: + type: boolean + description: Specify true to save files that failed to pass trimming thresholds + ending in `*.fail.fastq.gz` + - - save_merged: + type: boolean + description: Specify true to save all merged reads to a file ending in `*.merged.fastq.gz` +output: + - reads: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.fastp.fastq.gz": + type: file + description: The trimmed/modified/unmerged fastq reads + pattern: "*fastp.fastq.gz" + - json: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.json": + type: file + description: Results in JSON format + pattern: "*.json" + - html: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g.
[ id:'test', single_end:false ] + - "*.html": + type: file + description: Results in HTML format + pattern: "*.html" + - log: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.log": + type: file + description: fastp log file + pattern: "*.log" + - reads_fail: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.fail.fastq.gz": + type: file + description: Reads that failed the preprocessing + pattern: "*fail.fastq.gz" + - reads_merged: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.merged.fastq.gz": + type: file + description: Reads that were successfully merged + pattern: "*.{merged.fastq.gz}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@kevinmenden" +maintainers: + - "@drpatelh" + - "@kevinmenden" diff --git a/modules/nf-core/fastp/tests/main.nf.test b/modules/nf-core/fastp/tests/main.nf.test new file mode 100644 index 00000000..30dbb8aa --- /dev/null +++ b/modules/nf-core/fastp/tests/main.nf.test @@ -0,0 +1,576 @@ +nextflow_process { + + name "Test Process FASTP" + script "../main.nf" + process "FASTP" + tag "modules" + tag "modules_nfcore" + tag "fastp" + + test("test_fastp_single_end") { + + when { + + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) + input[1] = [] + input[2] = false + input[3] = false + input[4] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.html.get(0).get(1)).getText().contains("single end (151 cycles)") }, + { assert path(process.out.log.get(0).get(1)).getText().contains("reads passed filter: 99") },
+ { assert snapshot( + process.out.json, + process.out.reads, + process.out.reads_fail, + process.out.reads_merged, + process.out.versions).match() } + ) + } + } + + test("test_fastp_paired_end") { + + when { + + process { + """ + adapter_fasta = [] + save_trimmed_pass = true + save_trimmed_fail = false + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + input[1] = [] + input[2] = false + input[3] = false + input[4] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.html.get(0).get(1)).getText().contains("The input has little adapter percentage (~0.000000%), probably it's trimmed before.") }, + { assert path(process.out.log.get(0).get(1)).getText().contains("Q30 bases: 12281(88.3716%)") }, + { assert snapshot( + process.out.json, + process.out.reads, + process.out.reads_fail, + process.out.reads_merged, + process.out.versions).match() } + ) + } + } + + test("fastp test_fastp_interleaved") { + + config './nextflow.interleaved.config' + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) ] + ]) + input[1] = [] + input[2] = false + input[3] = false + input[4] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.html.get(0).get(1)).getText().contains("paired end (151 cycles + 151 cycles)") }, + { assert path(process.out.log.get(0).get(1)).getText().contains("reads passed filter: 162") }, + { assert process.out.reads_fail == [] }, + { assert process.out.reads_merged == [] }, + { assert snapshot( + 
process.out.reads, + process.out.json, + process.out.versions).match() } + ) + } + } + + test("test_fastp_single_end_trim_fail") { + + when { + + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) + input[1] = [] + input[2] = false + input[3] = true + input[4] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.html.get(0).get(1)).getText().contains("single end (151 cycles)") }, + { assert path(process.out.log.get(0).get(1)).getText().contains("reads passed filter: 99") }, + { assert snapshot( + process.out.json, + process.out.reads, + process.out.reads_fail, + process.out.reads_merged, + process.out.versions).match() } + ) + } + } + + test("test_fastp_paired_end_trim_fail") { + + config './nextflow.save_failed.config' + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)] + ]) + input[1] = [] + input[2] = false + input[3] = true + input[4] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.html.get(0).get(1)).getText().contains("The input has little adapter percentage (~0.000000%), probably it's trimmed before.") }, + { assert path(process.out.log.get(0).get(1)).getText().contains("reads passed filter: 162") }, + { assert snapshot( + process.out.reads, + process.out.reads_fail, + process.out.reads_merged, + process.out.json, + process.out.versions).match() } + ) + } + } + + test("test_fastp_paired_end_merged") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ 
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + input[1] = [] + input[2] = false + input[3] = false + input[4] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.html.get(0).get(1)).getText().contains("The input has little adapter percentage (~0.000000%), probably it's trimmed before.") }, + { assert path(process.out.log.get(0).get(1)).getText().contains("total reads: 75") }, + { assert snapshot( + process.out.json, + process.out.reads, + process.out.reads_fail, + process.out.reads_merged, + process.out.versions).match() }, + ) + } + } + + test("test_fastp_paired_end_merged_adapterlist") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + input[1] = Channel.of([ file(params.modules_testdata_base_path + 'delete_me/fastp/adapters.fasta', checkIfExists: true) ]) + input[2] = false + input[3] = false + input[4] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.html.get(0).get(1)).getText().contains("
") }, + { assert path(process.out.log.get(0).get(1)).getText().contains("total bases: 13683") }, + { assert snapshot( + process.out.json, + process.out.reads, + process.out.reads_fail, + process.out.reads_merged, + process.out.versions).match() } + ) + } + } + + test("test_fastp_single_end_qc_only") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) + input[1] = [] + input[2] = true + input[3] = false + input[4] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.html.get(0).get(1)).getText().contains("single end (151 cycles)") }, + { assert path(process.out.log.get(0).get(1)).getText().contains("reads passed filter: 99") }, + { assert snapshot( + process.out.json, + process.out.reads, + process.out.reads, + process.out.reads_fail, + process.out.reads_fail, + process.out.reads_merged, + process.out.reads_merged, + process.out.versions).match() } + ) + } + } + + test("test_fastp_paired_end_qc_only") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + input[1] = [] + input[2] = true + input[3] = false + input[4] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.html.get(0).get(1)).getText().contains("The input has little adapter percentage (~0.000000%), probably it's trimmed before.") }, + { assert path(process.out.log.get(0).get(1)).getText().contains("Q30 bases: 12281(88.3716%)") }, + { assert snapshot( + process.out.json, + process.out.reads, + process.out.reads, + process.out.reads_fail, + 
process.out.reads_fail, + process.out.reads_merged, + process.out.reads_merged, + process.out.versions).match() } + ) + } + } + + test("test_fastp_single_end - stub") { + + options "-stub" + + when { + + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) + input[1] = [] + input[2] = false + input[3] = false + input[4] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_fastp_paired_end - stub") { + + options "-stub" + + when { + + process { + """ + adapter_fasta = [] + save_trimmed_pass = true + save_trimmed_fail = false + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + input[1] = [] + input[2] = false + input[3] = false + input[4] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("fastp - stub test_fastp_interleaved") { + + options "-stub" + + config './nextflow.interleaved.config' + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) ] + ]) + input[1] = [] + input[2] = false + input[3] = false + input[4] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_fastp_single_end_trim_fail - stub") { + + options "-stub" + + when { + + process { + """ + input[0] = Channel.of([ + [ id:'test', 
single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) + input[1] = [] + input[2] = false + input[3] = true + input[4] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_fastp_paired_end_trim_fail - stub") { + + options "-stub" + + config './nextflow.save_failed.config' + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)] + ]) + input[1] = [] + input[2] = false + input[3] = true + input[4] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_fastp_paired_end_merged - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + input[1] = [] + input[2] = false + input[3] = false + input[4] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_fastp_paired_end_merged_adapterlist - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + input[1] = Channel.of([ file(params.modules_testdata_base_path + 'delete_me/fastp/adapters.fasta', checkIfExists: true) ]) + input[2] = false + input[3] = false + input[4] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_fastp_single_end_qc_only - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) + input[1] = [] + input[2] = true + input[3] = false + input[4] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_fastp_paired_end_qc_only - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + input[1] = [] + input[2] = true + input[3] = false + input[4] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} \ No newline at end of file diff --git a/modules/nf-core/fastp/tests/main.nf.test.snap b/modules/nf-core/fastp/tests/main.nf.test.snap new file mode 100644 index 00000000..54be7e45 --- /dev/null +++ b/modules/nf-core/fastp/tests/main.nf.test.snap @@ -0,0 +1,1331 @@ +{ + "test_fastp_single_end_qc_only - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": 
true + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ], + "html": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + + ], + "reads_fail": [ + + ], + "reads_merged": [ + + ], + "versions": [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T14:31:10.841098" + }, + "test_fastp_paired_end": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,1e0f8e27e71728e2b63fc64086be95cd" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastp.fastq.gz:md5,67b2bbae47f073e05a97a9c2edce23c7", + "test_2.fastp.fastq.gz:md5,25cbdca08e2083dbd4f0502de6b62f39" + ] + ] + ], + [ + + ], + [ + + ], + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T13:43:28.665779" + }, + "test_fastp_paired_end_merged_adapterlist": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,5914ca3f21ce162123a824e33e8564f6" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastp.fastq.gz:md5,54b726a55e992a869fd3fa778afe1672", + "test_2.fastp.fastq.gz:md5,29d3b33b869f7b63417b8ff07bb128ba" + ] + ] + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + 
"test.merged.fastq.gz:md5,c873bb1ab3fa859dcc47306465e749d5" + ] + ], + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T13:44:18.210375" + }, + "test_fastp_single_end_qc_only": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,5cc5f01e449309e0e689ed6f51a2294a" + ] + ], + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T13:44:27.380974" + }, + "test_fastp_paired_end_trim_fail": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastp.fastq.gz:md5,6ff32a64c5188b9a9192be1398c262c7", + "test_2.fastp.fastq.gz:md5,db0cb7c9977e94ac2b4b446ebd017a8a" + ] + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.paired.fail.fastq.gz:md5,409b687c734cedd7a1fec14d316e1366", + "test_1.fail.fastq.gz:md5,4f273cf3159c13f79e8ffae12f5661f6", + "test_2.fail.fastq.gz:md5,f97b9edefb5649aab661fbc9e71fc995" + ] + ] + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,4c3268ddb50ea5b33125984776aa3519" + ] + ], + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T13:43:58.749589" + }, + "fastp - stub test_fastp_interleaved": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": true + }, + 
"test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ], + "html": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "reads_fail": [ + + ], + "reads_merged": [ + + ], + "versions": [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T13:50:00.270029" + }, + "test_fastp_single_end - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ], + "html": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + [ + { + "id": 
"test", + "single_end": true + }, + "test.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "reads_fail": [ + + ], + "reads_merged": [ + + ], + "versions": [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T13:49:42.502789" + }, + "test_fastp_paired_end_merged_adapterlist - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + + ], + "5": [ + [ + { + "id": "test", + "single_end": false + }, + "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "6": [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "reads_fail": [ + + ], + "reads_merged": [ + [ + { + "id": "test", + "single_end": false + }, + "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + 
"versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T13:54:53.458252" + }, + "test_fastp_paired_end_merged - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + + ], + "5": [ + [ + { + "id": "test", + "single_end": false + }, + "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "6": [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "reads_fail": [ + + ], + "reads_merged": [ + [ + { + "id": "test", + "single_end": false + }, + "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T13:50:27.689379" + }, + 
"test_fastp_paired_end_merged": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,b712fd68ed0322f4bec49ff2a5237fcc" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastp.fastq.gz:md5,54b726a55e992a869fd3fa778afe1672", + "test_2.fastp.fastq.gz:md5,29d3b33b869f7b63417b8ff07bb128ba" + ] + ] + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.merged.fastq.gz:md5,c873bb1ab3fa859dcc47306465e749d5" + ] + ], + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T13:44:08.68476" + }, + "test_fastp_paired_end - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" 
+ ] + ] + ], + "reads_fail": [ + + ], + "reads_merged": [ + + ], + "versions": [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T13:49:51.679221" + }, + "test_fastp_single_end": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,c852d7a6dba5819e4ac8d9673bedcacc" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.fastq.gz:md5,67b2bbae47f073e05a97a9c2edce23c7" + ] + ], + [ + + ], + [ + + ], + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T13:43:18.834322" + }, + "test_fastp_single_end_trim_fail - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "5": [ + + ], + "6": [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ], + "html": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + 
"test.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "reads_fail": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "reads_merged": [ + + ], + "versions": [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T14:05:36.898142" + }, + "test_fastp_paired_end_trim_fail - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.paired.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_1.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "5": [ + + ], + "6": [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + 
"test_2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "reads_fail": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.paired.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_1.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "reads_merged": [ + + ], + "versions": [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T14:05:49.212847" + }, + "fastp test_fastp_interleaved": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.fastq.gz:md5,217d62dc13a23e92513a1bd8e1bcea39" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,b24e0624df5cc0b11cd5ba21b726fb22" + ] + ], + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T13:43:38.910832" + }, + "test_fastp_single_end_trim_fail": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,9a7ee180f000e8d00c7fb67f06293eb5" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.fastq.gz:md5,67b2bbae47f073e05a97a9c2edce23c7" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.fail.fastq.gz:md5,3e4aaadb66a5b8fc9b881bf39c227abd" + ] + ], + [ + + ], + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T13:43:48.22378" + }, + "test_fastp_paired_end_qc_only": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,623064a45912dac6f2b64e3f2e9901df" + ] + ], + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", 
+ "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T13:44:36.334938" + }, + "test_fastp_paired_end_qc_only - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + + ], + "reads_fail": [ + + ], + "reads_merged": [ + + ], + "versions": [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T14:31:27.096468" + } +} \ No newline at end of file diff --git a/modules/nf-core/fastp/tests/nextflow.interleaved.config b/modules/nf-core/fastp/tests/nextflow.interleaved.config new file mode 100644 index 00000000..4be8dbd2 --- /dev/null +++ b/modules/nf-core/fastp/tests/nextflow.interleaved.config @@ -0,0 +1,5 @@ +process { + withName: FASTP { + ext.args = "--interleaved_in -e 30" + } +} diff --git a/modules/nf-core/fastp/tests/nextflow.save_failed.config b/modules/nf-core/fastp/tests/nextflow.save_failed.config new file mode 100644 index 00000000..53b61b0c --- /dev/null +++ b/modules/nf-core/fastp/tests/nextflow.save_failed.config @@ -0,0 +1,5 @@ +process { + withName: FASTP { + ext.args = "-e 30" + } +} diff --git 
a/modules/nf-core/fastp/tests/tags.yml b/modules/nf-core/fastp/tests/tags.yml new file mode 100644 index 00000000..c1afcce7 --- /dev/null +++ b/modules/nf-core/fastp/tests/tags.yml @@ -0,0 +1,2 @@ +fastp: + - modules/nf-core/fastp/** diff --git a/modules/nf-core/fastqc/environment.yml b/modules/nf-core/fastqc/environment.yml new file mode 100644 index 00000000..691d4c76 --- /dev/null +++ b/modules/nf-core/fastqc/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::fastqc=0.12.1 diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf new file mode 100644 index 00000000..752c3a10 --- /dev/null +++ b/modules/nf-core/fastqc/main.nf @@ -0,0 +1,64 @@ +process FASTQC { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/fastqc:0.12.1--hdfd78af_0' : + 'biocontainers/fastqc:0.12.1--hdfd78af_0' }" + + input: + tuple val(meta), path(reads) + + output: + tuple val(meta), path("*.html"), emit: html + tuple val(meta), path("*.zip") , emit: zip + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + // Make list of old name and new name pairs to use for renaming in the bash while loop + def old_new_pairs = reads instanceof Path || reads.size() == 1 ? 
[[ reads, "${prefix}.${reads.extension}" ]] : reads.withIndex().collect { entry, index -> [ entry, "${prefix}_${index + 1}.${entry.extension}" ] } + def rename_to = old_new_pairs*.join(' ').join(' ') + def renamed_files = old_new_pairs.collect{ _old_name, new_name -> new_name }.join(' ') + + // The total amount of allocated RAM by FastQC is equal to the number of threads defined (--threads) time the amount of RAM defined (--memory) + // https://github.com/s-andrews/FastQC/blob/1faeea0412093224d7f6a07f777fad60a5650795/fastqc#L211-L222 + // Dividing the task.memory by task.cpu allows to stick to requested amount of RAM in the label + def memory_in_mb = MemoryUnit.of("${task.memory}").toUnit('MB') / task.cpus + // FastQC memory value allowed range (100 - 10000) + def fastqc_memory = memory_in_mb > 10000 ? 10000 : (memory_in_mb < 100 ? 100 : memory_in_mb) + + """ + printf "%s %s\\n" $rename_to | while read old_name new_name; do + [ -f "\${new_name}" ] || ln -s \$old_name \$new_name + done + + fastqc \\ + $args \\ + --threads $task.cpus \\ + --memory $fastqc_memory \\ + $renamed_files + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.html + touch ${prefix}.zip + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/fastqc/meta.yml b/modules/nf-core/fastqc/meta.yml new file mode 100644 index 00000000..2b2e62b8 --- /dev/null +++ b/modules/nf-core/fastqc/meta.yml @@ -0,0 +1,67 @@ +name: fastqc +description: Run FastQC on sequenced reads +keywords: + - quality control + - qc + - adapters + - fastq +tools: + - fastqc: + description: | + FastQC gives general quality metrics about your reads. 
+ It provides information about the quality score distribution + across your reads, the per base sequence content (%A/C/G/T). + + You get information about adapter contamination and other + overrepresented sequences. + homepage: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/ + documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/ + licence: ["GPL-2.0-only"] + identifier: biotools:fastqc +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. +output: + - html: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.html": + type: file + description: FastQC report + pattern: "*_{fastqc.html}" + - zip: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.zip": + type: file + description: FastQC report archive + pattern: "*_{fastqc.zip}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@grst" + - "@ewels" + - "@FelixKrueger" +maintainers: + - "@drpatelh" + - "@grst" + - "@ewels" + - "@FelixKrueger" diff --git a/modules/nf-core/fastqc/tests/main.nf.test b/modules/nf-core/fastqc/tests/main.nf.test new file mode 100644 index 00000000..e9d79a07 --- /dev/null +++ b/modules/nf-core/fastqc/tests/main.nf.test @@ -0,0 +1,309 @@ +nextflow_process { + + name "Test Process FASTQC" + script "../main.nf" + process "FASTQC" + + tag "modules" + tag "modules_nfcore" + tag "fastqc" + + test("sarscov2 single-end [fastq]") { + + when { + process { + """ + input[0] = Channel.of([ + [ id: 'test', single_end:true ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + // NOTE The report contains the date inside it, which means that the md5sum is stable per day, but not longer than that. So you can't md5sum it. + // looks like this:
Mon 2 Oct 2023
test.gz
+ // https://github.com/nf-core/modules/pull/3903#issuecomment-1743620039 + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, + { assert snapshot(process.out.versions).match() } + ) + } + } + + test("sarscov2 paired-end [fastq]") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" }, + { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" }, + { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" }, + { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" }, + { assert path(process.out.html[0][1][0]).text.contains("File typeConventional base calls") }, + { assert path(process.out.html[0][1][1]).text.contains("File typeConventional base calls") }, + { assert snapshot(process.out.versions).match() } + ) + } + } + + test("sarscov2 interleaved [fastq]") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, + { assert snapshot(process.out.versions).match() } + ) + } + } + + test("sarscov2 paired-end [bam]") { 
+ + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, + { assert snapshot(process.out.versions).match() } + ) + } + } + + test("sarscov2 multiple [fastq]") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" }, + { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" }, + { assert process.out.html[0][1][2] ==~ ".*/test_3_fastqc.html" }, + { assert process.out.html[0][1][3] ==~ ".*/test_4_fastqc.html" }, + { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" }, + { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" }, + { assert process.out.zip[0][1][2] ==~ ".*/test_3_fastqc.zip" }, + { assert process.out.zip[0][1][3] ==~ ".*/test_4_fastqc.zip" }, + { assert path(process.out.html[0][1][0]).text.contains("File typeConventional base calls") }, + { assert path(process.out.html[0][1][1]).text.contains("File typeConventional base calls") }, 
+ { assert path(process.out.html[0][1][2]).text.contains("File typeConventional base calls") }, + { assert path(process.out.html[0][1][3]).text.contains("File typeConventional base calls") }, + { assert snapshot(process.out.versions).match() } + ) + } + } + + test("sarscov2 custom_prefix") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'mysample', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert process.out.html[0][1] ==~ ".*/mysample_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/mysample_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, + { assert snapshot(process.out.versions).match() } + ) + } + } + + test("sarscov2 single-end [fastq] - stub") { + + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [ id: 'test', single_end:true ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 paired-end [fastq] - stub") { + + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 interleaved [fastq] - stub") { + + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + 
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 paired-end [bam] - stub") { + + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 multiple [fastq] - stub") { + + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 custom_prefix - stub") { + + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [ id:'mysample', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/fastqc/tests/main.nf.test.snap b/modules/nf-core/fastqc/tests/main.nf.test.snap new file mode 100644 index 
00000000..d5db3092 --- /dev/null +++ b/modules/nf-core/fastqc/tests/main.nf.test.snap @@ -0,0 +1,392 @@ +{ + "sarscov2 custom_prefix": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:02:16.374038" + }, + "sarscov2 single-end [fastq] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "html": [ + [ + { + "id": "test", + "single_end": true + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "zip": [ + [ + { + "id": "test", + "single_end": true + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:02:24.993809" + }, + "sarscov2 custom_prefix - stub": { + "content": [ + { + "0": [ + [ + { + "id": "mysample", + "single_end": true + }, + "mysample.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "mysample", + "single_end": true + }, + "mysample.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "html": [ + [ + { + "id": "mysample", + "single_end": true + }, + "mysample.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "zip": [ + [ + { + "id": "mysample", + "single_end": true + }, + "mysample.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:03:10.93942" + }, + "sarscov2 interleaved [fastq]": { 
+ "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:01:42.355718" + }, + "sarscov2 paired-end [bam]": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:01:53.276274" + }, + "sarscov2 multiple [fastq]": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:02:05.527626" + }, + "sarscov2 paired-end [fastq]": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:01:31.188871" + }, + "sarscov2 paired-end [fastq] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "zip": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:02:34.273566" + }, + "sarscov2 multiple [fastq] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + 
"versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "zip": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:03:02.304411" + }, + "sarscov2 single-end [fastq]": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:01:19.095607" + }, + "sarscov2 interleaved [fastq] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "zip": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:02:44.640184" + }, + "sarscov2 paired-end [bam] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + 
"test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "zip": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:02:53.550742" + } +} \ No newline at end of file diff --git a/modules/nf-core/fastqc/tests/tags.yml b/modules/nf-core/fastqc/tests/tags.yml new file mode 100644 index 00000000..7834294b --- /dev/null +++ b/modules/nf-core/fastqc/tests/tags.yml @@ -0,0 +1,2 @@ +fastqc: + - modules/nf-core/fastqc/** diff --git a/modules/nf-core/gatk4/bedtointervallist/environment.yml b/modules/nf-core/gatk4/bedtointervallist/environment.yml new file mode 100644 index 00000000..1f7d0824 --- /dev/null +++ b/modules/nf-core/gatk4/bedtointervallist/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/gatk4 + - bioconda::gatk4=4.6.1.0 + # renovate: datasource=conda depName=bioconda/gcnvkernel + - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/gatk4/bedtointervallist/main.nf b/modules/nf-core/gatk4/bedtointervallist/main.nf new file mode 100644 index 00000000..89960e04 --- /dev/null +++ b/modules/nf-core/gatk4/bedtointervallist/main.nf @@ -0,0 +1,56 @@ +process GATK4_BEDTOINTERVALLIST { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b2/b28daf5d9bb2f0d129dcad1b7410e0dd8a9b087aaf3ec7ced929b1f57624ad98/data': + 'community.wave.seqera.io/library/gatk4_gcnvkernel:e48d414933d188cd' }" + + input: + tuple val(meta), path(bed) + tuple val(meta2), path(dict) + + output: + tuple val(meta), path('*.interval_list'), emit: interval_list + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK BedToIntervalList] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + BedToIntervalList \\ + --INPUT $bed \\ + --OUTPUT ${prefix}.interval_list \\ + --SEQUENCE_DICTIONARY $dict \\ + --TMP_DIR . \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.interval_list + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/bedtointervallist/meta.yml b/modules/nf-core/gatk4/bedtointervallist/meta.yml new file mode 100644 index 00000000..25348e16 --- /dev/null +++ b/modules/nf-core/gatk4/bedtointervallist/meta.yml @@ -0,0 +1,62 @@ +name: gatk4_bedtointervallist +description: Creates an interval list from a bed file and a reference dict +keywords: + - bed + - bedtointervallist + - gatk4 + - interval list +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary 
focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test'] + - bed: + type: file + description: Input bed file + pattern: "*.bed" + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - dict: + type: file + description: Sequence dictionary + pattern: "*.dict" +output: + - interval_list: + - meta: + type: file + description: gatk interval list file + pattern: "*.interval_list" + - "*.interval_list": + type: file + description: gatk interval list file + pattern: "*.interval_list" + - _list: + type: file + description: gatk interval list file + pattern: "*.interval_list" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@kevinmenden" + - "@ramprasadn" +maintainers: + - "@kevinmenden" + - "@ramprasadn" diff --git a/modules/nf-core/gatk4/bedtointervallist/tests/main.nf.test b/modules/nf-core/gatk4/bedtointervallist/tests/main.nf.test new file mode 100644 index 00000000..2289f73f --- /dev/null +++ b/modules/nf-core/gatk4/bedtointervallist/tests/main.nf.test @@ -0,0 +1,38 @@ +nextflow_process { + + name "Test Process GATK4_BEDTOINTERVALLIST" + script "../main.nf" + process "GATK4_BEDTOINTERVALLIST" + + tag "modules" + tag "modules_nfcore" + tag "gatk4" + tag "gatk4/bedtointervallist" + + test("test_gatk4_bedtointervallist") { + + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + [file(params.modules_testdata_base_path + + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) ] 
+ ] + input[1] = [ [ id:'dict' ], // meta map + [file(params.modules_testdata_base_path + + 'genomics/sarscov2/genome/genome.dict', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} \ No newline at end of file diff --git a/modules/nf-core/gatk4/bedtointervallist/tests/main.nf.test.snap b/modules/nf-core/gatk4/bedtointervallist/tests/main.nf.test.snap new file mode 100644 index 00000000..6936cf97 --- /dev/null +++ b/modules/nf-core/gatk4/bedtointervallist/tests/main.nf.test.snap @@ -0,0 +1,35 @@ +{ + "test_gatk4_bedtointervallist": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.interval_list:md5,e51101c9357fb2d59fd30e370eefa39c" + ] + ], + "1": [ + "versions.yml:md5,6b3aa4d49cc3ba433ecf92e31f155d00" + ], + "interval_list": [ + [ + { + "id": "test" + }, + "test.interval_list:md5,e51101c9357fb2d59fd30e370eefa39c" + ] + ], + "versions": [ + "versions.yml:md5,6b3aa4d49cc3ba433ecf92e31f155d00" + ] + } + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-10-31T10:37:25.720782902" + } +} \ No newline at end of file diff --git a/modules/nf-core/gatk4/bedtointervallist/tests/tags.yml b/modules/nf-core/gatk4/bedtointervallist/tests/tags.yml new file mode 100644 index 00000000..b4d54f12 --- /dev/null +++ b/modules/nf-core/gatk4/bedtointervallist/tests/tags.yml @@ -0,0 +1,2 @@ +gatk4/bedtointervallist: + - "modules/nf-core/gatk4/bedtointervallist/**" diff --git a/modules/nf-core/gatk4/createsequencedictionary/environment.yml b/modules/nf-core/gatk4/createsequencedictionary/environment.yml new file mode 100644 index 00000000..1f7d0824 --- /dev/null +++ b/modules/nf-core/gatk4/createsequencedictionary/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # 
renovate: datasource=conda depName=bioconda/gatk4 + - bioconda::gatk4=4.6.1.0 + # renovate: datasource=conda depName=bioconda/gcnvkernel + - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/gatk4/createsequencedictionary/main.nf b/modules/nf-core/gatk4/createsequencedictionary/main.nf new file mode 100644 index 00000000..998622a0 --- /dev/null +++ b/modules/nf-core/gatk4/createsequencedictionary/main.nf @@ -0,0 +1,52 @@ +process GATK4_CREATESEQUENCEDICTIONARY { + tag "$fasta" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b2/b28daf5d9bb2f0d129dcad1b7410e0dd8a9b087aaf3ec7ced929b1f57624ad98/data': + 'community.wave.seqera.io/library/gatk4_gcnvkernel:e48d414933d188cd' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path('*.dict') , emit: dict + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + def avail_mem = 6144 + if (!task.memory) { + log.info '[GATK CreateSequenceDictionary] Available memory not known - defaulting to 6GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + CreateSequenceDictionary \\ + --REFERENCE $fasta \\ + --URI $fasta \\ + --TMP_DIR . 
\\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + """ + touch ${fasta.baseName}.dict + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/createsequencedictionary/meta.yml b/modules/nf-core/gatk4/createsequencedictionary/meta.yml new file mode 100644 index 00000000..7b5156bb --- /dev/null +++ b/modules/nf-core/gatk4/createsequencedictionary/meta.yml @@ -0,0 +1,49 @@ +name: gatk4_createsequencedictionary +description: Creates a sequence dictionary for a reference sequence +keywords: + - createsequencedictionary + - dictionary + - fasta + - gatk4 +tools: + - gatk: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'genome' ] + - fasta: + type: file + description: Input fasta file + pattern: "*.{fasta,fa}" +output: + - dict: + - meta: + type: file + description: gatk dictionary file + pattern: "*.{dict}" + - "*.dict": + type: file + description: gatk dictionary file + pattern: "*.{dict}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@maxulysse" + - "@ramprasadn" +maintainers: + - "@maxulysse" + - "@ramprasadn" diff --git a/modules/nf-core/gatk4/createsequencedictionary/tests/main.nf.test b/modules/nf-core/gatk4/createsequencedictionary/tests/main.nf.test new file mode 100644 index 00000000..a8a9c6d2 --- /dev/null +++ b/modules/nf-core/gatk4/createsequencedictionary/tests/main.nf.test @@ -0,0 +1,56 @@ +nextflow_process { + + name "Test Process GATK4_CREATESEQUENCEDICTIONARY" + script "../main.nf" + process "GATK4_CREATESEQUENCEDICTIONARY" + + tag "modules" + tag "modules_nfcore" + tag "gatk4" + tag "gatk4/createsequencedictionary" + + test("sarscov2 - fasta") { + + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - fasta - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/gatk4/createsequencedictionary/tests/main.nf.test.snap b/modules/nf-core/gatk4/createsequencedictionary/tests/main.nf.test.snap new file mode 100644 index 00000000..e8a600fd --- /dev/null +++ 
b/modules/nf-core/gatk4/createsequencedictionary/tests/main.nf.test.snap @@ -0,0 +1,68 @@ +{ + "sarscov2 - fasta - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "genome.dict:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,e993b2c99f7f6b0fcd8428de15c61439" + ], + "dict": [ + [ + { + "id": "test" + }, + "genome.dict:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e993b2c99f7f6b0fcd8428de15c61439" + ] + } + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-10-31T10:51:56.155954077" + }, + "sarscov2 - fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "genome.dict:md5,7362679f176e0f52add03c08f457f646" + ] + ], + "1": [ + "versions.yml:md5,e993b2c99f7f6b0fcd8428de15c61439" + ], + "dict": [ + [ + { + "id": "test" + }, + "genome.dict:md5,7362679f176e0f52add03c08f457f646" + ] + ], + "versions": [ + "versions.yml:md5,e993b2c99f7f6b0fcd8428de15c61439" + ] + } + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-10-31T10:51:45.562993875" + } +} \ No newline at end of file diff --git a/modules/nf-core/gatk4/createsequencedictionary/tests/tags.yml b/modules/nf-core/gatk4/createsequencedictionary/tests/tags.yml new file mode 100644 index 00000000..035c5e4c --- /dev/null +++ b/modules/nf-core/gatk4/createsequencedictionary/tests/tags.yml @@ -0,0 +1,2 @@ +gatk4/createsequencedictionary: + - "modules/nf-core/gatk4/createsequencedictionary/**" diff --git a/modules/nf-core/gatk4/markduplicates/environment.yml b/modules/nf-core/gatk4/markduplicates/environment.yml new file mode 100644 index 00000000..ec65c32d --- /dev/null +++ b/modules/nf-core/gatk4/markduplicates/environment.yml @@ -0,0 +1,15 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda + +dependencies: + # renovate: datasource=conda 
depName=bioconda/gatk4 + - bioconda::gatk4=4.6.1.0 + # renovate: datasource=conda depName=bioconda/gcnvkernel + - bioconda::gcnvkernel=0.9 + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.19.1 + # renovate: datasource=conda depName=bioconda/samtools + - bioconda::samtools=1.19.2 diff --git a/modules/nf-core/gatk4/markduplicates/main.nf b/modules/nf-core/gatk4/markduplicates/main.nf new file mode 100644 index 00000000..cf770308 --- /dev/null +++ b/modules/nf-core/gatk4/markduplicates/main.nf @@ -0,0 +1,85 @@ +process GATK4_MARKDUPLICATES { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-d9e7bad0f7fbc8f4458d5c3ab7ffaaf0235b59fb:7cc3d06cbf42e28c5e2ebfc7c858654c7340a9d5-0': + 'biocontainers/mulled-v2-d9e7bad0f7fbc8f4458d5c3ab7ffaaf0235b59fb:7cc3d06cbf42e28c5e2ebfc7c858654c7340a9d5-0' }" + + input: + tuple val(meta), path(bam) + path fasta + path fasta_fai + + output: + tuple val(meta), path("*cram"), emit: cram, optional: true + tuple val(meta), path("*bam"), emit: bam, optional: true + tuple val(meta), path("*.crai"), emit: crai, optional: true + tuple val(meta), path("*.bai"), emit: bai, optional: true + tuple val(meta), path("*.metrics"), emit: metrics + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}.bam" + + // If the extension is CRAM, then change it to BAM + prefix_bam = prefix.tokenize('.')[-1] == 'cram' ? "${prefix.substring(0, prefix.lastIndexOf('.'))}.bam" : prefix + + def input_list = bam.collect{"--INPUT $it"}.join(' ') + def reference = fasta ? "--REFERENCE_SEQUENCE ${fasta}" : "" + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK MarkDuplicates] Available memory not known - defaulting to 3GB. 
Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + + // Using samtools and not Markduplicates to compress to CRAM speeds up computation: + // https://medium.com/@acarroll.dna/looking-at-trade-offs-in-compression-levels-for-genomics-tools-eec2834e8b94 + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + MarkDuplicates \\ + $input_list \\ + --OUTPUT ${prefix_bam} \\ + --METRICS_FILE ${prefix}.metrics \\ + --TMP_DIR . \\ + ${reference} \\ + $args + + # If cram files are wished as output, the run samtools for conversion + if [[ ${prefix} == *.cram ]]; then + samtools view -Ch -T ${fasta} -o ${prefix} ${prefix_bam} + rm ${prefix_bam} + samtools index ${prefix} + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}.bam" + prefix_no_suffix = task.ext.prefix ? prefix.tokenize('.')[0] : "${meta.id}" + """ + touch ${prefix_no_suffix}.bam + touch ${prefix_no_suffix}.cram + touch ${prefix_no_suffix}.cram.crai + touch ${prefix_no_suffix}.bai + touch ${prefix}.metrics + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/markduplicates/meta.yml b/modules/nf-core/gatk4/markduplicates/meta.yml new file mode 100644 index 00000000..4772c5f3 --- /dev/null +++ b/modules/nf-core/gatk4/markduplicates/meta.yml @@ -0,0 +1,102 @@ +name: gatk4_markduplicates +description: This tool locates and tags duplicate reads in a BAM or SAM file, where + duplicate reads are defined as originating from a single fragment of DNA. 
+keywords: + - bam + - gatk4 + - markduplicates + - sort +tools: + - gatk4: + description: Developed in the Data Sciences Platform at the Broad Institute, the + toolkit offers a wide variety of tools with a primary focus on variant discovery + and genotyping. Its powerful processing engine and high-performance computing + features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360037052812-MarkDuplicates-Picard- + tool_dev_url: https://github.com/broadinstitute/gatk + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["MIT"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: Sorted BAM file + pattern: "*.{bam}" + - - fasta: + type: file + description: Fasta file + pattern: "*.{fasta}" + - - fasta_fai: + type: file + description: Fasta index file + pattern: "*.{fai}" +output: + - cram: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*cram": + type: file + description: Marked duplicates CRAM file + pattern: "*.{cram}" + - bam: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*bam": + type: file + description: Marked duplicates BAM file + pattern: "*.{bam}" + - crai: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.crai": + type: file + description: CRAM index file + pattern: "*.{cram.crai}" + - bai: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.bai": + type: file + description: BAM index file + pattern: "*.{bam.bai}" + - metrics: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.metrics": + type: file + description: Duplicate metrics file generated by GATK + pattern: "*.{metrics.txt}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@ajodeh-juma" + - "@FriederikeHanssen" + - "@maxulysse" +maintainers: + - "@ajodeh-juma" + - "@FriederikeHanssen" + - "@maxulysse" diff --git a/modules/nf-core/gatk4/markduplicates/tests/bam.config b/modules/nf-core/gatk4/markduplicates/tests/bam.config new file mode 100644 index 00000000..0bbfbac3 --- /dev/null +++ b/modules/nf-core/gatk4/markduplicates/tests/bam.config @@ -0,0 +1,8 @@ +process { + + withName: GATK4_MARKDUPLICATES { + ext.args = '--CREATE_INDEX true' + ext.prefix = { "${meta.id}.bam" } + } + +} diff --git a/modules/nf-core/gatk4/markduplicates/tests/cram.config b/modules/nf-core/gatk4/markduplicates/tests/cram.config new file mode 100644 index 00000000..04a9b074 --- /dev/null +++ b/modules/nf-core/gatk4/markduplicates/tests/cram.config @@ -0,0 +1,8 @@ +process { + + withName: GATK4_MARKDUPLICATES { + ext.args = '--CREATE_INDEX true' + ext.prefix = { "${meta.id}.cram" } + } + +} diff --git a/modules/nf-core/gatk4/markduplicates/tests/main.nf.test b/modules/nf-core/gatk4/markduplicates/tests/main.nf.test new file mode 100644 index 00000000..bbcf74db --- /dev/null +++ b/modules/nf-core/gatk4/markduplicates/tests/main.nf.test @@ -0,0 +1,126 @@ +nextflow_process { + + name "Test Process GATK4_MARKDUPLICATES" + script "../main.nf" + process "GATK4_MARKDUPLICATES" + + tag "modules" + tag "modules_nfcore" + tag "gatk4" + tag "gatk4/markduplicates" + + test("sarscov2 - bam") { + config "./bam.config" + + when { + process { + """ + input[0] = [ + [ id:'test', 
single_end:false ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ] + input[1] = [] + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.bam).match("bam") }, + { assert snapshot(process.out.bai).match("bai") }, + { assert snapshot(process.out.versions).match("versions") }, + { assert snapshot(file(process.out.metrics[0][1]).name).match("test.metrics") } + ) + } + } + + test("homo_sapiens - multiple bam") { + config "./bam.config" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam', checkIfExists: true) + ] + ] + input[1] = [] + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.bam).match("multi bam") }, + { assert snapshot(process.out.bai).match("multi bai") }, + { assert snapshot(process.out.versions).match("multi versions") }, + { assert snapshot(file(process.out.metrics[0][1]).name).match("multi test.metrics") } + ) + } + + } + + test("homo_sapiens - multiple cram") { + config "./cram.config" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam', checkIfExists: true) + ] + ] + input[1] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + input[2] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + """ 
+ } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.cram[0][1]).name).match("multi cram")}, + { assert snapshot(file(process.out.crai[0][1]).name).match("multi crai") }, + { assert snapshot(process.out.versions).match("multi cram versions") }, + { assert snapshot(file(process.out.metrics[0][1]).name).match("multi cram test.metrics") } + ) + } + + } + + test("stub") { + config "./bam.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + [] + ] + input[1] = [] + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success } + ) + } + + } + +} diff --git a/modules/nf-core/gatk4/markduplicates/tests/main.nf.test.snap b/modules/nf-core/gatk4/markduplicates/tests/main.nf.test.snap new file mode 100644 index 00000000..336bb373 --- /dev/null +++ b/modules/nf-core/gatk4/markduplicates/tests/main.nf.test.snap @@ -0,0 +1,160 @@ +{ + "multi bam": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,8a808b1a94d2627c4d659a2151c4cb9f" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2024-02-13T15:21:36.059923" + }, + "multi crai": { + "content": [ + "test.cram.crai" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2023-12-12T17:43:37.780426007" + }, + "multi bai": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.bai:md5,38b99c5f771895ecf5324c3186b9d452" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2024-02-13T15:21:36.09642" + }, + "versions": { + "content": [ + [ + "versions.yml:md5,c58bf16c6e3786cc4d17bb7249f9ffe5" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2024-02-13T15:21:08.710549" + }, + "multi test.metrics": { + "content": [ + "test.bam.metrics" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": 
"2023-12-12T17:43:11.732892667" + }, + "bai": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.bai:md5,26001bcdbce12e9f07557d8f7b8d360e" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2023-12-12T17:42:39.651888758" + }, + "multi cram versions": { + "content": [ + [ + "versions.yml:md5,c58bf16c6e3786cc4d17bb7249f9ffe5" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2024-02-13T15:21:56.966376" + }, + "multi versions": { + "content": [ + [ + "versions.yml:md5,c58bf16c6e3786cc4d17bb7249f9ffe5" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2024-02-13T15:21:36.138095" + }, + "multi cram test.metrics": { + "content": [ + "test.cram.metrics" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2023-12-12T17:43:37.798977444" + }, + "multi cram": { + "content": [ + "test.cram" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2023-12-12T17:43:37.771137858" + }, + "bam": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,75d914ba8804eaf2acf02ab432197ec9" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2024-02-13T15:21:08.645892" + }, + "test.metrics": { + "content": [ + "test.bam.metrics" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2023-12-12T17:42:39.672508385" + } +} \ No newline at end of file diff --git a/modules/nf-core/gatk4/markduplicates/tests/tags.yml b/modules/nf-core/gatk4/markduplicates/tests/tags.yml new file mode 100644 index 00000000..8632e32b --- /dev/null +++ b/modules/nf-core/gatk4/markduplicates/tests/tags.yml @@ -0,0 +1,2 @@ +gatk4/markduplicates: + - "modules/nf-core/gatk4/markduplicates/**" diff --git a/modules/nf-core/gffread/environment.yml b/modules/nf-core/gffread/environment.yml new file mode 100644 index 
00000000..ee239841 --- /dev/null +++ b/modules/nf-core/gffread/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::gffread=0.12.7 diff --git a/modules/nf-core/gffread/main.nf b/modules/nf-core/gffread/main.nf new file mode 100644 index 00000000..da55cbab --- /dev/null +++ b/modules/nf-core/gffread/main.nf @@ -0,0 +1,60 @@ +process GFFREAD { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gffread:0.12.7--hdcf5f25_4' : + 'biocontainers/gffread:0.12.7--hdcf5f25_4' }" + + input: + tuple val(meta), path(gff) + path fasta + + output: + tuple val(meta), path("*.gtf") , emit: gtf , optional: true + tuple val(meta), path("*.gff3") , emit: gffread_gff , optional: true + tuple val(meta), path("*.fasta"), emit: gffread_fasta , optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("-T") ? 'gtf' : ( ( ['-w', '-x', '-y' ].any { args.contains(it) } ) ? 'fasta' : 'gff3' ) + def fasta_arg = fasta ? "-g $fasta" : '' + def output_name = "${prefix}.${extension}" + def output = extension == "fasta" ? "$output_name" : "-o $output_name" + def args_sorted = args.replaceAll(/(.*)(-[wxy])(.*)/) { all, pre, param, post -> "$pre $post $param" }.trim() + // args_sorted = Move '-w', '-x', and '-y' to the end of the args string as gffread expects the file name after these parameters + if ( "$output_name" in [ "$gff", "$fasta" ] ) error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" 
+ """ + gffread \\ + $gff \\ + $fasta_arg \\ + $args_sorted \\ + $output + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gffread: \$(gffread --version 2>&1) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("-T") ? 'gtf' : ( ( ['-w', '-x', '-y' ].any { args.contains(it) } ) ? 'fasta' : 'gff3' ) + def output_name = "${prefix}.${extension}" + if ( "$output_name" in [ "$gff", "$fasta" ] ) error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + """ + touch $output_name + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gffread: \$(gffread --version 2>&1) + END_VERSIONS + """ +} diff --git a/modules/nf-core/gffread/meta.yml b/modules/nf-core/gffread/meta.yml new file mode 100644 index 00000000..bebe7f57 --- /dev/null +++ b/modules/nf-core/gffread/meta.yml @@ -0,0 +1,75 @@ +name: gffread +description: Validate, filter, convert and perform various other operations on GFF + files +keywords: + - gff + - conversion + - validation +tools: + - gffread: + description: GFF/GTF utility providing format conversions, region filtering, FASTA + sequence extraction and more. + homepage: http://ccb.jhu.edu/software/stringtie/gff.shtml#gffread + documentation: http://ccb.jhu.edu/software/stringtie/gff.shtml#gffread + tool_dev_url: https://github.com/gpertea/gffread + doi: 10.12688/f1000research.23297.1 + licence: ["MIT"] + identifier: biotools:gffread +input: + - - meta: + type: map + description: | + Groovy Map containing meta data + e.g. [ id:'test' ] + - gff: + type: file + description: A reference file in either the GFF3, GFF2 or GTF format. + pattern: "*.{gff, gtf}" + - - fasta: + type: file + description: A multi-fasta file with the genomic sequences + pattern: "*.{fasta,fa,faa,fas,fsa}" +output: + - gtf: + - meta: + type: map + description: | + Groovy Map containing meta data + e.g. 
[ id:'test' ] + - "*.gtf": + type: file + description: GTF file resulting from the conversion of the GFF input file if + '-T' argument is present + pattern: "*.{gtf}" + - gffread_gff: + - meta: + type: map + description: | + Groovy Map containing meta data + e.g. [ id:'test' ] + - "*.gff3": + type: file + description: GFF3 file resulting from the conversion of the GFF input file if + '-T' argument is absent + pattern: "*.gff3" + - gffread_fasta: + - meta: + type: map + description: | + Groovy Map containing meta data + e.g. [ id:'test' ] + - "*.fasta": + type: file + description: Fasta file produced when either of '-w', '-x', '-y' parameters + is present + pattern: "*.fasta" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@edmundmiller" +maintainers: + - "@edmundmiller" + - "@gallvp" diff --git a/modules/nf-core/gffread/tests/main.nf.test b/modules/nf-core/gffread/tests/main.nf.test new file mode 100644 index 00000000..d039f367 --- /dev/null +++ b/modules/nf-core/gffread/tests/main.nf.test @@ -0,0 +1,224 @@ +nextflow_process { + + name "Test Process GFFREAD" + script "../main.nf" + process "GFFREAD" + + tag "gffread" + tag "modules_nfcore" + tag "modules" + + test("sarscov2-gff3-gtf") { + + config "./nextflow.config" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true) + ] + input[1] = [] + + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.gffread_gff == [] }, + { assert process.out.gffread_fasta == [] } + ) + } + + } + + test("sarscov2-gff3-gtf-stub") { + + options '-stub' + config "./nextflow.config" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + 
"genomics/sarscov2/genome/genome.gff3", checkIfExists: true) + ] + input[1] = [] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.gffread_gff == [] }, + { assert process.out.gffread_fasta == [] } + ) + } + + } + + test("sarscov2-gff3-gff3") { + + config "./nextflow-gff3.config" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true) + ] + input[1] = [] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.gtf == [] }, + { assert process.out.gffread_fasta == [] } + ) + } + + } + + test("sarscov2-gff3-gff3-stub") { + + options '-stub' + config "./nextflow-gff3.config" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true) + ] + input[1] = [] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.gtf == [] }, + { assert process.out.gffread_fasta == [] } + ) + } + + } + + test("sarscov2-gff3-fasta") { + + config "./nextflow-fasta.config" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true) + ] + input[1] = file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.fasta", checkIfExists: true) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.gtf == [] }, + { assert process.out.gffread_gff == [] } + ) + } + + } + + test("sarscov2-gff3-fasta-stub") { + + options '-stub' + config 
"./nextflow-fasta.config" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true) + ] + input[1] = file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.fasta", checkIfExists: true) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.gtf == [] }, + { assert process.out.gffread_gff == [] } + ) + } + + } + + test("sarscov2-gff3-fasta-fail-catch") { + + options '-stub' + config "./nextflow-fasta.config" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [id: 'genome'], + file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true) + ] + input[1] = file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.fasta", checkIfExists: true) + """ + } + } + + then { + assertAll ( + { assert ! 
process.success }, + { assert process.stdout.toString().contains("Input and output names are the same") } + ) + } + + } + +} \ No newline at end of file diff --git a/modules/nf-core/gffread/tests/main.nf.test.snap b/modules/nf-core/gffread/tests/main.nf.test.snap new file mode 100644 index 00000000..15262320 --- /dev/null +++ b/modules/nf-core/gffread/tests/main.nf.test.snap @@ -0,0 +1,272 @@ +{ + "sarscov2-gff3-gtf": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.gtf:md5,1ea0ae98d3388e0576407dc4a24ef428" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ], + "gffread_fasta": [ + + ], + "gffread_gff": [ + + ], + "gtf": [ + [ + { + "id": "test" + }, + "test.gtf:md5,1ea0ae98d3388e0576407dc4a24ef428" + ] + ], + "versions": [ + "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-09T10:48:56.496187" + }, + "sarscov2-gff3-gff3": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test" + }, + "test.gff3:md5,c4e5da6267c6bee5899a2c204ae1ad91" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ], + "gffread_fasta": [ + + ], + "gffread_gff": [ + [ + { + "id": "test" + }, + "test.gff3:md5,c4e5da6267c6bee5899a2c204ae1ad91" + ] + ], + "gtf": [ + + ], + "versions": [ + "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-09T10:49:00.892782" + }, + "sarscov2-gff3-gtf-stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.gtf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ], + "gffread_fasta": [ + + ], + "gffread_gff": [ + + ], + "gtf": [ + [ + { + "id": "test" + }, + "test.gtf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + 
"versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-09T11:11:26.975666" + }, + "sarscov2-gff3-fasta-stub": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test" + }, + "test.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ], + "gffread_fasta": [ + [ + { + "id": "test" + }, + "test.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "gffread_gff": [ + + ], + "gtf": [ + + ], + "versions": [ + "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-09T11:11:44.34792" + }, + "sarscov2-gff3-gff3-stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test" + }, + "test.gff3:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ], + "gffread_fasta": [ + + ], + "gffread_gff": [ + [ + { + "id": "test" + }, + "test.gff3:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "gtf": [ + + ], + "versions": [ + "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-09T11:11:35.221671" + }, + "sarscov2-gff3-fasta": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test" + }, + "test.fasta:md5,5f8108fb51739a0588ccf0a251de919a" + ] + ], + "3": [ + "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ], + "gffread_fasta": [ + [ + { + "id": "test" + }, + "test.fasta:md5,5f8108fb51739a0588ccf0a251de919a" + ] + ], + "gffread_gff": [ + + ], + "gtf": [ + + ], + "versions": [ + "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-09T10:54:02.88143" + } +} \ No newline at end of file diff 
--git a/modules/nf-core/gffread/tests/nextflow-fasta.config b/modules/nf-core/gffread/tests/nextflow-fasta.config new file mode 100644 index 00000000..ac6cb148 --- /dev/null +++ b/modules/nf-core/gffread/tests/nextflow-fasta.config @@ -0,0 +1,5 @@ +process { + withName: GFFREAD { + ext.args = '-w -S' + } +} diff --git a/modules/nf-core/gffread/tests/nextflow-gff3.config b/modules/nf-core/gffread/tests/nextflow-gff3.config new file mode 100644 index 00000000..afe0830e --- /dev/null +++ b/modules/nf-core/gffread/tests/nextflow-gff3.config @@ -0,0 +1,5 @@ +process { + withName: GFFREAD { + ext.args = '' + } +} diff --git a/modules/nf-core/gffread/tests/nextflow.config b/modules/nf-core/gffread/tests/nextflow.config new file mode 100644 index 00000000..74b25094 --- /dev/null +++ b/modules/nf-core/gffread/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: GFFREAD { + ext.args = '-T' + } +} diff --git a/modules/nf-core/gffread/tests/tags.yml b/modules/nf-core/gffread/tests/tags.yml new file mode 100644 index 00000000..05576065 --- /dev/null +++ b/modules/nf-core/gffread/tests/tags.yml @@ -0,0 +1,2 @@ +gffread: + - modules/nf-core/gffread/** diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml new file mode 100644 index 00000000..6f5b867b --- /dev/null +++ b/modules/nf-core/multiqc/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::multiqc=1.25.1 diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf new file mode 100644 index 00000000..cc0643e1 --- /dev/null +++ b/modules/nf-core/multiqc/main.nf @@ -0,0 +1,63 @@ +process MULTIQC { + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/multiqc:1.25.1--pyhdfd78af_0' : + 'biocontainers/multiqc:1.25.1--pyhdfd78af_0' }" + + input: + path multiqc_files, stageAs: "?/*" + path(multiqc_config) + path(extra_multiqc_config) + path(multiqc_logo) + path(replace_names) + path(sample_names) + + output: + path "*multiqc_report.html", emit: report + path "*_data" , emit: data + path "*_plots" , optional:true, emit: plots + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ? "--filename ${task.ext.prefix}.html" : '' + def config = multiqc_config ? "--config $multiqc_config" : '' + def extra_config = extra_multiqc_config ? "--config $extra_multiqc_config" : '' + def logo = multiqc_logo ? "--cl-config 'custom_logo: \"${multiqc_logo}\"'" : '' + def replace = replace_names ? "--replace-names ${replace_names}" : '' + def samples = sample_names ? "--sample-names ${sample_names}" : '' + """ + multiqc \\ + --force \\ + $args \\ + $config \\ + $prefix \\ + $extra_config \\ + $logo \\ + $replace \\ + $samples \\ + . 
+ + cat <<-END_VERSIONS > versions.yml + "${task.process}": + multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" ) + END_VERSIONS + """ + + stub: + """ + mkdir multiqc_data + mkdir multiqc_plots + touch multiqc_report.html + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml new file mode 100644 index 00000000..b16c1879 --- /dev/null +++ b/modules/nf-core/multiqc/meta.yml @@ -0,0 +1,78 @@ +name: multiqc +description: Aggregate results from bioinformatics analyses across many samples into + a single report +keywords: + - QC + - bioinformatics tools + - Beautiful stand-alone HTML report +tools: + - multiqc: + description: | + MultiQC searches a given directory for analysis logs and compiles a HTML report. + It's a general use tool, perfect for summarising the output from numerous bioinformatics tools. + homepage: https://multiqc.info/ + documentation: https://multiqc.info/docs/ + licence: ["GPL-3.0-or-later"] + identifier: biotools:multiqc +input: + - - multiqc_files: + type: file + description: | + List of reports / files recognised by MultiQC, for example the html and zip output of FastQC + - - multiqc_config: + type: file + description: Optional config yml for MultiQC + pattern: "*.{yml,yaml}" + - - extra_multiqc_config: + type: file + description: Second optional config yml for MultiQC. Will override common sections + in multiqc_config. + pattern: "*.{yml,yaml}" + - - multiqc_logo: + type: file + description: Optional logo file for MultiQC + pattern: "*.{png}" + - - replace_names: + type: file + description: | + Optional two-column sample renaming file. First column a set of + patterns, second column a set of corresponding replacements. Passed via + MultiQC's `--replace-names` option. 
+ pattern: "*.{tsv}" + - - sample_names: + type: file + description: | + Optional TSV file with headers, passed to the MultiQC `--sample-names` + argument. + pattern: "*.{tsv}" +output: + - report: + - "*multiqc_report.html": + type: file + description: MultiQC report file + pattern: "multiqc_report.html" + - data: + - "*_data": + type: directory + description: MultiQC data dir + pattern: "multiqc_data" + - plots: + - "*_plots": + type: file + description: Plots created by MultiQC + pattern: "*_plots" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@abhi18av" + - "@bunop" + - "@drpatelh" + - "@jfy133" +maintainers: + - "@abhi18av" + - "@bunop" + - "@drpatelh" + - "@jfy133" diff --git a/modules/nf-core/multiqc/tests/main.nf.test b/modules/nf-core/multiqc/tests/main.nf.test new file mode 100644 index 00000000..33316a7d --- /dev/null +++ b/modules/nf-core/multiqc/tests/main.nf.test @@ -0,0 +1,92 @@ +nextflow_process { + + name "Test Process MULTIQC" + script "../main.nf" + process "MULTIQC" + + tag "modules" + tag "modules_nfcore" + tag "multiqc" + + config "./nextflow.config" + + test("sarscov2 single-end [fastqc]") { + + when { + process { + """ + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) + input[1] = [] + input[2] = [] + input[3] = [] + input[4] = [] + input[5] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, + { assert process.out.data[0] ==~ ".*/multiqc_data" }, + { assert snapshot(process.out.versions).match("multiqc_versions_single") } + ) + } + + } + + test("sarscov2 single-end [fastqc] [config]") { + + when { + process { + """ + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) + input[1] =
Channel.of(file("https://github.com/nf-core/tools/raw/dev/nf_core/pipeline-template/assets/multiqc_config.yml", checkIfExists: true)) + input[2] = [] + input[3] = [] + input[4] = [] + input[5] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, + { assert process.out.data[0] ==~ ".*/multiqc_data" }, + { assert snapshot(process.out.versions).match("multiqc_versions_config") } + ) + } + } + + test("sarscov2 single-end [fastqc] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) + input[1] = [] + input[2] = [] + input[3] = [] + input[4] = [] + input[5] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.report.collect { file(it).getName() } + + process.out.data.collect { file(it).getName() } + + process.out.plots.collect { file(it).getName() } + + process.out.versions ).match("multiqc_stub") } + ) + } + + } +} diff --git a/modules/nf-core/multiqc/tests/main.nf.test.snap b/modules/nf-core/multiqc/tests/main.nf.test.snap new file mode 100644 index 00000000..2fcbb5ff --- /dev/null +++ b/modules/nf-core/multiqc/tests/main.nf.test.snap @@ -0,0 +1,41 @@ +{ + "multiqc_versions_single": { + "content": [ + [ + "versions.yml:md5,41f391dcedce7f93ca188f3a3ffa0916" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-02T17:51:46.317523" + }, + "multiqc_stub": { + "content": [ + [ + "multiqc_report.html", + "multiqc_data", + "multiqc_plots", + "versions.yml:md5,41f391dcedce7f93ca188f3a3ffa0916" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-02T17:52:20.680978" + }, + "multiqc_versions_config": { + "content": [ + [ + "versions.yml:md5,41f391dcedce7f93ca188f3a3ffa0916" + ] + ], + "meta": { + "nf-test": "0.9.0", 
+ "nextflow": "24.04.4" + }, + "timestamp": "2024-10-02T17:52:09.185842" + } +} \ No newline at end of file diff --git a/modules/nf-core/multiqc/tests/nextflow.config b/modules/nf-core/multiqc/tests/nextflow.config new file mode 100644 index 00000000..c537a6a3 --- /dev/null +++ b/modules/nf-core/multiqc/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: 'MULTIQC' { + ext.prefix = null + } +} diff --git a/modules/nf-core/multiqc/tests/tags.yml b/modules/nf-core/multiqc/tests/tags.yml new file mode 100644 index 00000000..bea6c0d3 --- /dev/null +++ b/modules/nf-core/multiqc/tests/tags.yml @@ -0,0 +1,2 @@ +multiqc: + - modules/nf-core/multiqc/** diff --git a/modules/nf-core/picard/collectinsertsizemetrics/environment.yml b/modules/nf-core/picard/collectinsertsizemetrics/environment.yml new file mode 100644 index 00000000..1d715d56 --- /dev/null +++ b/modules/nf-core/picard/collectinsertsizemetrics/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::picard=3.3.0 diff --git a/modules/nf-core/picard/collectinsertsizemetrics/main.nf b/modules/nf-core/picard/collectinsertsizemetrics/main.nf new file mode 100644 index 00000000..c3014d80 --- /dev/null +++ b/modules/nf-core/picard/collectinsertsizemetrics/main.nf @@ -0,0 +1,65 @@ +process PICARD_COLLECTINSERTSIZEMETRICS { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/picard:3.3.0--hdfd78af_0' : + 'biocontainers/picard:3.3.0--hdfd78af_0' }" + + input: + tuple val(meta), path(bam) + + output: + tuple val(meta), path("*.txt"), emit: metrics + tuple val(meta), path("*.pdf"), emit: histogram + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + def avail_mem = 3072 + if (!task.memory) { + log.info '[Picard CollectInsertSizeMetrics] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + // $args must appear only once in the command below so user-supplied options are not duplicated. + """ + picard \\ + -Xmx${avail_mem}M \\ + CollectInsertSizeMetrics \\ + $args \\ + --INPUT $bam \\ + --OUTPUT ${prefix}.txt \\ + --Histogram_FILE ${prefix}.pdf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + picard: \$(picard CollectInsertSizeMetrics --version 2>&1 | grep -o 'Version:.*' | cut -f2- -d:) + END_VERSIONS + """ + + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def avail_mem = 3072 + if (!task.memory) { + log.info '[Picard CollectInsertSizeMetrics] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } + """ + touch ${prefix}.pdf + touch ${prefix}.txt + cat <<-END_VERSIONS > versions.yml + "${task.process}": + picard: \$(picard CollectInsertSizeMetrics --version 2>&1 | grep -o 'Version:.*' | cut -f2- -d:) + END_VERSIONS + """ + + + +} diff --git a/modules/nf-core/picard/collectinsertsizemetrics/meta.yml b/modules/nf-core/picard/collectinsertsizemetrics/meta.yml new file mode 100644 index 00000000..0947048d --- /dev/null +++ b/modules/nf-core/picard/collectinsertsizemetrics/meta.yml @@ -0,0 +1,56 @@ +name: "picard_collectinsertsizemetrics" +description: Collect metrics about the insert size distribution of a paired-end library.
+keywords: + - metrics + - alignment + - insert + - statistics + - bam +tools: + - "picard": + description: "Java tools for working with NGS data in the BAM format" + homepage: "https://broadinstitute.github.io/picard/" + documentation: "https://broadinstitute.github.io/picard/" + tool_dev_url: "https://github.com/broadinstitute/picard" + licence: ["MIT"] + identifier: biotools:picard_tools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" +output: + - metrics: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.txt": + type: file + description: Values used by Picard to generate the insert size histograms + pattern: "*.txt" + - histogram: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.pdf": + type: file + description: Insert size histogram in PDF format + pattern: "*.pdf" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@FerriolCalvet" +maintainers: + - "@FerriolCalvet" diff --git a/modules/nf-core/picard/collectinsertsizemetrics/tests/main.nf.test b/modules/nf-core/picard/collectinsertsizemetrics/tests/main.nf.test new file mode 100644 index 00000000..4cf7a332 --- /dev/null +++ b/modules/nf-core/picard/collectinsertsizemetrics/tests/main.nf.test @@ -0,0 +1,62 @@ + +nextflow_process { + + name "Test Process PICARD_COLLECTINSERTSIZEMETRICS" + script "../main.nf" + process "PICARD_COLLECTINSERTSIZEMETRICS" + + tag "modules" + tag "modules_nfcore" + tag "picard" + tag "picard/collectinsertsizemetrics" + + test("test-picard-collectinsertsizemetrics") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + 
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ] + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.metrics[0][1]).readLines()[5..8], + file(process.out.histogram[0][1]).name, + process.out.versions + ).match() + } + ) + } + } + + test("test-picard-collectinsertsizemetrics-stub") { + options '-stub' + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ] + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + +} diff --git a/modules/nf-core/picard/collectinsertsizemetrics/tests/main.nf.test.snap b/modules/nf-core/picard/collectinsertsizemetrics/tests/main.nf.test.snap new file mode 100644 index 00000000..cbe9329d --- /dev/null +++ b/modules/nf-core/picard/collectinsertsizemetrics/tests/main.nf.test.snap @@ -0,0 +1,74 @@ +{ + "test-picard-collectinsertsizemetrics": { + "content": [ + [ + "## METRICS CLASS\tpicard.analysis.InsertSizeMetrics", + "MEDIAN_INSERT_SIZE\tMODE_INSERT_SIZE\tMEDIAN_ABSOLUTE_DEVIATION\tMIN_INSERT_SIZE\tMAX_INSERT_SIZE\tMEAN_INSERT_SIZE\tSTANDARD_DEVIATION\tREAD_PAIRS\tPAIR_ORIENTATION\tWIDTH_OF_10_PERCENT\tWIDTH_OF_20_PERCENT\tWIDTH_OF_30_PERCENT\tWIDTH_OF_40_PERCENT\tWIDTH_OF_50_PERCENT\tWIDTH_OF_60_PERCENT\tWIDTH_OF_70_PERCENT\tWIDTH_OF_80_PERCENT\tWIDTH_OF_90_PERCENT\tWIDTH_OF_95_PERCENT\tWIDTH_OF_99_PERCENT\tSAMPLE\tLIBRARY\tREAD_GROUP", + "209\t159\t46\t77\t364\t207.659794\t66.769018\t97\tFR\t25\t49\t59\t77\t93\t123\t145\t183\t223\t255\t311\t\t\t", + "" + ], + "test.pdf", + [ + "versions.yml:md5,38d39e9882afe7ac015213c286745056" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-18T10:55:45.769771444" + }, + 
"test-picard-collectinsertsizemetrics-stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,38d39e9882afe7ac015213c286745056" + ], + "histogram": [ + [ + { + "id": "test", + "single_end": false + }, + "test.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "metrics": [ + [ + { + "id": "test", + "single_end": false + }, + "test.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,38d39e9882afe7ac015213c286745056" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-18T10:56:09.914953495" + } +} \ No newline at end of file diff --git a/modules/nf-core/picard/collectrnaseqmetrics/environment.yml b/modules/nf-core/picard/collectrnaseqmetrics/environment.yml new file mode 100644 index 00000000..1d715d56 --- /dev/null +++ b/modules/nf-core/picard/collectrnaseqmetrics/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::picard=3.3.0 diff --git a/modules/nf-core/picard/collectrnaseqmetrics/main.nf b/modules/nf-core/picard/collectrnaseqmetrics/main.nf new file mode 100644 index 00000000..eb80fdc7 --- /dev/null +++ b/modules/nf-core/picard/collectrnaseqmetrics/main.nf @@ -0,0 +1,62 @@ +process PICARD_COLLECTRNASEQMETRICS { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/picard:3.3.0--hdfd78af_0' : + 'biocontainers/picard:3.3.0--hdfd78af_0' }" + + input: + tuple val(meta), path(bam) + path ref_flat + path fasta + path rrna_intervals + + output: + tuple val(meta), path("*.rna_metrics") , emit: metrics + tuple val(meta), path("*.pdf") , emit: pdf, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def reference = fasta ? "--REFERENCE_SEQUENCE ${fasta}" : "" + def rrna = rrna_intervals ? "--RIBOSOMAL_INTERVALS ${rrna_intervals}" : "" + def avail_mem = 3072 + if (!task.memory) { + log.info '[Picard CollectRnaSeqMetrics] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + picard \\ + -Xmx${avail_mem}M \\ + CollectRnaSeqMetrics \\ + $args \\ + $reference \\ + $rrna \\ + --REF_FLAT $ref_flat \\ + --INPUT $bam \\ + --OUTPUT ${prefix}.rna_metrics + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + picard: \$(echo \$(picard CollectRnaSeqMetrics --version 2>&1) | grep -o 'Version:.*' | cut -f2- -d:) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.rna_metrics + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + picard: \$(echo \$(picard CollectRnaSeqMetrics --version 2>&1) | grep -o 'Version:.*' | cut -f2- -d:) + END_VERSIONS + """ +} diff --git a/modules/nf-core/picard/collectrnaseqmetrics/meta.yml b/modules/nf-core/picard/collectrnaseqmetrics/meta.yml new file mode 100644 index 00000000..15d146ba --- /dev/null +++ b/modules/nf-core/picard/collectrnaseqmetrics/meta.yml @@ -0,0 +1,68 @@ +name: "picard_collectrnaseqmetrics" +description: Collect metrics from a RNAseq BAM file +keywords: + - rna + - bam + - metrics + - alignment + - statistics + - quality +tools: + 
- "picard": + description: | + A set of command line tools (in Java) for manipulating high-throughput sequencing (HTS) + data and formats such as SAM/BAM/CRAM and VCF. + homepage: "https://broadinstitute.github.io/picard/" + documentation: "https://broadinstitute.github.io/picard/" + tool_dev_url: "https://github.com/broadinstitute/picard" + licence: ["MIT"] + identifier: biotools:picard_tools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false, strandedness:true ] + - bam: + type: file + description: BAM/SAM file + pattern: "*.{bam,sam}" + - - ref_flat: + type: file + description: Genome ref_flat file + - - fasta: + type: file + description: Genome fasta file + - - rrna_intervals: + type: file + description: Interval file of ribosomal RNA regions +output: + - metrics: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.rna_metrics": + type: file + description: RNA alignment metrics files generated by picard + pattern: "*.rna_metrics" + - pdf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.pdf": + type: file + description: Plot normalized position vs. 
coverage in a pdf file generated by + picard + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@anoronh4" +maintainers: + - "@anoronh4" diff --git a/modules/nf-core/picard/collectrnaseqmetrics/tests/main.nf.test b/modules/nf-core/picard/collectrnaseqmetrics/tests/main.nf.test new file mode 100644 index 00000000..9ab18552 --- /dev/null +++ b/modules/nf-core/picard/collectrnaseqmetrics/tests/main.nf.test @@ -0,0 +1,81 @@ +nextflow_process { + + name "Test Process PICARD_COLLECTRNASEQMETRICS" + script "../main.nf" + process "PICARD_COLLECTRNASEQMETRICS" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "picard" + tag "picard/collectrnaseqmetrics" + tag "ucsc/gtftogenepred" + + setup { + run("UCSC_GTFTOGENEPRED") { + script "../../../ucsc/gtftogenepred/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test' ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + } + + test("sarscov2 - fasta - gtf") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false, strandedness:'forward' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ] + input[1] = UCSC_GTFTOGENEPRED.out.refflat.map{ it[1] } + input[2] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert file(process.out.metrics[0][1]).text.contains('CollectRnaSeqMetrics') }, + { assert snapshot( + process.out.versions, + process.out.pdf + ).match() } + ) + } + } + + test("sarscov2 - fasta - gtf - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false, strandedness:'forward' ], // meta map + 
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ] + input[1] = UCSC_GTFTOGENEPRED.out.refflat.map{ it[1] } + input[2] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/picard/collectrnaseqmetrics/tests/main.nf.test.snap b/modules/nf-core/picard/collectrnaseqmetrics/tests/main.nf.test.snap new file mode 100644 index 00000000..ad6503af --- /dev/null +++ b/modules/nf-core/picard/collectrnaseqmetrics/tests/main.nf.test.snap @@ -0,0 +1,60 @@ +{ + "sarscov2 - fasta - gtf": { + "content": [ + [ + "versions.yml:md5,8d0867f89947c081711de09338138c2e" + ], + [ + + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-18T16:10:39.199344417" + }, + "sarscov2 - fasta - gtf - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false, + "strandedness": "forward" + }, + "test.rna_metrics:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,8d0867f89947c081711de09338138c2e" + ], + "metrics": [ + [ + { + "id": "test", + "single_end": false, + "strandedness": "forward" + }, + "test.rna_metrics:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "pdf": [ + + ], + "versions": [ + "versions.yml:md5,8d0867f89947c081711de09338138c2e" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-18T16:10:57.248132065" + } +} \ No newline at end of file diff --git a/modules/nf-core/picard/collectrnaseqmetrics/tests/nextflow.config b/modules/nf-core/picard/collectrnaseqmetrics/tests/nextflow.config new file mode 100644 index 00000000..bc82e10c --- /dev/null +++ b/modules/nf-core/picard/collectrnaseqmetrics/tests/nextflow.config @@ -0,0 +1,13 @@ +process { + 
withName:UCSC_GTFTOGENEPRED { + ext.args = "-genePredExt -geneNameAsName2 -ignoreGroupsWithoutExons" + } + withName:PICARD_COLLECTRNASEQMETRICS { + ext.args = { ( meta.strandedness == "forward" || meta.single_end ) ? + "--STRAND_SPECIFICITY FIRST_READ_TRANSCRIPTION_STRAND" : + meta.strandedness == "reverse" ? + "--STRAND_SPECIFICITY SECOND_READ_TRANSCRIPTION_STRAND" : + "--STRAND_SPECIFICITY NONE" + } + } +} diff --git a/modules/nf-core/picard/collectwgsmetrics/environment.yml b/modules/nf-core/picard/collectwgsmetrics/environment.yml new file mode 100644 index 00000000..13265842 --- /dev/null +++ b/modules/nf-core/picard/collectwgsmetrics/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::picard=3.3.0 + - conda-forge::r-base=4.4.1 diff --git a/modules/nf-core/picard/collectwgsmetrics/main.nf b/modules/nf-core/picard/collectwgsmetrics/main.nf new file mode 100644 index 00000000..39cf7d2b --- /dev/null +++ b/modules/nf-core/picard/collectwgsmetrics/main.nf @@ -0,0 +1,60 @@ +process PICARD_COLLECTWGSMETRICS { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/picard:3.3.0--hdfd78af_0' : + 'biocontainers/picard:3.3.0--hdfd78af_0' }" + + input: + tuple val(meta), path(bam), path(bai) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + path intervallist + + output: + tuple val(meta), path("*_metrics"), emit: metrics + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def avail_mem = 3072 + def interval = intervallist ? "--INTERVALS ${intervallist}" : '' + if (!task.memory) { + log.info '[Picard CollectWgsMetrics] Available memory not known - defaulting to 3GB. 
Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + picard \\ + -Xmx${avail_mem}M \\ + CollectWgsMetrics \\ + $args \\ + --INPUT $bam \\ + --OUTPUT ${prefix}.CollectWgsMetrics.coverage_metrics \\ + --REFERENCE_SEQUENCE ${fasta} \\ + $interval + + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + picard: \$(picard CollectWgsMetrics --version 2>&1 | grep -o 'Version.*' | cut -f2- -d:) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.CollectWgsMetrics.coverage_metrics + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + picard: \$(picard CollectWgsMetrics --version 2>&1 | grep -o 'Version.*' | cut -f2- -d:) + END_VERSIONS + """ +} diff --git a/modules/nf-core/picard/collectwgsmetrics/meta.yml b/modules/nf-core/picard/collectwgsmetrics/meta.yml new file mode 100644 index 00000000..bb748080 --- /dev/null +++ b/modules/nf-core/picard/collectwgsmetrics/meta.yml @@ -0,0 +1,80 @@ +name: picard_collectwgsmetrics +description: Collect metrics about coverage and performance of whole genome sequencing + (WGS) experiments. +keywords: + - alignment + - metrics + - statistics + - quality + - bam +tools: + - picard: + description: | + A set of command line tools (in Java) for manipulating high-throughput sequencing (HTS) + data and formats such as SAM/BAM/CRAM and VCF. + homepage: https://broadinstitute.github.io/picard/ + documentation: https://broadinstitute.github.io/picard/ + licence: ["MIT"] + identifier: biotools:picard_tools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: Aligned reads file + pattern: "*.{bam,cram}" + - bai: + type: file + description: (Optional) Aligned reads file index + pattern: "*.{bai,crai}" + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g.
[ id:'genome' ] + - fasta: + type: file + description: Genome fasta file + pattern: "*.{fa,fasta,fna}" + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fai: + type: file + description: Genome fasta file index + pattern: "*.{fai}" + - - intervallist: + type: file + description: Picard Interval List. Defines which contigs to include. Can be + generated from a BED file with GATK BedToIntervalList. +output: + - metrics: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*_metrics": + type: file + description: Alignment metrics files generated by picard + pattern: "*_{metrics}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@flowuenne" + - "@lassefolkersen" + - "@ramprasadn" +maintainers: + - "@drpatelh" + - "@flowuenne" + - "@lassefolkersen" + - "@ramprasadn" diff --git a/modules/nf-core/picard/collectwgsmetrics/tests/main.nf.test b/modules/nf-core/picard/collectwgsmetrics/tests/main.nf.test new file mode 100644 index 00000000..a3984566 --- /dev/null +++ b/modules/nf-core/picard/collectwgsmetrics/tests/main.nf.test @@ -0,0 +1,83 @@ + +nextflow_process { + + name "Test Process PICARD_COLLECTWGSMETRICS" + script "../main.nf" + process "PICARD_COLLECTWGSMETRICS" + + tag "modules" + tag "modules_nfcore" + tag "picard" + tag "picard/collectwgsmetrics" + + test("test-picard-collectwgsmetrics") { + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + ] + input[1] = [ + [id:'genome'], + file(params.modules_testdata_base_path + 
'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [id:'genome'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) + ] + input[3] = [] + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.metrics[0][1]).text.contains('coverage high_quality_coverage_count'), + process.out.versions + ).match() + } + ) + } + } + + test("test-picard-collectwgsmetrics-with-interval") { + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + [] + ] + input[1] = [ + [id:'genome'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [id:'genome'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) + ] + input[3] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/picard/baits.interval_list', checkIfExists: true) + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.metrics[0][1]).text.contains('coverage high_quality_coverage_count'), + process.out.versions + ).match() + } + ) + } + } + +} diff --git a/modules/nf-core/picard/collectwgsmetrics/tests/main.nf.test.snap b/modules/nf-core/picard/collectwgsmetrics/tests/main.nf.test.snap new file mode 100644 index 00000000..1958fcde --- /dev/null +++ b/modules/nf-core/picard/collectwgsmetrics/tests/main.nf.test.snap @@ -0,0 +1,28 @@ +{ + "test-picard-collectwgsmetrics-with-interval": { + "content": [ + true, + [ + "versions.yml:md5,9927db69fdd55176be5cdbd427d000c2" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-18T10:15:18.13771243" + }, + "test-picard-collectwgsmetrics": { + "content": [ + 
true, + [ + "versions.yml:md5,9927db69fdd55176be5cdbd427d000c2" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-18T10:14:57.786056996" + } +} \ No newline at end of file diff --git a/modules/nf-core/rrnatranscripts/environment.yml b/modules/nf-core/rrnatranscripts/environment.yml new file mode 100644 index 00000000..6f09494b --- /dev/null +++ b/modules/nf-core/rrnatranscripts/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - python=3.12.2 diff --git a/modules/nf-core/rrnatranscripts/main.nf b/modules/nf-core/rrnatranscripts/main.nf new file mode 100644 index 00000000..982c53cd --- /dev/null +++ b/modules/nf-core/rrnatranscripts/main.nf @@ -0,0 +1,43 @@ +process RRNATRANSCRIPTS { + tag "$gtf" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/python:3.12' : + 'biocontainers/python:3.12' }" + + input: + path(gtf) + + output: + path("*rrna_intervals.gtf") , emit: rrna_gtf, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${gtf.baseName}" + """ + grep -E '^#|rRNA' ${gtf} > ${prefix}_rrna_intervals.gtf || true + if [ ! 
-s ${prefix}_rrna_intervals.gtf ]; then + rm ${prefix}_rrna_intervals.gtf + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | sed -e "s/Python //g") + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${gtf.baseName}" + """ + touch ${prefix}_rrna_intervals.gtf + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | sed -e "s/Python //g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/rrnatranscripts/meta.yml b/modules/nf-core/rrnatranscripts/meta.yml new file mode 100644 index 00000000..93f6a10e --- /dev/null +++ b/modules/nf-core/rrnatranscripts/meta.yml @@ -0,0 +1,34 @@ +name: rrnatranscripts +description: Ribosomal RNA extraction from a GTF file. +keywords: + - ribosomal + - rna + - genomics +tools: + - rrnatranscripts: + description: | + Extraction of ribosomal RNA + homepage: https://github.com/nf-core/rnafusion + licence: ["GPL-3.0-or-later"] + identifier: "" +input: + - - gtf: + type: file + description: GTF file + pattern: "*.{gtf}" +output: + # + - rrna_gtf: + - "*rrna_intervals.gtf": + type: file + description: GTF file with ribosomal RNA only + pattern: "*.{gtf}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@rannick" +maintainers: + - "@rannick" diff --git a/modules/nf-core/rrnatranscripts/templates/get_rrna_transcripts.py b/modules/nf-core/rrnatranscripts/templates/get_rrna_transcripts.py new file mode 100644 index 00000000..ea3ce0f4 --- /dev/null +++ b/modules/nf-core/rrnatranscripts/templates/get_rrna_transcripts.py @@ -0,0 +1,83 @@ +#!/usr/bin/env python3 + +import logging +import platform +import sys +from pathlib import Path + +# Configure logging +logging.basicConfig(format="%(name)s - %(asctime)s %(levelname)s: %(message)s") +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + + +def get_rrna_intervals(gtf: str, rrna_transcripts: str): + """ + 
Get lines containing ``#`` or ``gene_type rRNA`` or ```` or ``gene_type rRNA_pseudogene`` or ``gene_type MT_rRNA`` + Create output file + + Args: + file_in (pathlib.Path): The given GTF file. + file_out (pathlib.Path): Where the ribosomal RNA GTF file should + be created; always in GTF format. + """ + patterns = { + "#", + 'transcript_biotype "Mt_rRNA"', + 'transcript_biotype "rRNA"', + 'transcript_biotype "rRNA_pseudogene"', + } + line_starts = {"MT", "1", "2", "3", "4", "5", "6", "7", "8", "9"} + out_lines = [] + path_gtf = Path(gtf) + path_rrna_transcripts = Path(rrna_transcripts) + if not path_gtf.is_file(): + logger.error(f"The given input file {gtf} was not found!") + sys.exit(2) + with path_gtf.open() as f: + data = f.readlines() + for line in data: + for pattern in patterns: + if pattern in line: + for line_start in line_starts: + if line.startswith(line_start): + out_lines.append(line) + if out_lines != []: + with path_rrna_transcripts.open(mode="w") as out_file: + out_file.writelines(out_lines) + + +def format_yaml_like(data: dict, indent: int = 0) -> str: + """Formats a dictionary to a YAML-like string. + + Args: + data (dict): The dictionary to format. + indent (int): The current indentation level. + + Returns: + str: A string formatted as YAML. + """ + yaml_str = "" + for key, value in data.items(): + spaces = " " * indent + if isinstance(value, dict): + yaml_str += f"{spaces}{key}:\\n{format_yaml_like(value, indent + 1)}" + else: + yaml_str += f"{spaces}{key}: {value}\\n" + return yaml_str + + +if __name__ == "__main__": + if "${task.ext.prefix}" != "null": + prefix = "${task.ext.prefix}." + else: + prefix = "${task.ext.gtf}." 
+ + if not get_rrna_intervals("$gtf", f"{prefix}_rrna_intervals.gtf"): + logging.error("Failed to extract rrna transcipts.") + + # Write the versions + versions_this_module = {} + versions_this_module["${task.process}"] = {"python": platform.python_version()} + with open("versions.yml", "w") as f: + f.write(format_yaml_like(versions_this_module)) diff --git a/modules/nf-core/rrnatranscripts/tests/main.nf.test b/modules/nf-core/rrnatranscripts/tests/main.nf.test new file mode 100644 index 00000000..db0d5565 --- /dev/null +++ b/modules/nf-core/rrnatranscripts/tests/main.nf.test @@ -0,0 +1,53 @@ +nextflow_process { + + name "Test Process RRNATRANSCRIPTS" + script "../main.nf" + process "RRNATRANSCRIPTS" + + tag "modules" + tag "modules_nfcore" + tag "rrnatranscripts" + + test("homo_sapiens - gtf") { + + when { + process { + """ + input[0] = [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) + ] """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("homo_sapiens - gtf - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) + ] """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/rrnatranscripts/tests/main.nf.test.snap b/modules/nf-core/rrnatranscripts/tests/main.nf.test.snap new file mode 100644 index 00000000..a190ddcb --- /dev/null +++ b/modules/nf-core/rrnatranscripts/tests/main.nf.test.snap @@ -0,0 +1,40 @@ +{ + "homo_sapiens - gtf": { + "content": [ + { + "0": [ + + ], + "1": [ + "versions.yml:md5,fa2f785dbbe87a180f4254910957e01e" + ], + "rrna_gtf": [ + + ], + "versions": [ + "versions.yml:md5,fa2f785dbbe87a180f4254910957e01e" + ] + } + ], + "timestamp": "2024-05-21T13:55:44.831609" + }, + "homo_sapiens - 
gtf - stub": { + "content": [ + { + "0": [ + "genome_rrna_intervals.gtf:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "1": [ + "versions.yml:md5,fa2f785dbbe87a180f4254910957e01e" + ], + "rrna_gtf": [ + "genome_rrna_intervals.gtf:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "versions": [ + "versions.yml:md5,fa2f785dbbe87a180f4254910957e01e" + ] + } + ], + "timestamp": "2024-05-21T13:55:56.134136" + } +} \ No newline at end of file diff --git a/modules/nf-core/rrnatranscripts/tests/tags.yml b/modules/nf-core/rrnatranscripts/tests/tags.yml new file mode 100644 index 00000000..ade15226 --- /dev/null +++ b/modules/nf-core/rrnatranscripts/tests/tags.yml @@ -0,0 +1,2 @@ +rrnatranscripts: + - "modules/nf-core/rrnatranscripts/**" diff --git a/modules/nf-core/salmon/index/environment.yml b/modules/nf-core/salmon/index/environment.yml new file mode 100644 index 00000000..b3f75777 --- /dev/null +++ b/modules/nf-core/salmon/index/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::salmon=1.10.3 diff --git a/modules/nf-core/salmon/index/main.nf b/modules/nf-core/salmon/index/main.nf new file mode 100644 index 00000000..3d653c0d --- /dev/null +++ b/modules/nf-core/salmon/index/main.nf @@ -0,0 +1,72 @@ +process SALMON_INDEX { + tag "$transcript_fasta" + label "process_medium" + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/salmon:1.10.3--h6dccd9a_2' : + 'biocontainers/salmon:1.10.3--h6dccd9a_2' }" + + input: + path genome_fasta + path transcript_fasta + + output: + path "salmon" , emit: index + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def get_decoy_ids = "grep '^>' $genome_fasta | cut -d ' ' -f 1 | cut -d \$'\\t' -f 1 > decoys.txt" + def gentrome = "gentrome.fa" + if (genome_fasta.endsWith('.gz')) { + get_decoy_ids = "grep '^>' <(gunzip -c $genome_fasta) | cut -d ' ' -f 1 | cut -d \$'\\t' -f 1 > decoys.txt" + gentrome = "gentrome.fa.gz" + } + """ + $get_decoy_ids + sed -i.bak -e 's/>//g' decoys.txt + cat $transcript_fasta $genome_fasta > $gentrome + + salmon \\ + index \\ + --threads $task.cpus \\ + -t $gentrome \\ + -d decoys.txt \\ + $args \\ + -i salmon + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + salmon: \$(echo \$(salmon --version) | sed -e "s/salmon //g") + END_VERSIONS + """ + + stub: + """ + mkdir salmon + touch salmon/complete_ref_lens.bin + touch salmon/ctable.bin + touch salmon/ctg_offsets.bin + touch salmon/duplicate_clusters.tsv + touch salmon/info.json + touch salmon/mphf.bin + touch salmon/pos.bin + touch salmon/pre_indexing.log + touch salmon/rank.bin + touch salmon/refAccumLengths.bin + touch salmon/ref_indexing.log + touch salmon/reflengths.bin + touch salmon/refseq.bin + touch salmon/seq.bin + touch salmon/versionInfo.json + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + salmon: \$(echo \$(salmon --version) | sed -e "s/salmon //g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/salmon/index/meta.yml b/modules/nf-core/salmon/index/meta.yml new file mode 100644 index 00000000..48486a2b --- /dev/null +++ b/modules/nf-core/salmon/index/meta.yml @@ -0,0 +1,40 @@ +name: salmon_index +description: Create index for salmon +keywords: + - index + - fasta + - genome + - reference +tools: + - salmon: + 
description: | + Salmon is a tool for wicked-fast transcript quantification from RNA-seq data + homepage: https://salmon.readthedocs.io/en/latest/salmon.html + manual: https://salmon.readthedocs.io/en/latest/salmon.html + doi: 10.1038/nmeth.4197 + licence: ["GPL-3.0-or-later"] + identifier: biotools:salmon +input: + - - genome_fasta: + type: file + description: Fasta file of the reference genome + - - transcript_fasta: + type: file + description: Fasta file of the reference transcriptome +output: + - index: + - salmon: + type: directory + description: Folder containing the star index files + pattern: "salmon" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@kevinmenden" + - "@drpatelh" +maintainers: + - "@kevinmenden" + - "@drpatelh" diff --git a/modules/nf-core/salmon/index/tests/main.nf.test b/modules/nf-core/salmon/index/tests/main.nf.test new file mode 100644 index 00000000..16b3c1a7 --- /dev/null +++ b/modules/nf-core/salmon/index/tests/main.nf.test @@ -0,0 +1,59 @@ +nextflow_process { + + name "Test Process SALMON_INDEX" + script "../main.nf" + process "SALMON_INDEX" + tag "modules" + tag "modules_nfcore" + tag "salmon" + tag "salmon/index" + + test("sarscov2") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genome.fasta", checkIfExists: true)]) + input[1] = Channel.of([file(params.modules_testdata_base_path + "genomics/sarscov2/genome/transcriptome.fasta", checkIfExists: true)]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.index.get(0)).exists() }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + + test("sarscov2 stub") { + options "-stub" + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([file(params.modules_testdata_base_path + 
"genomics/homo_sapiens/genome/genome.fasta", checkIfExists: true)]) + input[1] = Channel.of([file(params.modules_testdata_base_path + "genomics/sarscov2/genome/transcriptome.fasta", checkIfExists: true)]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.index.get(0)).exists() }, + { assert snapshot(process.out.versions).match("versions stub") } + ) + } + + } + +} \ No newline at end of file diff --git a/modules/nf-core/salmon/index/tests/main.nf.test.snap b/modules/nf-core/salmon/index/tests/main.nf.test.snap new file mode 100644 index 00000000..e5899b51 --- /dev/null +++ b/modules/nf-core/salmon/index/tests/main.nf.test.snap @@ -0,0 +1,26 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,85337fa0a286ea35073ee5260974e307" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-18T10:00:47.087293189" + }, + "versions stub": { + "content": [ + [ + "versions.yml:md5,85337fa0a286ea35073ee5260974e307" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-18T10:01:03.89824494" + } +} \ No newline at end of file diff --git a/modules/nf-core/salmon/index/tests/tags.yml b/modules/nf-core/salmon/index/tests/tags.yml new file mode 100644 index 00000000..02997890 --- /dev/null +++ b/modules/nf-core/salmon/index/tests/tags.yml @@ -0,0 +1,2 @@ +salmon/index: + - modules/nf-core/salmon/index/** diff --git a/modules/nf-core/salmon/quant/environment.yml b/modules/nf-core/salmon/quant/environment.yml new file mode 100644 index 00000000..b3f75777 --- /dev/null +++ b/modules/nf-core/salmon/quant/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::salmon=1.10.3 diff --git a/modules/nf-core/salmon/quant/main.nf b/modules/nf-core/salmon/quant/main.nf new file mode 100644 index 00000000..f1e3b5cd --- /dev/null +++ b/modules/nf-core/salmon/quant/main.nf @@ -0,0 +1,96 @@ +process SALMON_QUANT { + tag 
"$meta.id" + label "process_medium" + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/salmon:1.10.3--h6dccd9a_2' : + 'biocontainers/salmon:1.10.3--h6dccd9a_2' }" + + input: + tuple val(meta), path(reads) + path index + path gtf + path transcript_fasta + val alignment_mode + val lib_type + + output: + tuple val(meta), path("${prefix}") , emit: results + tuple val(meta), path("*info.json") , emit: json_info, optional: true + tuple val(meta), path("*lib_format_counts.json"), emit: lib_format_counts, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + + def reference = "--index $index" + def reads1 = [], reads2 = [] + meta.single_end ? [reads].flatten().each{reads1 << it} : reads.eachWithIndex{ v, ix -> ( ix & 1 ? reads2 : reads1) << v } + def input_reads = meta.single_end ? "-r ${reads1.join(" ")}" : "-1 ${reads1.join(" ")} -2 ${reads2.join(" ")}" + if (alignment_mode) { + reference = "-t $transcript_fasta" + input_reads = "-a $reads" + } + + def strandedness_opts = [ + 'A', 'U', 'SF', 'SR', + 'IS', 'IU' , 'ISF', 'ISR', + 'OS', 'OU' , 'OSF', 'OSR', + 'MS', 'MU' , 'MSF', 'MSR' + ] + def strandedness = 'A' + if (lib_type) { + if (strandedness_opts.contains(lib_type)) { + strandedness = lib_type + } else { + log.info "[Salmon Quant] Invalid library type specified '--libType=${lib_type}', defaulting to auto-detection with '--libType=A'." + } + } else { + strandedness = meta.single_end ? 'U' : 'IU' + if (meta.strandedness == 'forward') { + strandedness = meta.single_end ? 'SF' : 'ISF' + } else if (meta.strandedness == 'reverse') { + strandedness = meta.single_end ? 
'SR' : 'ISR' + } + } + """ + salmon quant \\ + --geneMap $gtf \\ + --threads $task.cpus \\ + --libType=$strandedness \\ + $reference \\ + $input_reads \\ + $args \\ + -o $prefix + + if [ -f $prefix/aux_info/meta_info.json ]; then + cp $prefix/aux_info/meta_info.json "${prefix}_meta_info.json" + fi + if [ -f $prefix/lib_format_counts.json ]; then + cp $prefix/lib_format_counts.json "${prefix}_lib_format_counts.json" + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + salmon: \$(echo \$(salmon --version) | sed -e "s/salmon //g") + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + """ + mkdir ${prefix} + touch ${prefix}_meta_info.json + touch ${prefix}_lib_format_counts.json + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + salmon: \$(echo \$(salmon --version) | sed -e "s/salmon //g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/salmon/quant/meta.yml b/modules/nf-core/salmon/quant/meta.yml new file mode 100644 index 00000000..4cacde0f --- /dev/null +++ b/modules/nf-core/salmon/quant/meta.yml @@ -0,0 +1,84 @@ +name: salmon_quant +description: gene/transcript quantification with Salmon +keywords: + - index + - fasta + - genome + - reference +tools: + - salmon: + description: | + Salmon is a tool for wicked-fast transcript quantification from RNA-seq data + homepage: https://salmon.readthedocs.io/en/latest/salmon.html + manual: https://salmon.readthedocs.io/en/latest/salmon.html + doi: 10.1038/nmeth.4197 + licence: ["GPL-3.0-or-later"] + identifier: biotools:salmon +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files for single-end or paired-end data. + Multiple single-end fastqs or pairs of paired-end fastqs are + handled. 
+ - - index: + type: directory + description: Folder containing the star index files + - - gtf: + type: file + description: GTF of the reference transcriptome + - - transcript_fasta: + type: file + description: Fasta file of the reference transcriptome + - - alignment_mode: + type: boolean + description: whether to run salmon in alignment mode + - - lib_type: + type: string + description: | + Override library type inferred based on strandedness defined in meta object +output: + - results: + - meta: + type: directory + description: Folder containing the quantification results for a specific sample + pattern: "${prefix}" + - ${prefix}: + type: directory + description: Folder containing the quantification results for a specific sample + pattern: "${prefix}" + - json_info: + - meta: + type: file + description: File containing meta information from Salmon quant + pattern: "*info.json" + - "*info.json": + type: file + description: File containing meta information from Salmon quant + pattern: "*info.json" + - lib_format_counts: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*lib_format_counts.json": + type: file + description: File containing the library format counts + pattern: "*lib_format_counts.json" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@kevinmenden" + - "@drpatelh" +maintainers: + - "@kevinmenden" + - "@drpatelh" diff --git a/modules/nf-core/salmon/quant/tests/main.nf.test b/modules/nf-core/salmon/quant/tests/main.nf.test new file mode 100644 index 00000000..2964cc3d --- /dev/null +++ b/modules/nf-core/salmon/quant/tests/main.nf.test @@ -0,0 +1,320 @@ +nextflow_process { + + name "Test Process SALMON_QUANT" + script "../main.nf" + process "SALMON_QUANT" + config "./nextflow.config" + tag "modules" + tag "modules_nfcore" + tag "salmon" + tag "salmon/quant" + tag "salmon/index" + + setup { + run("SALMON_INDEX") { + script "../../../salmon/index/main.nf" + process { + """ + input[0] = Channel.of([file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genome.fasta", checkIfExists: true)]) + input[1] = Channel.of([file(params.modules_testdata_base_path + "genomics/sarscov2/genome/transcriptome.fasta", checkIfExists: true)]) + """ + } + } + } + + test("sarscov2 - single_end") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end: true ], // meta map + [ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true) ] + ]) + input[1] = SALMON_INDEX.out.index + input[2] = Channel.of([file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gtf", checkIfExists: true)]) + input[3] = Channel.of([file(params.modules_testdata_base_path + "genomics/sarscov2/genome/transcriptome.fasta", checkIfExists: true)]) + input[4] = false + input[5] = '' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert 
path(process.out.json_info.get(0).get(1)).exists() }, + { assert path(process.out.results.get(0).get(1)).exists() }, + { assert snapshot( + process.out.versions, + process.out.lib_format_counts + ).match() } + ) + } + + } + + test("sarscov2 - single_end stub") { + options "-stub" + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end: true ], // meta map + [ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true) ] + ]) + input[1] = SALMON_INDEX.out.index + input[2] = Channel.of([file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gtf", checkIfExists: true)]) + input[3] = Channel.of([file(params.modules_testdata_base_path + "genomics/sarscov2/genome/transcriptome.fasta", checkIfExists: true)]) + input[4] = false + input[5] = '' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.json_info.get(0).get(1)).exists() }, + { assert path(process.out.results.get(0).get(1)).exists() }, + { assert snapshot( + process.out.versions, + process.out.lib_format_counts + ).match() } + ) + } + + } + + test("sarscov2 - single_end lib type A") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end: true ], // meta map + [ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true) ] + ]) + input[1] = SALMON_INDEX.out.index + input[2] = Channel.of([file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gtf", checkIfExists: true)]) + input[3] = Channel.of([file(params.modules_testdata_base_path + "genomics/sarscov2/genome/transcriptome.fasta", checkIfExists: true)]) + input[4] = false + input[5] = 'A' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.json_info.get(0).get(1)).exists() }, + { assert 
path(process.out.results.get(0).get(1)).exists() }, + { assert snapshot( + process.out.versions, + process.out.lib_format_counts + ).match() } + ) + } + + } + + test("sarscov2 - single_end lib type A stub") { + options "-stub" + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end: true ], // meta map + [ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true) ] + ]) + input[1] = SALMON_INDEX.out.index + input[2] = Channel.of([file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gtf", checkIfExists: true)]) + input[3] = Channel.of([file(params.modules_testdata_base_path + "genomics/sarscov2/genome/transcriptome.fasta", checkIfExists: true)]) + input[4] = false + input[5] = 'A' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.json_info.get(0).get(1)).exists() }, + { assert path(process.out.results.get(0).get(1)).exists() }, + { assert snapshot( + process.out.versions, + process.out.lib_format_counts + ).match() } + ) + } + + } + + test("sarscov2 - pair_end") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end: true ], // meta map + [ + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_2.fastq.gz", checkIfExists: true) + ] + ]) + input[1] = SALMON_INDEX.out.index + input[2] = Channel.of([file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gtf", checkIfExists: true)]) + input[3] = Channel.of([file(params.modules_testdata_base_path + "genomics/sarscov2/genome/transcriptome.fasta", checkIfExists: true)]) + input[4] = false + input[5] = '' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert 
path(process.out.json_info.get(0).get(1)).exists() }, + { assert path(process.out.results.get(0).get(1)).exists() }, + { assert snapshot( + process.out.versions, + process.out.lib_format_counts + ).match() } + ) + } + + } + + test("sarscov2 - pair_end stub") { + options "-stub" + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end: true ], // meta map + [ + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_2.fastq.gz", checkIfExists: true) + ] + ]) + input[1] = SALMON_INDEX.out.index + input[2] = Channel.of([file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gtf", checkIfExists: true)]) + input[3] = Channel.of([file(params.modules_testdata_base_path + "genomics/sarscov2/genome/transcriptome.fasta", checkIfExists: true)]) + input[4] = false + input[5] = '' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.json_info.get(0).get(1)).exists() }, + { assert path(process.out.results.get(0).get(1)).exists() }, + { assert snapshot( + process.out.versions, + process.out.lib_format_counts + ).match() } + ) + } + + } + + test("sarscov2 - pair_end multiple") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_2.fastq.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test2_1.fastq.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test2_2.fastq.gz", checkIfExists: true) + ] + ]) + input[1] = 
SALMON_INDEX.out.index + input[2] = Channel.of([file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gtf", checkIfExists: true)]) + input[3] = Channel.of([file(params.modules_testdata_base_path + "genomics/sarscov2/genome/transcriptome.fasta", checkIfExists: true)]) + input[4] = false + input[5] = '' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.json_info.get(0).get(1)).exists() }, + { assert path(process.out.results.get(0).get(1)).exists() }, + { assert snapshot( + process.out.versions, + process.out.lib_format_counts + ).match() } + ) + } + + } + + test("sarscov2 - pair_end multiple stub") { + options "-stub" + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_2.fastq.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test2_1.fastq.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test2_2.fastq.gz", checkIfExists: true) + ] + ]) + input[1] = SALMON_INDEX.out.index + input[2] = Channel.of([file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gtf", checkIfExists: true)]) + input[3] = Channel.of([file(params.modules_testdata_base_path + "genomics/sarscov2/genome/transcriptome.fasta", checkIfExists: true)]) + input[4] = false + input[5] = '' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.json_info.get(0).get(1)).exists() }, + { assert path(process.out.results.get(0).get(1)).exists() }, + { assert snapshot( + process.out.versions, + process.out.lib_format_counts + ).match() } + ) + } + + } +} diff --git 
a/modules/nf-core/salmon/quant/tests/main.nf.test.snap b/modules/nf-core/salmon/quant/tests/main.nf.test.snap new file mode 100644 index 00000000..ea22a80c --- /dev/null +++ b/modules/nf-core/salmon/quant/tests/main.nf.test.snap @@ -0,0 +1,170 @@ +{ + "sarscov2 - single_end": { + "content": [ + [ + "versions.yml:md5,0d510d5db6398e2c8ca9443330740607" + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test_lib_format_counts.json:md5,c7999dfccd32c090d94e5951522eecd4" + ] + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-18T10:01:16.989080539" + }, + "sarscov2 - single_end stub": { + "content": [ + [ + "versions.yml:md5,0d510d5db6398e2c8ca9443330740607" + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test_lib_format_counts.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-18T10:01:29.340996235" + }, + "sarscov2 - single_end lib type A": { + "content": [ + [ + "versions.yml:md5,0d510d5db6398e2c8ca9443330740607" + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test_lib_format_counts.json:md5,c7999dfccd32c090d94e5951522eecd4" + ] + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-18T10:01:43.056167576" + }, + "sarscov2 - pair_end multiple": { + "content": [ + [ + "versions.yml:md5,0d510d5db6398e2c8ca9443330740607" + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test_lib_format_counts.json:md5,4a2ee0fac91a4a3471872808d8bd3ff8" + ] + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-18T10:03:05.500792631" + }, + "sarscov2 - pair_end multiple stub": { + "content": [ + [ + "versions.yml:md5,0d510d5db6398e2c8ca9443330740607" + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test_lib_format_counts.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.9.0", + 
"nextflow": "24.04.4" + }, + "timestamp": "2024-10-18T10:03:26.428959203" + }, + "sarscov2 - single_end lib type A stub": { + "content": [ + [ + "versions.yml:md5,0d510d5db6398e2c8ca9443330740607" + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test_lib_format_counts.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-18T10:02:03.420850208" + }, + "sarscov2 - pair_end": { + "content": [ + [ + "versions.yml:md5,0d510d5db6398e2c8ca9443330740607" + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test_lib_format_counts.json:md5,e9516e73c9fb39145513b2a41a0af95f" + ] + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-18T10:02:16.130074696" + }, + "sarscov2 - pair_end stub": { + "content": [ + [ + "versions.yml:md5,0d510d5db6398e2c8ca9443330740607" + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test_lib_format_counts.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-18T10:02:39.470004547" + } +} \ No newline at end of file diff --git a/modules/nf-core/salmon/quant/tests/nextflow.config b/modules/nf-core/salmon/quant/tests/nextflow.config new file mode 100644 index 00000000..37c08212 --- /dev/null +++ b/modules/nf-core/salmon/quant/tests/nextflow.config @@ -0,0 +1,7 @@ +process { + + withName: SALMON_QUANT { + ext.args = '--minAssignedFrags 1' + } + +} diff --git a/modules/nf-core/salmon/quant/tests/tags.yml b/modules/nf-core/salmon/quant/tests/tags.yml new file mode 100644 index 00000000..048d8164 --- /dev/null +++ b/modules/nf-core/salmon/quant/tests/tags.yml @@ -0,0 +1,2 @@ +salmon/quant: + - modules/nf-core/salmon/quant/** diff --git a/modules/nf-core/samtools/faidx/environment.yml b/modules/nf-core/samtools/faidx/environment.yml new file mode 100644 index 00000000..62054fc9 --- /dev/null +++ 
b/modules/nf-core/samtools/faidx/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::htslib=1.21 + - bioconda::samtools=1.21 diff --git a/modules/nf-core/samtools/faidx/main.nf b/modules/nf-core/samtools/faidx/main.nf new file mode 100644 index 00000000..28c0a81c --- /dev/null +++ b/modules/nf-core/samtools/faidx/main.nf @@ -0,0 +1,50 @@ +process SAMTOOLS_FAIDX { + tag "$fasta" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : + 'biocontainers/samtools:1.21--h50ea8bc_0' }" + + input: + tuple val(meta), path(fasta) + tuple val(meta2), path(fai) + + output: + tuple val(meta), path ("*.{fa,fasta}") , emit: fa , optional: true + tuple val(meta), path ("*.fai") , emit: fai, optional: true + tuple val(meta), path ("*.gzi") , emit: gzi, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + samtools \\ + faidx \\ + $fasta \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def match = (task.ext.args =~ /-o(?:utput)?\s(.*)\s?/).findAll() + def fastacmd = match[0] ? 
"touch ${match[0][1]}" : '' + """ + ${fastacmd} + touch ${fasta}.fai + + cat <<-END_VERSIONS > versions.yml + + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/faidx/meta.yml b/modules/nf-core/samtools/faidx/meta.yml new file mode 100644 index 00000000..6721b2cb --- /dev/null +++ b/modules/nf-core/samtools/faidx/meta.yml @@ -0,0 +1,80 @@ +name: samtools_faidx +description: Index FASTA file +keywords: + - index + - fasta + - faidx +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:samtools +input: + - - meta: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fasta: + type: file + description: FASTA file + pattern: "*.{fa,fasta}" + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fai: + type: file + description: FASTA index file + pattern: "*.{fai}" +output: + - fa: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.{fa,fasta}": + type: file + description: FASTA file + pattern: "*.{fa}" + - fai: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.fai": + type: file + description: FASTA index file + pattern: "*.{fai}" + - gzi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.gzi": + type: file + description: Optional gzip index file for compressed inputs + pattern: "*.gzi" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@ewels" + - "@phue" +maintainers: + - "@drpatelh" + - "@ewels" + - "@phue" diff --git a/modules/nf-core/samtools/faidx/tests/main.nf.test b/modules/nf-core/samtools/faidx/tests/main.nf.test new file mode 100644 index 00000000..17244ef2 --- /dev/null +++ b/modules/nf-core/samtools/faidx/tests/main.nf.test @@ -0,0 +1,122 @@ +nextflow_process { + + name "Test Process SAMTOOLS_FAIDX" + script "../main.nf" + process "SAMTOOLS_FAIDX" + + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/faidx" + + test("test_samtools_faidx") { + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] + + input[1] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_samtools_faidx_bgzip") { + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true)] + + input[1] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_samtools_faidx_fasta") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] + + input[1] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: 
true) ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_samtools_faidx_stub_fasta") { + + config "./nextflow2.config" + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] + + input[1] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_samtools_faidx_stub_fai") { + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] + + input[1] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/faidx/tests/main.nf.test.snap b/modules/nf-core/samtools/faidx/tests/main.nf.test.snap new file mode 100644 index 00000000..1bbb3ec2 --- /dev/null +++ b/modules/nf-core/samtools/faidx/tests/main.nf.test.snap @@ -0,0 +1,249 @@ +{ + "test_samtools_faidx": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" + ], + "fa": [ + + ], + "fai": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "gzi": [ + + ], + "versions": [ + "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": 
"2024-09-16T07:57:47.450887871" + }, + "test_samtools_faidx_bgzip": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.gz.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474" + ] + ], + "3": [ + "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" + ], + "fa": [ + + ], + "fai": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.gz.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "gzi": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474" + ] + ], + "versions": [ + "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T07:58:04.804905659" + }, + "test_samtools_faidx_fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "extract.fa:md5,6a0774a0ad937ba0bfd2ac7457d90f36" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" + ], + "fa": [ + [ + { + "id": "test", + "single_end": false + }, + "extract.fa:md5,6a0774a0ad937ba0bfd2ac7457d90f36" + ] + ], + "fai": [ + + ], + "gzi": [ + + ], + "versions": [ + "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T07:58:23.831268154" + }, + "test_samtools_faidx_stub_fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "extract.fa:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" + ], + "fa": [ + [ + { + "id": "test", + "single_end": false + }, + "extract.fa:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "fai": [ + + ], + "gzi": [ + + ], + "versions": [ + 
"versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T07:58:35.600243706" + }, + "test_samtools_faidx_stub_fai": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" + ], + "fa": [ + + ], + "fai": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "gzi": [ + + ], + "versions": [ + "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T07:58:54.705460167" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/faidx/tests/nextflow.config b/modules/nf-core/samtools/faidx/tests/nextflow.config new file mode 100644 index 00000000..f76a3ba0 --- /dev/null +++ b/modules/nf-core/samtools/faidx/tests/nextflow.config @@ -0,0 +1,7 @@ +process { + + withName: SAMTOOLS_FAIDX { + ext.args = 'MT192765.1 -o extract.fa' + } + +} diff --git a/modules/nf-core/samtools/faidx/tests/nextflow2.config b/modules/nf-core/samtools/faidx/tests/nextflow2.config new file mode 100644 index 00000000..33ebbd5d --- /dev/null +++ b/modules/nf-core/samtools/faidx/tests/nextflow2.config @@ -0,0 +1,6 @@ +process { + + withName: SAMTOOLS_FAIDX { + ext.args = '-o extract.fa' + } +} diff --git a/modules/nf-core/samtools/faidx/tests/tags.yml b/modules/nf-core/samtools/faidx/tests/tags.yml new file mode 100644 index 00000000..e4a83948 --- /dev/null +++ b/modules/nf-core/samtools/faidx/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/faidx: + - modules/nf-core/samtools/faidx/** diff --git a/modules/nf-core/samtools/index/environment.yml b/modules/nf-core/samtools/index/environment.yml new file mode 100644 index 00000000..62054fc9 --- /dev/null 
+++ b/modules/nf-core/samtools/index/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::htslib=1.21 + - bioconda::samtools=1.21 diff --git a/modules/nf-core/samtools/index/main.nf b/modules/nf-core/samtools/index/main.nf new file mode 100644 index 00000000..31175610 --- /dev/null +++ b/modules/nf-core/samtools/index/main.nf @@ -0,0 +1,49 @@ +process SAMTOOLS_INDEX { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : + 'biocontainers/samtools:1.21--h50ea8bc_0' }" + + input: + tuple val(meta), path(input) + + output: + tuple val(meta), path("*.bai") , optional:true, emit: bai + tuple val(meta), path("*.csi") , optional:true, emit: csi + tuple val(meta), path("*.crai"), optional:true, emit: crai + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + samtools \\ + index \\ + -@ ${task.cpus-1} \\ + $args \\ + $input + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def extension = file(input).getExtension() == 'cram' ? + "crai" : args.contains("-c") ? 
"csi" : "bai" + """ + touch ${input}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/index/meta.yml b/modules/nf-core/samtools/index/meta.yml new file mode 100644 index 00000000..db8df0d5 --- /dev/null +++ b/modules/nf-core/samtools/index/meta.yml @@ -0,0 +1,71 @@ +name: samtools_index +description: Index SAM/BAM/CRAM file +keywords: + - index + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:samtools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: input file +output: + - bai: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bai": + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - csi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.csi": + type: file + description: CSI index file + pattern: "*.{csi}" + - crai: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.crai": + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@ewels" + - "@maxulysse" +maintainers: + - "@drpatelh" + - "@ewels" + - "@maxulysse" diff --git a/modules/nf-core/samtools/index/tests/csi.nextflow.config b/modules/nf-core/samtools/index/tests/csi.nextflow.config new file mode 100644 index 00000000..0ed260ef --- /dev/null +++ b/modules/nf-core/samtools/index/tests/csi.nextflow.config @@ -0,0 +1,7 @@ +process { + + withName: SAMTOOLS_INDEX { + ext.args = '-c' + } + +} diff --git a/modules/nf-core/samtools/index/tests/main.nf.test b/modules/nf-core/samtools/index/tests/main.nf.test new file mode 100644 index 00000000..ca34fb5c --- /dev/null +++ b/modules/nf-core/samtools/index/tests/main.nf.test @@ -0,0 +1,140 @@ +nextflow_process { + + name "Test Process SAMTOOLS_INDEX" + script "../main.nf" + process "SAMTOOLS_INDEX" + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/index" + + test("bai") { + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("crai") { + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("csi") { + config "./csi.nextflow.config" + + when { + process { + """ + input[0] = 
Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.csi[0][1]).name, + process.out.versions + ).match() } + ) + } + } + + test("bai - stub") { + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("crai - stub") { + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("csi - stub") { + options "-stub" + config "./csi.nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/samtools/index/tests/main.nf.test.snap b/modules/nf-core/samtools/index/tests/main.nf.test.snap new file mode 100644 index 00000000..72d65e81 --- /dev/null +++ b/modules/nf-core/samtools/index/tests/main.nf.test.snap @@ -0,0 +1,250 @@ +{ + "csi - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + 
"test.paired_end.sorted.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ], + "bai": [ + + ], + "crai": [ + + ], + "csi": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:21:25.261127166" + }, + "crai - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ], + "bai": [ + + ], + "crai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "csi": [ + + ], + "versions": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:21:12.653194876" + }, + "bai - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ], + "bai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "crai": [ + + ], + "csi": [ + + ], + "versions": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:21:01.854932651" + }, + "csi": { + "content": [ + "test.paired_end.sorted.bam.csi", + [ + 
"versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:20:51.485364222" + }, + "crai": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,14bc3bd5c89cacc8f4541f9062429029" + ] + ], + "3": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ], + "bai": [ + + ], + "crai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,14bc3bd5c89cacc8f4541f9062429029" + ] + ], + "csi": [ + + ], + "versions": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:20:40.518873972" + }, + "bai": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.bai:md5,704c10dd1326482448ca3073fdebc2f4" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ], + "bai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.bai:md5,704c10dd1326482448ca3073fdebc2f4" + ] + ], + "crai": [ + + ], + "csi": [ + + ], + "versions": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:20:21.184050361" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/index/tests/tags.yml b/modules/nf-core/samtools/index/tests/tags.yml new file mode 100644 index 00000000..e0f58a7a --- /dev/null +++ b/modules/nf-core/samtools/index/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/index: + - modules/nf-core/samtools/index/** diff --git a/modules/nf-core/samtools/sort/environment.yml b/modules/nf-core/samtools/sort/environment.yml new file mode 100644 index 00000000..62054fc9 --- /dev/null +++ 
b/modules/nf-core/samtools/sort/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::htslib=1.21 + - bioconda::samtools=1.21 diff --git a/modules/nf-core/samtools/sort/main.nf b/modules/nf-core/samtools/sort/main.nf new file mode 100644 index 00000000..caf3c61a --- /dev/null +++ b/modules/nf-core/samtools/sort/main.nf @@ -0,0 +1,72 @@ +process SAMTOOLS_SORT { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : + 'biocontainers/samtools:1.21--h50ea8bc_0' }" + + input: + tuple val(meta) , path(bam) + tuple val(meta2), path(fasta) + + output: + tuple val(meta), path("*.bam"), emit: bam, optional: true + tuple val(meta), path("*.cram"), emit: cram, optional: true + tuple val(meta), path("*.crai"), emit: crai, optional: true + tuple val(meta), path("*.csi"), emit: csi, optional: true + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("--output-fmt sam") ? "sam" : + args.contains("--output-fmt cram") ? "cram" : + "bam" + def reference = fasta ? "--reference ${fasta}" : "" + if ("$bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" 
+ + """ + samtools cat \\ + ${bam} \\ + | \\ + samtools sort \\ + $args \\ + -T ${prefix} \\ + --threads $task.cpus \\ + ${reference} \\ + -o ${prefix}.${extension} \\ + - + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("--output-fmt sam") ? "sam" : + args.contains("--output-fmt cram") ? "cram" : + "bam" + """ + touch ${prefix}.${extension} + if [ "${extension}" == "bam" ]; + then + touch ${prefix}.${extension}.csi + elif [ "${extension}" == "cram" ]; + then + touch ${prefix}.${extension}.crai + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/sort/meta.yml b/modules/nf-core/samtools/sort/meta.yml new file mode 100644 index 00000000..a9dbec5a --- /dev/null +++ b/modules/nf-core/samtools/sort/meta.yml @@ -0,0 +1,92 @@ +name: samtools_sort +description: Sort SAM/BAM/CRAM file +keywords: + - sort + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:samtools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file(s) + pattern: "*.{bam,cram,sam}" + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: Reference genome FASTA file + pattern: "*.{fa,fasta,fna}" + optional: true +output: + - bam: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bam": + type: file + description: Sorted BAM file + pattern: "*.{bam}" + - cram: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.cram": + type: file + description: Sorted CRAM file + pattern: "*.{cram}" + - crai: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.crai": + type: file + description: CRAM index file (optional) + pattern: "*.crai" + - csi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.csi": + type: file + description: BAM index file (optional) + pattern: "*.csi" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@ewels" + - "@matthdsm" +maintainers: + - "@drpatelh" + - "@ewels" + - "@matthdsm" diff --git a/modules/nf-core/samtools/sort/tests/main.nf.test b/modules/nf-core/samtools/sort/tests/main.nf.test new file mode 100644 index 00000000..b05e6691 --- /dev/null +++ b/modules/nf-core/samtools/sort/tests/main.nf.test @@ -0,0 +1,192 @@ +nextflow_process { + + name "Test Process SAMTOOLS_SORT" + script "../main.nf" + process "SAMTOOLS_SORT" + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/sort" + + test("bam") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out.bam, + process.out.csi.collect { it.collect { it instanceof Map ? 
it : file(it).name } }, + process.out.versions + ).match()} + ) + } + } + + test("multiple bam") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam', checkIfExists: true) + ] + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out.bam, + process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.versions + ).match()} + ) + } + } + + test("cram") { + + config "./nextflow_cram.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out.cram.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.crai.collect { it.collect { it instanceof Map ? 
it : file(it).name } }, + process.out.versions + ).match()} + ) + } + } + + test("bam - stub") { + + options "-stub" + config "./nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("multiple bam - stub") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam', checkIfExists: true) + ] + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("cram - stub") { + + options "-stub" + config "./nextflow_cram.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) 
+ } + } +} diff --git a/modules/nf-core/samtools/sort/tests/main.nf.test.snap b/modules/nf-core/samtools/sort/tests/main.nf.test.snap new file mode 100644 index 00000000..469891fe --- /dev/null +++ b/modules/nf-core/samtools/sort/tests/main.nf.test.snap @@ -0,0 +1,287 @@ +{ + "cram": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.cram" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.cram.crai" + ] + ], + [ + "versions.yml:md5,2659b187d681241451539d4c53500b9f" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:49:58.207549273" + }, + "bam - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + "versions.yml:md5,2659b187d681241451539d4c53500b9f" + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "crai": [ + + ], + "cram": [ + + ], + "csi": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,2659b187d681241451539d4c53500b9f" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:50:08.630951018" + }, + "cram - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.cram:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + + ], + "4": [ + "versions.yml:md5,2659b187d681241451539d4c53500b9f" + ], + "bam": [ + + ], + "crai": [ + [ + { + "id": "test", 
+ "single_end": false + }, + "test.sorted.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "cram": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.cram:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "csi": [ + + ], + "versions": [ + "versions.yml:md5,2659b187d681241451539d4c53500b9f" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:50:19.061912443" + }, + "multiple bam": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,8a16ba90c7d294cbb4c33ac0f7127a12" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.csi" + ] + ], + [ + "versions.yml:md5,2659b187d681241451539d4c53500b9f" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.09.0" + }, + "timestamp": "2024-10-08T11:59:55.479443" + }, + "multiple bam - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,8a16ba90c7d294cbb4c33ac0f7127a12" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.csi:md5,d185916eaff9afeb4d0aeab3310371f9" + ] + ], + "4": [ + "versions.yml:md5,2659b187d681241451539d4c53500b9f" + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,8a16ba90c7d294cbb4c33ac0f7127a12" + ] + ], + "crai": [ + + ], + "cram": [ + + ], + "csi": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.csi:md5,d185916eaff9afeb4d0aeab3310371f9" + ] + ], + "versions": [ + "versions.yml:md5,2659b187d681241451539d4c53500b9f" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.09.0" + }, + "timestamp": "2024-10-08T11:36:13.781404" + }, + "bam": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,34aa85e86abefe637f7a4a9887f016fc" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.csi" + ] + ], + 
[ + "versions.yml:md5,2659b187d681241451539d4c53500b9f" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.09.0" + }, + "timestamp": "2024-10-08T11:59:46.372244" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/sort/tests/nextflow.config b/modules/nf-core/samtools/sort/tests/nextflow.config new file mode 100644 index 00000000..f642771f --- /dev/null +++ b/modules/nf-core/samtools/sort/tests/nextflow.config @@ -0,0 +1,8 @@ +process { + + withName: SAMTOOLS_SORT { + ext.prefix = { "${meta.id}.sorted" } + ext.args = "--write-index" + } + +} diff --git a/modules/nf-core/samtools/sort/tests/nextflow_cram.config b/modules/nf-core/samtools/sort/tests/nextflow_cram.config new file mode 100644 index 00000000..3a8c0188 --- /dev/null +++ b/modules/nf-core/samtools/sort/tests/nextflow_cram.config @@ -0,0 +1,8 @@ +process { + + withName: SAMTOOLS_SORT { + ext.prefix = { "${meta.id}.sorted" } + ext.args = "--write-index --output-fmt cram" + } + +} diff --git a/modules/nf-core/samtools/sort/tests/tags.yml b/modules/nf-core/samtools/sort/tests/tags.yml new file mode 100644 index 00000000..cd63ea20 --- /dev/null +++ b/modules/nf-core/samtools/sort/tests/tags.yml @@ -0,0 +1,3 @@ +samtools/sort: + - modules/nf-core/samtools/sort/** + - tests/modules/nf-core/samtools/sort/** diff --git a/modules/nf-core/samtools/view/environment.yml b/modules/nf-core/samtools/view/environment.yml new file mode 100644 index 00000000..02cda6e6 --- /dev/null +++ b/modules/nf-core/samtools/view/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.21 + # renovate: datasource=conda depName=bioconda/samtools + - bioconda::samtools=1.21 diff --git a/modules/nf-core/samtools/view/main.nf b/modules/nf-core/samtools/view/main.nf new file mode 
100644 index 00000000..a6941e63 --- /dev/null +++ b/modules/nf-core/samtools/view/main.nf @@ -0,0 +1,77 @@ +process SAMTOOLS_VIEW { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/9e/9edc2564215d5cd137a8b25ca8a311600987186d406b092022444adf3c4447f7/data' : + 'community.wave.seqera.io/library/htslib_samtools:1.21--6cb89bfd40cbaabf' }" + + input: + tuple val(meta), path(input), path(index) + tuple val(meta2), path(fasta) + path qname + + output: + tuple val(meta), path("${prefix}.bam"), emit: bam, optional: true + tuple val(meta), path("${prefix}.cram"), emit: cram, optional: true + tuple val(meta), path("${prefix}.sam"), emit: sam, optional: true + tuple val(meta), path("${prefix}.${file_type}.bai"), emit: bai, optional: true + tuple val(meta), path("${prefix}.${file_type}.csi"), emit: csi, optional: true + tuple val(meta), path("${prefix}.${file_type}.crai"), emit: crai, optional: true + tuple val(meta), path("${prefix}.unselected.${file_type}"), emit: unselected, optional: true + tuple val(meta), path("${prefix}.unselected.${file_type}.{bai,csi,crai}"), emit: unselected_index, optional: true // index of the unselected-reads file; CRAM indexes end in .crai + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def reference = fasta ? "--reference ${fasta}" : "" + file_type = args.contains("--output-fmt sam") ? "sam" : + args.contains("--output-fmt bam") ? "bam" : + args.contains("--output-fmt cram") ? "cram" : + input.getExtension() + readnames = qname ? "--qname-file ${qname} --output-unselected ${prefix}.unselected.${file_type}": "" + if ("$input" == "${prefix}.${file_type}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
+ """ + samtools \\ + view \\ + --threads ${task.cpus-1} \\ + ${reference} \\ + ${readnames} \\ + $args \\ + -o ${prefix}.${file_type} \\ + $input \\ + $args2 + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + file_type = args.contains("--output-fmt sam") ? "sam" : + args.contains("--output-fmt bam") ? "bam" : + args.contains("--output-fmt cram") ? "cram" : + input.getExtension() + if ("$input" == "${prefix}.${file_type}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + + index = args.contains("--write-index") ? "touch ${prefix}.${file_type}.csi" : "" + + """ + touch ${prefix}.${file_type} + ${index} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/view/meta.yml b/modules/nf-core/samtools/view/meta.yml new file mode 100644 index 00000000..caa7b015 --- /dev/null +++ b/modules/nf-core/samtools/view/meta.yml @@ -0,0 +1,141 @@ +name: samtools_view +description: filter/convert SAM/BAM/CRAM file +keywords: + - view + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:samtools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g.
[ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - index: + type: file + description: BAM.BAI/BAM.CSI/CRAM.CRAI file (optional) + pattern: "*.{bai,csi,crai}" + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fasta: + type: file + description: Reference file the CRAM was created with (optional) + pattern: "*.{fasta,fa}" + - - qname: + type: file + description: Optional file with read names to output only select alignments + pattern: "*.{txt,list}" +output: + - bam: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.bam: + type: file + description: optional filtered/converted BAM file + pattern: "*.{bam}" + - cram: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.cram: + type: file + description: optional filtered/converted CRAM file + pattern: "*.{cram}" + - sam: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.sam: + type: file + description: optional filtered/converted SAM file + pattern: "*.{sam}" + - bai: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.${file_type}.bai: + type: file + description: optional BAM file index + pattern: "*.{bai}" + - csi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.${file_type}.csi: + type: file + description: optional tabix BAM file index + pattern: "*.{csi}" + - crai: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g.
[ id:'test', single_end:false ] + - ${prefix}.${file_type}.crai: + type: file + description: optional CRAM file index + pattern: "*.{crai}" + - unselected: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.unselected.${file_type}: + type: file + description: optional file with unselected alignments + pattern: "*.unselected.{bam,cram,sam}" + - unselected_index: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.unselected.${file_type}.{bai,csi,crai}: + type: file + description: index for the "unselected" file + pattern: "*.unselected.{bam,cram}.{bai,csi,crai}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@joseespinosa" + - "@FriederikeHanssen" + - "@priyanka-surana" +maintainers: + - "@drpatelh" + - "@joseespinosa" + - "@FriederikeHanssen" + - "@priyanka-surana" diff --git a/modules/nf-core/samtools/view/tests/bam.config b/modules/nf-core/samtools/view/tests/bam.config new file mode 100644 index 00000000..c10d1081 --- /dev/null +++ b/modules/nf-core/samtools/view/tests/bam.config @@ -0,0 +1,3 @@ +process { + ext.args = "--output-fmt bam" +} \ No newline at end of file diff --git a/modules/nf-core/samtools/view/tests/bam_index.config b/modules/nf-core/samtools/view/tests/bam_index.config new file mode 100644 index 00000000..771ae033 --- /dev/null +++ b/modules/nf-core/samtools/view/tests/bam_index.config @@ -0,0 +1,3 @@ +process { + ext.args = "--output-fmt bam --write-index" +} \ No newline at end of file diff --git a/modules/nf-core/samtools/view/tests/main.nf.test b/modules/nf-core/samtools/view/tests/main.nf.test new file mode 100644 index 00000000..37b81a91 --- /dev/null +++ b/modules/nf-core/samtools/view/tests/main.nf.test @@ -0,0 +1,214 @@ +nextflow_process { + + name "Test Process SAMTOOLS_VIEW" +
script "../main.nf" + process "SAMTOOLS_VIEW" + + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/view" + + test("bam") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true), + [] + ]) + input[1] = [[],[]] + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("bam_bam") }, + { assert snapshot(process.out.bai).match("bam_bai") }, + { assert snapshot(process.out.crai).match("bam_crai") }, + { assert snapshot(process.out.cram).match("bam_cram") }, + { assert snapshot(process.out.csi).match("bam_csi") }, + { assert snapshot(process.out.sam).match("bam_sam") }, + { assert snapshot(process.out.versions).match("bam_versions") } + ) + } + } + + test("cram") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.cram[0][1]).name).match("cram_cram") }, + { assert snapshot(process.out.bai).match("cram_bai") }, + { assert snapshot(process.out.bam).match("cram_bam") }, + { assert snapshot(process.out.crai).match("cram_crai") }, + { assert snapshot(process.out.csi).match("cram_csi") }, + { assert snapshot(process.out.sam).match("cram_sam") }, + { assert 
snapshot(process.out.versions).match("cram_versions") } + ) + } + } + + test("cram_to_bam") { + + config "./bam.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + [] + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("cram_to_bam_bam") }, + { assert snapshot(process.out.bai).match("cram_to_bam_bai") }, + { assert snapshot(process.out.crai).match("cram_to_bam_crai") }, + { assert snapshot(process.out.cram).match("cram_to_bam_cram") }, + { assert snapshot(process.out.csi).match("cram_to_bam_csi") }, + { assert snapshot(process.out.sam).match("cram_to_bam_sam") }, + { assert snapshot(process.out.versions).match("cram_to_bam_versions") } + ) + } + } + + test("cram_to_bam_index") { + + config "./bam_index.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + [] + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("cram_to_bam_index_bam") }, + { assert snapshot(file(process.out.csi[0][1]).name).match("cram_to_bam_index_csi") }, + { assert snapshot(process.out.bai).match("cram_to_bam_index_bai") }, + { assert snapshot(process.out.crai).match("cram_to_bam_index_crai") }, 
+ { assert snapshot(process.out.cram).match("cram_to_bam_index_cram") }, + { assert snapshot(process.out.sam).match("cram_to_bam_index_sam") }, + { assert snapshot(process.out.versions).match("cram_to_bam_index_versions") } + ) + } + } + + test("cram_to_bam_index_qname") { + + config "./bam_index.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + [] + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = Channel.of("testN:2817", "testN:2814").collectFile(name: "readnames.list", newLine: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("cram_to_bam_index_qname_bam") }, + { assert snapshot(file(process.out.csi[0][1]).name).match("cram_to_bam_index_qname_csi") }, + { assert snapshot(process.out.bai).match("cram_to_bam_index_qname_bai") }, + { assert snapshot(process.out.crai).match("cram_to_bam_index_qname_crai") }, + { assert snapshot(process.out.cram).match("cram_to_bam_index_qname_cram") }, + { assert snapshot(process.out.sam).match("cram_to_bam_index_qname_sam") }, + { assert snapshot(file(process.out.unselected[0][1]).name).match("cram_to_bam_index_qname_unselected") }, + { assert snapshot(file(process.out.unselected_index[0][1]).name).match("cram_to_bam_index_qname_unselected_csi") }, + { assert snapshot(process.out.versions).match("cram_to_bam_index_qname_versions") } + ) + } + } + + test("bam_stub") { + + options "-stub" + config "./bam_index.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', 
checkIfExists: true), + [] + ]) + input[1] = [[],[]] + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("bam_stub_bam") }, + { assert snapshot(file(process.out.csi[0][1]).name).match("bam_stub_csi") }, + { assert snapshot(process.out.bai).match("bam_stub_bai") }, + { assert snapshot(process.out.crai).match("bam_stub_crai") }, + { assert snapshot(process.out.cram).match("bam_stub_cram") }, + { assert snapshot(process.out.sam).match("bam_stub_sam") }, + { assert snapshot(process.out.versions).match("bam_stub_versions") } + ) + } + } +} diff --git a/modules/nf-core/samtools/view/tests/main.nf.test.snap b/modules/nf-core/samtools/view/tests/main.nf.test.snap new file mode 100644 index 00000000..63849b03 --- /dev/null +++ b/modules/nf-core/samtools/view/tests/main.nf.test.snap @@ -0,0 +1,528 @@ +{ + "bam_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:51.256068" + }, + "cram_to_bam_index_csi": { + "content": [ + "test.bam.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:12.958617" + }, + "bam_stub_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.065301" + }, + "bam_bai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:51.258578" + }, + "bam_stub_bai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.071284" + }, + "bam_stub_versions": { + "content": [ + [ + "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T09:26:24.461775464" + }, + "cram_to_bam_index_cram": { + "content": [ + [ + + ] + 
], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:12.972288" + }, + "cram_to_bam_sam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.999247" + }, + "cram_to_bam_index_sam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:12.976457" + }, + "cram_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.497581" + }, + "cram_csi": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.50038" + }, + "cram_to_bam_cram": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.992239" + }, + "cram_to_bam_index_qname_csi": { + "content": [ + "test.bam.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.325496" + }, + "bam_stub_sam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.079529" + }, + "cram_cram": { + "content": [ + "test.cram" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.490286" + }, + "cram_to_bam_index_qname_unselected_csi": { + "content": [ + "test.unselected.bam.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.328458" + }, + "bam_csi": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:51.262882" + }, + "cram_to_bam_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.989247" + }, + "cram_to_bam_index_crai": 
{ + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:12.967681" + }, + "cram_to_bam_index_qname_versions": { + "content": [ + [ + "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T09:25:51.953436682" + }, + "cram_to_bam_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.982361" + }, + "cram_to_bam_index_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:12.95456" + }, + "cram_to_bam_index_versions": { + "content": [ + [ + "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T09:25:14.475388399" + }, + "cram_to_bam_bai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.98601" + }, + "cram_to_bam_versions": { + "content": [ + [ + "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T09:24:49.673441798" + }, + "cram_bam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.495512" + }, + "bam_stub_cram": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.076908" + }, + "cram_to_bam_index_qname_bai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.328458" + }, + "cram_to_bam_index_qname_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": 
"2024-02-12T19:38:23.330789" + }, + "cram_bai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.493129" + }, + "bam_stub_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.074313" + }, + "cram_to_bam_index_qname_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.322874" + }, + "cram_to_bam_index_qname_unselected": { + "content": [ + "test.unselected.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.322874" + }, + "cram_to_bam_index_qname_unselected_csi": { + "content": [ + "test.unselected.bam.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.328458" + }, + "bam_versions": { + "content": [ + [ + "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T09:23:27.151650338" + }, + "cram_to_bam_index_qname_cram": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.333248" + }, + "bam_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:51.259774" + }, + "bam_cram": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:51.261287" + }, + "cram_to_bam_csi": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.995454" + }, + "cram_sam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.502625" + }, + "cram_versions": { + 
"content": [ + [ + "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T09:24:12.95416913" + }, + "cram_to_bam_index_qname_unselected": { + "content": [ + "test.unselected.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.322874" + }, + "bam_sam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:51.264651" + }, + "cram_to_bam_index_bai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:12.962863" + }, + "cram_to_bam_index_qname_sam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.337634" + }, + "bam_stub_csi": { + "content": [ + "test.bam.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.068596" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/view/tests/tags.yml b/modules/nf-core/samtools/view/tests/tags.yml new file mode 100644 index 00000000..4fdf1dd1 --- /dev/null +++ b/modules/nf-core/samtools/view/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/view: + - "modules/nf-core/samtools/view/**" diff --git a/modules/nf-core/star/align/environment.yml b/modules/nf-core/star/align/environment.yml new file mode 100644 index 00000000..7c57530a --- /dev/null +++ b/modules/nf-core/star/align/environment.yml @@ -0,0 +1,9 @@ +channels: + - conda-forge + - bioconda + +dependencies: + - bioconda::htslib=1.20 + - bioconda::samtools=1.20 + - bioconda::star=2.7.11b + - conda-forge::gawk=5.1.0 diff --git a/modules/nf-core/star/align/main.nf b/modules/nf-core/star/align/main.nf new file mode 100644 index 00000000..417071ba --- /dev/null +++ b/modules/nf-core/star/align/main.nf @@ -0,0 +1,110 @@ +process STAR_ALIGN 
{ + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b4/b425bc2a95806d878993f9a66dae3ae80ac2dafff4c208c5ae01b7a90a32fa91/data' : + 'community.wave.seqera.io/library/star_samtools_htslib_gawk:10c6e8c834460019' }" + + input: + tuple val(meta), path(reads, stageAs: "input*/*") + tuple val(meta2), path(index) + tuple val(meta3), path(gtf) + val star_ignore_sjdbgtf + val seq_platform + val seq_center + + output: + tuple val(meta), path('*Log.final.out') , emit: log_final + tuple val(meta), path('*Log.out') , emit: log_out + tuple val(meta), path('*Log.progress.out'), emit: log_progress + path "versions.yml" , emit: versions + + tuple val(meta), path('*d.out.bam') , optional:true, emit: bam + tuple val(meta), path("${prefix}.sortedByCoord.out.bam") , optional:true, emit: bam_sorted + tuple val(meta), path("${prefix}.Aligned.sortedByCoord.out.bam") , optional:true, emit: bam_sorted_aligned + tuple val(meta), path('*toTranscriptome.out.bam') , optional:true, emit: bam_transcript + tuple val(meta), path('*Aligned.unsort.out.bam') , optional:true, emit: bam_unsorted + tuple val(meta), path('*fastq.gz') , optional:true, emit: fastq + tuple val(meta), path('*.tab') , optional:true, emit: tab + tuple val(meta), path('*.SJ.out.tab') , optional:true, emit: spl_junc_tab + tuple val(meta), path('*.ReadsPerGene.out.tab') , optional:true, emit: read_per_gene_tab + tuple val(meta), path('*.out.junction') , optional:true, emit: junction + tuple val(meta), path('*.out.sam') , optional:true, emit: sam + tuple val(meta), path('*.wig') , optional:true, emit: wig + tuple val(meta), path('*.bg') , optional:true, emit: bedgraph + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def reads1 = [], 
reads2 = [] + meta.single_end ? [reads].flatten().each{reads1 << it} : reads.eachWithIndex{ v, ix -> ( ix & 1 ? reads2 : reads1) << v } + def ignore_gtf = star_ignore_sjdbgtf ? '' : "--sjdbGTFfile $gtf" + def seq_platform = seq_platform ? "'PL:$seq_platform'" : "" + def seq_center = seq_center ? "'CN:$seq_center'" : "" + attrRG = args.contains("--outSAMattrRGline") ? "" : "--outSAMattrRGline 'ID:$prefix' $seq_center 'SM:$prefix' $seq_platform" + def out_sam_type = (args.contains('--outSAMtype')) ? '' : '--outSAMtype BAM Unsorted' + mv_unsorted_bam = (args.contains('--outSAMtype BAM Unsorted SortedByCoordinate')) ? "mv ${prefix}.Aligned.out.bam ${prefix}.Aligned.unsort.out.bam" : '' + """ + STAR \\ + --genomeDir $index \\ + --readFilesIn ${reads1.join(",")} ${reads2.join(",")} \\ + --runThreadN $task.cpus \\ + --outFileNamePrefix $prefix. \\ + $out_sam_type \\ + $ignore_gtf \\ + $attrRG \\ + $args + + $mv_unsorted_bam + + if [ -f ${prefix}.Unmapped.out.mate1 ]; then + mv ${prefix}.Unmapped.out.mate1 ${prefix}.unmapped_1.fastq + gzip ${prefix}.unmapped_1.fastq + fi + if [ -f ${prefix}.Unmapped.out.mate2 ]; then + mv ${prefix}.Unmapped.out.mate2 ${prefix}.unmapped_2.fastq + gzip ${prefix}.unmapped_2.fastq + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + star: \$(STAR --version | sed -e "s/STAR_//g") + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + """ + echo "" | gzip > ${prefix}.unmapped_1.fastq.gz + echo "" | gzip > ${prefix}.unmapped_2.fastq.gz + touch ${prefix}Xd.out.bam + touch ${prefix}.Log.final.out + touch ${prefix}.Log.out + touch ${prefix}.Log.progress.out + touch ${prefix}.sortedByCoord.out.bam + touch ${prefix}.toTranscriptome.out.bam + touch ${prefix}.Aligned.unsort.out.bam + touch ${prefix}.Aligned.sortedByCoord.out.bam + touch ${prefix}.tab + 
touch ${prefix}.SJ.out.tab + touch ${prefix}.ReadsPerGene.out.tab + touch ${prefix}.Chimeric.out.junction + touch ${prefix}.out.sam + touch ${prefix}.Signal.UniqueMultiple.str1.out.wig + touch ${prefix}.Signal.UniqueMultiple.str1.out.bg + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + star: \$(STAR --version | sed -e "s/STAR_//g") + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/star/align/meta.yml b/modules/nf-core/star/align/meta.yml new file mode 100644 index 00000000..5cfe763e --- /dev/null +++ b/modules/nf-core/star/align/meta.yml @@ -0,0 +1,230 @@ +name: star_align +description: Align reads to a reference genome using STAR +keywords: + - align + - fasta + - genome + - reference +tools: + - star: + description: | + STAR is a software package for mapping DNA sequences against + a large reference genome, such as the human genome. + homepage: https://github.com/alexdobin/STAR + manual: https://github.com/alexdobin/STAR/blob/master/doc/STARmanual.pdf + doi: 10.1093/bioinformatics/bts635 + licence: ["MIT"] + identifier: biotools:star +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - index: + type: directory + description: STAR genome index + pattern: "star" + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'test' ] + - gtf: + type: file + description: Annotation GTF file + pattern: "*.{gtf}" + - - star_ignore_sjdbgtf: + type: boolean + description: Ignore annotation GTF file + - - seq_platform: + type: string + description: Sequencing platform + - - seq_center: + type: string + description: Sequencing center +output: + - log_final: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*Log.final.out": + type: file + description: STAR final log file + pattern: "*Log.final.out" + - log_out: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*Log.out": + type: file + description: STAR log out file + pattern: "*Log.out" + - log_progress: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*Log.progress.out": + type: file + description: STAR log progress file + pattern: "*Log.progress.out" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + - bam: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*d.out.bam": + type: file + description: Output BAM file containing read alignments + pattern: "*.{bam}" + - bam_sorted: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.sortedByCoord.out.bam: + type: file + description: Sorted BAM file of read alignments (optional) + pattern: "*sortedByCoord.out.bam" + - bam_sorted_aligned: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g.
[ id:'test', single_end:false ] + - ${prefix}.Aligned.sortedByCoord.out.bam: + type: file + description: Sorted BAM file of read alignments (optional) + pattern: "*.Aligned.sortedByCoord.out.bam" + - bam_transcript: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*toTranscriptome.out.bam": + type: file + description: Output BAM file of transcriptome alignment (optional) + pattern: "*toTranscriptome.out.bam" + - bam_unsorted: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*Aligned.unsort.out.bam": + type: file + description: Unsorted BAM file of read alignments (optional) + pattern: "*Aligned.unsort.out.bam" + - fastq: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*fastq.gz": + type: file + description: Unmapped FastQ files (optional) + pattern: "*fastq.gz" + - tab: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.tab": + type: file + description: STAR output tab file(s) (optional) + pattern: "*.tab" + - spl_junc_tab: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.SJ.out.tab": + type: file + description: STAR output splice junction tab file + pattern: "*.SJ.out.tab" + - read_per_gene_tab: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.ReadsPerGene.out.tab": + type: file + description: STAR output read per gene tab file + pattern: "*.ReadsPerGene.out.tab" + - junction: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.out.junction": + type: file + description: STAR chimeric junction output file (optional) + pattern: "*.out.junction" + - sam: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*.out.sam" + - "*.out.sam": + type: file + description: STAR output SAM file(s) (optional) + pattern: "*.out.sam" + - wig: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.wig": + type: file + description: STAR output wiggle format file(s) (optional) + pattern: "*.wig" + - bedgraph: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bg": + type: file + description: STAR output bedGraph format file(s) (optional) + pattern: "*.bg" +authors: + - "@kevinmenden" + - "@drpatelh" + - "@praveenraj2018" +maintainers: + - "@kevinmenden" + - "@drpatelh" + - "@praveenraj2018" diff --git a/modules/nf-core/star/align/tests/main.nf.test b/modules/nf-core/star/align/tests/main.nf.test new file mode 100644 index 00000000..a62c17db --- /dev/null +++ b/modules/nf-core/star/align/tests/main.nf.test @@ -0,0 +1,593 @@ +nextflow_process { + + name "Test Process STAR_ALIGN" + script "../main.nf" + process "STAR_ALIGN" + tag "modules" + tag "modules_nfcore" + tag "star" + tag "star/align" + tag "star/genomegenerate" + + test("homo_sapiens - single_end") { + config "./nextflow.config" + + setup { + run("STAR_GENOMEGENERATE") { + script "../../../star/genomegenerate/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + } + + when { + 
process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true) ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + input[3] = false + input[4] = 'illumina' + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.log_final[0][1]).name, + file(process.out.log_out[0][1]).name, + file(process.out.log_progress[0][1]).name, + bam(process.out.bam[0][1]).getReadsMD5(), + bam(process.out.bam_sorted_aligned[0][1]).getReadsMD5(), + process.out.bedgraph, + process.out.fastq, + process.out.read_per_gene_tab, + process.out.sam, + process.out.spl_junc_tab, + process.out.tab, + process.out.wig, + process.out.versions + ).match() } + ) + } + } + + test("homo_sapiens - paired_end") { + config "./nextflow.config" + + setup { + run("STAR_GENOMEGENERATE") { + script "../../../star/genomegenerate/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ 
id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + input[3] = false + input[4] = 'illumina' + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.log_final[0][1]).name, + file(process.out.log_out[0][1]).name, + file(process.out.log_progress[0][1]).name, + bam(process.out.bam[0][1]).getReadsMD5(), + bam(process.out.bam_sorted_aligned[0][1]).getReadsMD5(), + process.out.bedgraph, + process.out.fastq, + process.out.read_per_gene_tab, + process.out.sam, + process.out.spl_junc_tab, + process.out.tab, + process.out.wig, + process.out.versions + ).match() } + ) + } + } + + test("homo_sapiens - paired_end - arriba") { + config "./nextflow.arriba.config" + + setup { + run("STAR_GENOMEGENERATE") { + script "../../../star/genomegenerate/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + input[3] = false + input[4] = 'illumina' + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + 
file(process.out.log_final[0][1]).name, + file(process.out.log_out[0][1]).name, + file(process.out.log_progress[0][1]).name, + bam(process.out.bam[0][1]).getReadsMD5(), + process.out.bedgraph, + process.out.fastq, + process.out.read_per_gene_tab, + process.out.sam, + process.out.spl_junc_tab, + process.out.tab, + process.out.wig, + process.out.versions + ).match() } + ) + } + } + + test("homo_sapiens - paired_end - starfusion") { + config "./nextflow.starfusion.config" + + setup { + run("STAR_GENOMEGENERATE") { + script "../../../star/genomegenerate/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + input[3] = false + input[4] = 'illumina' + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.log_final[0][1]).name, + file(process.out.log_out[0][1]).name, + file(process.out.log_progress[0][1]).name, + file(process.out.junction[0][1]).name, + bam(process.out.bam[0][1]).getReadsMD5(), + process.out.bedgraph, + process.out.fastq, + process.out.read_per_gene_tab, + process.out.sam, + process.out.spl_junc_tab, + 
process.out.tab, + process.out.wig, + process.out.versions + ).match() } + ) + } + } + + test("homo_sapiens - paired_end - multiple") { + config "./nextflow.config" + + setup { + run("STAR_GENOMEGENERATE") { + script "../../../star/genomegenerate/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + input[3] = false + input[4] = 'illumina' + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.log_final[0][1]).name, + file(process.out.log_out[0][1]).name, + file(process.out.log_progress[0][1]).name, + bam(process.out.bam[0][1]).getReadsMD5(), + bam(process.out.bam_sorted_aligned[0][1]).getReadsMD5(), + process.out.bedgraph, + process.out.fastq, + process.out.read_per_gene_tab, + process.out.sam, + process.out.spl_junc_tab, + process.out.tab, + process.out.wig, + 
process.out.versions + ).match() } + ) + } + } + + test("homo_sapiens - single_end - stub") { + options "-stub" + config "./nextflow.config" + + setup { + run("STAR_GENOMEGENERATE") { + script "../../../star/genomegenerate/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true) ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + input[3] = false + input[4] = 'illumina' + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("homo_sapiens - paired_end - stub") { + options "-stub" + config "./nextflow.config" + + setup { + run("STAR_GENOMEGENERATE") { + script "../../../star/genomegenerate/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', 
checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + input[3] = false + input[4] = 'illumina' + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("homo_sapiens - paired_end - arriba - stub") { + options "-stub" + config "./nextflow.arriba.config" + + setup { + run("STAR_GENOMEGENERATE") { + script "../../../star/genomegenerate/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + input[3] = false + input[4] = 'illumina' + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("homo_sapiens - paired_end - starfusion - stub") { + options "-stub" + config "./nextflow.starfusion.config" + + 
setup { + run("STAR_GENOMEGENERATE") { + script "../../../star/genomegenerate/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + input[3] = false + input[4] = 'illumina' + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("homo_sapiens - paired_end - multiple - stub") { + options "-stub" + config "./nextflow.config" + + setup { + run("STAR_GENOMEGENERATE") { + script "../../../star/genomegenerate/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', 
checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + input[3] = false + input[4] = 'illumina' + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/star/align/tests/main.nf.test.snap b/modules/nf-core/star/align/tests/main.nf.test.snap new file mode 100644 index 00000000..b533fb8b --- /dev/null +++ b/modules/nf-core/star/align/tests/main.nf.test.snap @@ -0,0 +1,1913 @@ +{ + "homo_sapiens - single_end - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Log.final.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Log.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "10": [ + [ + { + "id": "test", + "single_end": true + }, + [ + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "11": [ + [ + { + "id": "test", + "single_end": true + }, + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "12": [ + [ + { + "id": "test", + "single_end": true + }, + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "13": [ + [ + { + "id": "test", + "single_end": true + }, + 
"test.Chimeric.out.junction:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "14": [ + [ + { + "id": "test", + "single_end": true + }, + "test.out.sam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "15": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Signal.UniqueMultiple.str1.out.wig:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "16": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Signal.UniqueMultiple.str1.out.bg:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Log.progress.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,a149bba1dbb5194560abdd813c7848e3" + ], + "4": [ + [ + { + "id": "test", + "single_end": true + }, + [ + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "testXd.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": true + }, + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "7": [ + [ + { + "id": "test", + "single_end": true + }, + "test.toTranscriptome.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "8": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Aligned.unsort.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "9": [ + [ + { + "id": "test", + "single_end": true + }, + [ + "test.unmapped_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.unmapped_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "bam": [ + [ + { + "id": "test", + "single_end": true + }, + [ + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + 
"testXd.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "bam_sorted": [ + [ + { + "id": "test", + "single_end": true + }, + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam_sorted_aligned": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam_transcript": [ + [ + { + "id": "test", + "single_end": true + }, + "test.toTranscriptome.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam_unsorted": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Aligned.unsort.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bedgraph": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Signal.UniqueMultiple.str1.out.bg:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "fastq": [ + [ + { + "id": "test", + "single_end": true + }, + [ + "test.unmapped_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.unmapped_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "junction": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Chimeric.out.junction:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_final": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Log.final.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_out": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Log.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_progress": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Log.progress.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "read_per_gene_tab": [ + [ + { + "id": "test", + "single_end": true + }, + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "sam": [ + [ + { + "id": "test", + "single_end": true + }, + "test.out.sam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "spl_junc_tab": [ + [ + { + "id": "test", + "single_end": true + }, + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], 
+ "tab": [ + [ + { + "id": "test", + "single_end": true + }, + [ + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,a149bba1dbb5194560abdd813c7848e3" + ], + "wig": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Signal.UniqueMultiple.str1.out.wig:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-20T17:09:08.738074176" + }, + "homo_sapiens - paired_end - arriba - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.final.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "10": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "11": [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "12": [ + [ + { + "id": "test", + "single_end": false + }, + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "13": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Chimeric.out.junction:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "14": [ + [ + { + "id": "test", + "single_end": false + }, + "test.out.sam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "15": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.wig:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "16": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.bg:md5,d41d8cd98f00b204e9800998ecf8427e" + ] 
+ ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.progress.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,a149bba1dbb5194560abdd813c7848e3" + ], + "4": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "testXd.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "7": [ + [ + { + "id": "test", + "single_end": false + }, + "test.toTranscriptome.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "8": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.unsort.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "9": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.unmapped_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.unmapped_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "testXd.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "bam_sorted": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam_sorted_aligned": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam_transcript": [ + [ + { + "id": "test", + "single_end": false + }, + "test.toTranscriptome.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + 
"bam_unsorted": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.unsort.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bedgraph": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.bg:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "fastq": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.unmapped_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.unmapped_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "junction": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Chimeric.out.junction:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_final": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.final.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_out": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_progress": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.progress.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "read_per_gene_tab": [ + [ + { + "id": "test", + "single_end": false + }, + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "sam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.out.sam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "spl_junc_tab": [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tab": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,a149bba1dbb5194560abdd813c7848e3" + ], + "wig": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.wig:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": 
"0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-20T17:09:36.122131869" + }, + "homo_sapiens - single_end": { + "content": [ + "test.Log.final.out", + "test.Log.out", + "test.Log.progress.out", + "9f76be49a6607613a64f760101bdddce", + "9f76be49a6607613a64f760101bdddce", + [ + [ + { + "id": "test", + "single_end": true + }, + [ + "test.Signal.Unique.str1.out.bg:md5,c56fc1472776fb927eaf62d973da5f9a", + "test.Signal.UniqueMultiple.str1.out.bg:md5,e93373cf6f2a2a9506e2efdb260cdd4f" + ] + ] + ], + [ + + ], + [ + + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.SJ.out.tab:md5,75a516ab950fb958f40b29996474949c" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.SJ.out.tab:md5,75a516ab950fb958f40b29996474949c" + ] + ], + [ + + ], + [ + "versions.yml:md5,a149bba1dbb5194560abdd813c7848e3" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-20T17:01:22.197991909" + }, + "homo_sapiens - paired_end": { + "content": [ + "test.Log.final.out", + "test.Log.out", + "test.Log.progress.out", + "db9a8324b5163b025bcc0c33e848486", + "db9a8324b5163b025bcc0c33e848486", + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.Signal.Unique.str1.out.bg:md5,d7bf8b70b436ca048a62513e1d0ece3a", + "test.Signal.UniqueMultiple.str1.out.bg:md5,686d58493b9eb445b56ace4d67f76ef6" + ] + ] + ], + [ + + ], + [ + + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,844af19ab0fc8cd9a3f75228445aca0d" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,844af19ab0fc8cd9a3f75228445aca0d" + ] + ], + [ + + ], + [ + "versions.yml:md5,a149bba1dbb5194560abdd813c7848e3" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-20T17:02:06.988663857" + }, + "homo_sapiens - paired_end - multiple - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + 
"test.Log.final.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "10": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "11": [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "12": [ + [ + { + "id": "test", + "single_end": false + }, + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "13": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Chimeric.out.junction:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "14": [ + [ + { + "id": "test", + "single_end": false + }, + "test.out.sam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "15": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.wig:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "16": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.bg:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.progress.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,a149bba1dbb5194560abdd813c7848e3" + ], + "4": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "testXd.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": false + }, + 
"test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "7": [ + [ + { + "id": "test", + "single_end": false + }, + "test.toTranscriptome.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "8": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.unsort.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "9": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.unmapped_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.unmapped_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "testXd.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "bam_sorted": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam_sorted_aligned": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam_transcript": [ + [ + { + "id": "test", + "single_end": false + }, + "test.toTranscriptome.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam_unsorted": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.unsort.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bedgraph": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.bg:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "fastq": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.unmapped_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.unmapped_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "junction": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Chimeric.out.junction:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_final": [ + [ + { 
+ "id": "test", + "single_end": false + }, + "test.Log.final.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_out": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_progress": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.progress.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "read_per_gene_tab": [ + [ + { + "id": "test", + "single_end": false + }, + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "sam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.out.sam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "spl_junc_tab": [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tab": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,a149bba1dbb5194560abdd813c7848e3" + ], + "wig": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.wig:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-20T17:10:12.005468781" + }, + "homo_sapiens - paired_end - multiple": { + "content": [ + "test.Log.final.out", + "test.Log.out", + "test.Log.progress.out", + "3e54e45f5dc3e9c1f2fc55bc41531a87", + "3e54e45f5dc3e9c1f2fc55bc41531a87", + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.Signal.Unique.str1.out.bg:md5,d7bf8b70b436ca048a62513e1d0ece3a", + "test.Signal.UniqueMultiple.str1.out.bg:md5,686d58493b9eb445b56ace4d67f76ef6" + ] + ] + ], + [ + + ], + [ + + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,069877e053714e23010fe4e1c003b4a2" + ] + ], + [ + [ + { + 
"id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,069877e053714e23010fe4e1c003b4a2" + ] + ], + [ + + ], + [ + "versions.yml:md5,a149bba1dbb5194560abdd813c7848e3" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-20T17:08:54.877286681" + }, + "homo_sapiens - paired_end - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.final.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "10": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "11": [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "12": [ + [ + { + "id": "test", + "single_end": false + }, + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "13": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Chimeric.out.junction:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "14": [ + [ + { + "id": "test", + "single_end": false + }, + "test.out.sam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "15": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.wig:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "16": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.bg:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.progress.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,a149bba1dbb5194560abdd813c7848e3" + ], + "4": [ + [ + { + "id": "test", + "single_end": false + }, + [ + 
"test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "testXd.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "7": [ + [ + { + "id": "test", + "single_end": false + }, + "test.toTranscriptome.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "8": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.unsort.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "9": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.unmapped_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.unmapped_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "testXd.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "bam_sorted": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam_sorted_aligned": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam_transcript": [ + [ + { + "id": "test", + "single_end": false + }, + "test.toTranscriptome.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam_unsorted": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.unsort.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bedgraph": [ + [ + { + "id": "test", + "single_end": false + }, + 
"test.Signal.UniqueMultiple.str1.out.bg:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "fastq": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.unmapped_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.unmapped_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "junction": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Chimeric.out.junction:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_final": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.final.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_out": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_progress": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.progress.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "read_per_gene_tab": [ + [ + { + "id": "test", + "single_end": false + }, + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "sam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.out.sam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "spl_junc_tab": [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tab": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,a149bba1dbb5194560abdd813c7848e3" + ], + "wig": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.wig:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-20T17:09:20.911466345" + }, + "homo_sapiens - paired_end - starfusion": { + "content": [ + "test.Log.final.out", + "test.Log.out", + 
"test.Log.progress.out", + "test.Chimeric.out.junction", + "caee9dcda13882d4913456973c25b57a", + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,19c3faa1bfa9a0cc5e4c45f17065b53a" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,19c3faa1bfa9a0cc5e4c45f17065b53a" + ] + ], + [ + + ], + [ + "versions.yml:md5,a149bba1dbb5194560abdd813c7848e3" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-20T17:07:25.0639914" + }, + "homo_sapiens - paired_end - arriba": { + "content": [ + "test.Log.final.out", + "test.Log.out", + "test.Log.progress.out", + "1a3abe88fb2490589c58497d39921bcc", + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,5155c9fd1f787ad6d7d80987fb06219c" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,5155c9fd1f787ad6d7d80987fb06219c" + ] + ], + [ + + ], + [ + "versions.yml:md5,a149bba1dbb5194560abdd813c7848e3" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-20T17:04:00.685784211" + }, + "homo_sapiens - paired_end - starfusion - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.final.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "10": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "11": [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "12": [ + [ + { + "id": "test", + "single_end": false + }, + 
"test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "13": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Chimeric.out.junction:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "14": [ + [ + { + "id": "test", + "single_end": false + }, + "test.out.sam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "15": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.wig:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "16": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.bg:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.progress.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,a149bba1dbb5194560abdd813c7848e3" + ], + "4": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "testXd.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "7": [ + [ + { + "id": "test", + "single_end": false + }, + "test.toTranscriptome.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "8": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.unsort.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "9": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.unmapped_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.unmapped_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + [ + 
"test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "testXd.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "bam_sorted": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam_sorted_aligned": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam_transcript": [ + [ + { + "id": "test", + "single_end": false + }, + "test.toTranscriptome.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam_unsorted": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.unsort.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bedgraph": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.bg:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "fastq": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.unmapped_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.unmapped_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "junction": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Chimeric.out.junction:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_final": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.final.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_out": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_progress": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.progress.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "read_per_gene_tab": [ + [ + { + "id": "test", + "single_end": false + }, + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "sam": [ + [ + { + "id": "test", + "single_end": false + }, + 
"test.out.sam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "spl_junc_tab": [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tab": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,a149bba1dbb5194560abdd813c7848e3" + ], + "wig": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.wig:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-20T17:09:53.173671551" + } +} \ No newline at end of file diff --git a/modules/nf-core/star/align/tests/nextflow.arriba.config b/modules/nf-core/star/align/tests/nextflow.arriba.config new file mode 100644 index 00000000..cf09323f --- /dev/null +++ b/modules/nf-core/star/align/tests/nextflow.arriba.config @@ -0,0 +1,11 @@ +process { + + withName: STAR_GENOMEGENERATE { + ext.args = '--genomeSAindexNbases 9' + } + + withName: STAR_ALIGN { + ext.args = '--readFilesCommand zcat --outSAMtype BAM Unsorted --outSAMunmapped Within --outBAMcompression 0 --outFilterMultimapNmax 50 --peOverlapNbasesMin 10 --alignSplicedMateMapLminOverLmate 0.5 --alignSJstitchMismatchNmax 5 -1 5 5 --chimSegmentMin 10 --chimOutType WithinBAM HardClip --chimJunctionOverhangMin 10 --chimScoreDropMax 30 --chimScoreJunctionNonGTAG 0 --chimScoreSeparation 1 --chimSegmentReadGapMax 3 --chimMultimapNmax 50' + } + +} diff --git a/modules/nf-core/star/align/tests/nextflow.config b/modules/nf-core/star/align/tests/nextflow.config new file mode 100644 index 00000000..18bc2ee8 --- /dev/null +++ b/modules/nf-core/star/align/tests/nextflow.config @@ -0,0 +1,11 @@ +process { + + withName: STAR_GENOMEGENERATE { + ext.args = '--genomeSAindexNbases 9' 
+ } + + withName: STAR_ALIGN { + ext.args = '--readFilesCommand zcat --outSAMtype BAM SortedByCoordinate --outWigType bedGraph --outWigStrand Unstranded' + } + +} diff --git a/modules/nf-core/star/align/tests/nextflow.starfusion.config b/modules/nf-core/star/align/tests/nextflow.starfusion.config new file mode 100644 index 00000000..7880bfcf --- /dev/null +++ b/modules/nf-core/star/align/tests/nextflow.starfusion.config @@ -0,0 +1,11 @@ +process { + + withName: STAR_GENOMEGENERATE { + ext.args = '--genomeSAindexNbases 9' + } + + withName: STAR_ALIGN { + ext.args = '--readFilesCommand zcat --outSAMtype BAM Unsorted --outReadsUnmapped None --twopassMode Basic --outSAMstrandField intronMotif --outSAMunmapped Within --chimSegmentMin 12 --chimJunctionOverhangMin 8 --chimOutJunctionFormat 1 --alignSJDBoverhangMin 10 --alignMatesGapMax 100000 --alignIntronMax 100000 --alignSJstitchMismatchNmax 5 -1 5 5 --chimMultimapScoreRange 3 --chimScoreJunctionNonGTAG -4 --chimMultimapNmax 20 --chimNonchimScoreDropMin 10 --peOverlapNbasesMin 12 --peOverlapMMp 0.1 --alignInsertionFlush Right --alignSplicedMateMapLminOverLmate 0 --alignSplicedMateMapLmin 30' + } + +} diff --git a/modules/nf-core/star/align/tests/tags.yml b/modules/nf-core/star/align/tests/tags.yml new file mode 100644 index 00000000..8beace16 --- /dev/null +++ b/modules/nf-core/star/align/tests/tags.yml @@ -0,0 +1,2 @@ +star/align: + - modules/nf-core/star/align/** diff --git a/modules/nf-core/star/genomegenerate/environment.yml b/modules/nf-core/star/genomegenerate/environment.yml new file mode 100644 index 00000000..7c57530a --- /dev/null +++ b/modules/nf-core/star/genomegenerate/environment.yml @@ -0,0 +1,9 @@ +channels: + - conda-forge + - bioconda + +dependencies: + - bioconda::htslib=1.20 + - bioconda::samtools=1.20 + - bioconda::star=2.7.11b + - conda-forge::gawk=5.1.0 diff --git a/modules/nf-core/star/genomegenerate/main.nf b/modules/nf-core/star/genomegenerate/main.nf new file mode 100644 index 
00000000..8f0c67e7 --- /dev/null +++ b/modules/nf-core/star/genomegenerate/main.nf @@ -0,0 +1,119 @@ +process STAR_GENOMEGENERATE { + tag "$fasta" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b4/b425bc2a95806d878993f9a66dae3ae80ac2dafff4c208c5ae01b7a90a32fa91/data' : + 'community.wave.seqera.io/library/star_samtools_htslib_gawk:10c6e8c834460019' }" + + input: + tuple val(meta), path(fasta) + tuple val(meta2), path(gtf) + + output: + tuple val(meta), path("star") , emit: index + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args_list = args.tokenize() + def memory = task.memory ? "--limitGenomeGenerateRAM ${task.memory.toBytes() - 100000000}" : '' + def include_gtf = gtf ? "--sjdbGTFfile $gtf" : '' + if (args_list.contains('--genomeSAindexNbases')) { + """ + mkdir star + STAR \\ + --runMode genomeGenerate \\ + --genomeDir star/ \\ + --genomeFastaFiles $fasta \\ + $include_gtf \\ + --runThreadN $task.cpus \\ + $memory \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + star: \$(STAR --version | sed -e "s/STAR_//g") + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') + END_VERSIONS + """ + } else { + """ + samtools faidx $fasta + NUM_BASES=`gawk '{sum = sum + \$2}END{if ((log(sum)/log(2))/2 - 1 > 14) {printf "%.0f", 14} else {printf "%.0f", (log(sum)/log(2))/2 - 1}}' ${fasta}.fai` + + mkdir star + STAR \\ + --runMode genomeGenerate \\ + --genomeDir star/ \\ + --genomeFastaFiles $fasta \\ + $include_gtf \\ + --runThreadN $task.cpus \\ + --genomeSAindexNbases \$NUM_BASES \\ + $memory \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": 
+ star: \$(STAR --version | sed -e "s/STAR_//g") + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') + END_VERSIONS + """ + } + + stub: + if (gtf) { + """ + mkdir star + touch star/Genome + touch star/Log.out + touch star/SA + touch star/SAindex + touch star/chrLength.txt + touch star/chrName.txt + touch star/chrNameLength.txt + touch star/chrStart.txt + touch star/exonGeTrInfo.tab + touch star/exonInfo.tab + touch star/geneInfo.tab + touch star/genomeParameters.txt + touch star/sjdbInfo.txt + touch star/sjdbList.fromGTF.out.tab + touch star/sjdbList.out.tab + touch star/transcriptInfo.tab + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + star: \$(STAR --version | sed -e "s/STAR_//g") + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') + END_VERSIONS + """ + } else { + """ + mkdir star + touch star/Genome + touch star/Log.out + touch star/SA + touch star/SAindex + touch star/chrLength.txt + touch star/chrName.txt + touch star/chrNameLength.txt + touch star/chrStart.txt + touch star/genomeParameters.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + star: \$(STAR --version | sed -e "s/STAR_//g") + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') + END_VERSIONS + """ + } +} diff --git a/modules/nf-core/star/genomegenerate/meta.yml b/modules/nf-core/star/genomegenerate/meta.yml new file mode 100644 index 00000000..33c1f65f --- /dev/null +++ b/modules/nf-core/star/genomegenerate/meta.yml @@ -0,0 +1,56 @@ +name: star_genomegenerate +description: Create index for STAR +keywords: + - index + - fasta + - genome + - reference +tools: + - star: + description: | + STAR is a software package for mapping DNA 
sequences against + a large reference genome, such as the human genome. + homepage: https://github.com/alexdobin/STAR + manual: https://github.com/alexdobin/STAR/blob/master/doc/STARmanual.pdf + doi: 10.1093/bioinformatics/bts635 + licence: ["MIT"] + identifier: biotools:star +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Fasta file of the reference genome + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - gtf: + type: file + description: GTF file of the reference genome +output: + - index: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - star: + type: directory + description: Folder containing the star index files + pattern: "star" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@kevinmenden" + - "@drpatelh" +maintainers: + - "@kevinmenden" + - "@drpatelh" diff --git a/modules/nf-core/star/genomegenerate/tests/main.nf.test b/modules/nf-core/star/genomegenerate/tests/main.nf.test new file mode 100644 index 00000000..4d619c47 --- /dev/null +++ b/modules/nf-core/star/genomegenerate/tests/main.nf.test @@ -0,0 +1,114 @@ +nextflow_process { + + name "Test Process STAR_GENOMEGENERATE" + script "../main.nf" + process "STAR_GENOMEGENERATE" + tag "modules" + tag "modules_nfcore" + tag "star" + tag "star/genomegenerate" + + test("fasta_gtf") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll( 
+ { assert process.success }, + { assert snapshot( + file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString(), + process.out.versions) + .match() } + ) + } + } + + test("fasta") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ [], [] ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString(), + process.out.versions + ).match() } + ) + } + } + + test("fasta_gtf_stub") { + + options '-stub' + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("fasta_stub") { + + options '-stub' + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ [], [] ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap b/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap new file mode 100644 index 00000000..3db25678 --- /dev/null +++ b/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap @@ -0,0 +1,148 @@ +{ + "fasta_gtf": { + "content": [ + "[Genome, Log.out, SA, SAindex, chrLength.txt, chrName.txt, chrNameLength.txt, 
chrStart.txt, exonGeTrInfo.tab, exonInfo.tab, geneInfo.tab, genomeParameters.txt, sjdbInfo.txt, sjdbList.fromGTF.out.tab, sjdbList.out.tab, transcriptInfo.tab]", + [ + "versions.yml:md5,14b05d04c9eca568e9ed4888aaf26fa6" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-19T20:37:47.410432728" + }, + "fasta_gtf_stub": { + "content": [ + { + "0": [ + [ + { + "id": "test_fasta" + }, + [ + "Genome:md5,d41d8cd98f00b204e9800998ecf8427e", + "Log.out:md5,d41d8cd98f00b204e9800998ecf8427e", + "SA:md5,d41d8cd98f00b204e9800998ecf8427e", + "SAindex:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrLength.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrName.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrNameLength.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrStart.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "exonGeTrInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "exonInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "geneInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "genomeParameters.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sjdbInfo.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sjdbList.fromGTF.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "sjdbList.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "transcriptInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + "versions.yml:md5,14b05d04c9eca568e9ed4888aaf26fa6" + ], + "index": [ + [ + { + "id": "test_fasta" + }, + [ + "Genome:md5,d41d8cd98f00b204e9800998ecf8427e", + "Log.out:md5,d41d8cd98f00b204e9800998ecf8427e", + "SA:md5,d41d8cd98f00b204e9800998ecf8427e", + "SAindex:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrLength.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrName.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrNameLength.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrStart.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "exonGeTrInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "exonInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + 
"geneInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "genomeParameters.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sjdbInfo.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sjdbList.fromGTF.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "sjdbList.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "transcriptInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,14b05d04c9eca568e9ed4888aaf26fa6" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-19T20:38:09.165234795" + }, + "fasta_stub": { + "content": [ + { + "0": [ + [ + { + "id": "test_fasta" + }, + [ + "Genome:md5,d41d8cd98f00b204e9800998ecf8427e", + "Log.out:md5,d41d8cd98f00b204e9800998ecf8427e", + "SA:md5,d41d8cd98f00b204e9800998ecf8427e", + "SAindex:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrLength.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrName.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrNameLength.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrStart.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "genomeParameters.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + "versions.yml:md5,14b05d04c9eca568e9ed4888aaf26fa6" + ], + "index": [ + [ + { + "id": "test_fasta" + }, + [ + "Genome:md5,d41d8cd98f00b204e9800998ecf8427e", + "Log.out:md5,d41d8cd98f00b204e9800998ecf8427e", + "SA:md5,d41d8cd98f00b204e9800998ecf8427e", + "SAindex:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrLength.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrName.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrNameLength.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrStart.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "genomeParameters.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,14b05d04c9eca568e9ed4888aaf26fa6" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-19T20:38:19.530862664" + }, + "fasta": { + "content": [ + "[Genome, 
Log.out, SA, SAindex, chrLength.txt, chrName.txt, chrNameLength.txt, chrStart.txt, genomeParameters.txt]", + [ + "versions.yml:md5,14b05d04c9eca568e9ed4888aaf26fa6" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-19T20:37:58.667436398" + } +} \ No newline at end of file diff --git a/modules/nf-core/star/genomegenerate/tests/tags.yml b/modules/nf-core/star/genomegenerate/tests/tags.yml new file mode 100644 index 00000000..79f619bf --- /dev/null +++ b/modules/nf-core/star/genomegenerate/tests/tags.yml @@ -0,0 +1,2 @@ +star/genomegenerate: + - modules/nf-core/star/genomegenerate/** diff --git a/modules/nf-core/stringtie/merge/environment.yml b/modules/nf-core/stringtie/merge/environment.yml new file mode 100644 index 00000000..0556de41 --- /dev/null +++ b/modules/nf-core/stringtie/merge/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::stringtie=2.2.1 diff --git a/modules/nf-core/stringtie/merge/main.nf b/modules/nf-core/stringtie/merge/main.nf new file mode 100644 index 00000000..c2568219 --- /dev/null +++ b/modules/nf-core/stringtie/merge/main.nf @@ -0,0 +1,46 @@ +process STRINGTIE_MERGE { + label 'process_medium' + + // Note: 2.7X indices incompatible with AWS iGenomes. + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/stringtie:2.2.1--hecb563c_2' : + 'biocontainers/stringtie:2.2.1--hecb563c_2' }" + + input: + path stringtie_gtf + path annotation_gtf + + output: + path "stringtie.merged.gtf", emit: gtf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def reference = annotation_gtf ? 
"-G $annotation_gtf" : "" + """ + stringtie \\ + --merge $stringtie_gtf \\ + $reference \\ + -o stringtie.merged.gtf \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + stringtie: \$(stringtie --version 2>&1) + END_VERSIONS + """ + + stub: + """ + touch stringtie.merged.gtf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + stringtie: \$(stringtie --version 2>&1) + END_VERSIONS + """ +} diff --git a/modules/nf-core/stringtie/merge/meta.yml b/modules/nf-core/stringtie/merge/meta.yml new file mode 100644 index 00000000..cf6902b3 --- /dev/null +++ b/modules/nf-core/stringtie/merge/meta.yml @@ -0,0 +1,40 @@ +name: stringtie_merge +description: Merges the annotation gtf file and the stringtie output gtf files +keywords: + - merge + - gtf + - reference +tools: + - stringtie2: + description: | + Transcript assembly and quantification for RNA-Seq + homepage: https://ccb.jhu.edu/software/stringtie/index.shtml + documentation: https://ccb.jhu.edu/software/stringtie/index.shtml?t=manual + licence: ["MIT"] + identifier: biotools:stringtie +input: + - - stringtie_gtf: + type: file + description: | + Stringtie transcript gtf output(s). + pattern: "*.gtf" + - - annotation_gtf: + type: file + description: | + Annotation gtf file (optional). 
+ pattern: "*.gtf" +output: + - gtf: + - stringtie.merged.gtf: + type: file + description: Merged gtf file + pattern: "stringtie.merged.gtf" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@yuukiiwa" +maintainers: + - "@yuukiiwa" diff --git a/modules/nf-core/stringtie/merge/tests/main.nf.test b/modules/nf-core/stringtie/merge/tests/main.nf.test new file mode 100644 index 00000000..bcc648bc --- /dev/null +++ b/modules/nf-core/stringtie/merge/tests/main.nf.test @@ -0,0 +1,66 @@ +nextflow_process { + + name "Test Process STRINGTIE_MERGE" + script "../main.nf" + process "STRINGTIE_MERGE" + tag "modules" + tag "modules_nfcore" + tag "stringtie" + tag "stringtie/merge" + tag "stringtie/stringtie" + + setup { + run("STRINGTIE_STRINGTIE") { + script "../../stringtie/main.nf" + process { + """ + input[0] = [ + [ id:'test', strandedness:'reverse' ], // meta map + [ file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam", checkIfExists: true) ] + ] + input[1] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) + """ + } + } + } + + test("homo_sapiens - forward strandedness") { + + when { + process { + """ + input[0] = STRINGTIE_STRINGTIE.out.transcript_gtf.map { it -> it[1] } + input[1] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.gtf).match("fs_gtf") }, + { assert snapshot(process.out.versions).match("fs_versions") } + ) + } + } + + test("homo_sapiens - reverse strandedness") { + + when { + process { + """ + input[0] = STRINGTIE_STRINGTIE.out.transcript_gtf.map { it -> it[1] } + input[1] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) + """ + } + } + + then { + 
assertAll( + { assert process.success }, + { assert snapshot(process.out.gtf).match("rs_gtf") }, + { assert snapshot(process.out.versions).match("rs_versions") } + ) + } + } +} diff --git a/modules/nf-core/stringtie/merge/tests/main.nf.test.snap b/modules/nf-core/stringtie/merge/tests/main.nf.test.snap new file mode 100644 index 00000000..e1040696 --- /dev/null +++ b/modules/nf-core/stringtie/merge/tests/main.nf.test.snap @@ -0,0 +1,50 @@ +{ + "rs_versions": { + "content": [ + [ + "versions.yml:md5,b73d45fdebf4c8c446bb01817db1665d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2023-11-23T14:14:39.697712988" + }, + "rs_gtf": { + "content": [ + [ + "stringtie.merged.gtf:md5,6da479298d73d5b3216d4e1576a2bdf4" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2023-11-23T14:14:39.691894799" + }, + "fs_gtf": { + "content": [ + [ + "stringtie.merged.gtf:md5,6da479298d73d5b3216d4e1576a2bdf4" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-07T16:43:48.130184" + }, + "fs_versions": { + "content": [ + [ + "versions.yml:md5,b73d45fdebf4c8c446bb01817db1665d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2023-11-23T14:14:20.883140097" + } +} \ No newline at end of file diff --git a/modules/nf-core/stringtie/merge/tests/tags.yml b/modules/nf-core/stringtie/merge/tests/tags.yml new file mode 100644 index 00000000..58cef46b --- /dev/null +++ b/modules/nf-core/stringtie/merge/tests/tags.yml @@ -0,0 +1,2 @@ +stringtie/merge: + - modules/nf-core/stringtie/merge/** diff --git a/modules/nf-core/stringtie/stringtie/environment.yml b/modules/nf-core/stringtie/stringtie/environment.yml new file mode 100644 index 00000000..906b7486 --- /dev/null +++ b/modules/nf-core/stringtie/stringtie/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::stringtie=2.2.3 diff --git 
a/modules/nf-core/stringtie/stringtie/main.nf b/modules/nf-core/stringtie/stringtie/main.nf new file mode 100644 index 00000000..4635c8c5 --- /dev/null +++ b/modules/nf-core/stringtie/stringtie/main.nf @@ -0,0 +1,68 @@ +process STRINGTIE_STRINGTIE { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/stringtie:2.2.3--h43eeafb_0' : + 'biocontainers/stringtie:2.2.3--h43eeafb_0' }" + + input: + tuple val(meta), path(bam) + path annotation_gtf + + output: + tuple val(meta), path("*.transcripts.gtf"), emit: transcript_gtf + tuple val(meta), path("*.abundance.txt") , emit: abundance + tuple val(meta), path("*.coverage.gtf") , optional: true, emit: coverage_gtf + tuple val(meta), path("*.ballgown") , optional: true, emit: ballgown + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def reference = annotation_gtf ? "-G $annotation_gtf" : "" + def ballgown = annotation_gtf ? "-b ${prefix}.ballgown" : "" + def coverage = annotation_gtf ? 
"-C ${prefix}.coverage.gtf" : "" + + def strandedness = '' + if (meta.strandedness == 'forward') { + strandedness = '--fr' + } else if (meta.strandedness == 'reverse') { + strandedness = '--rf' + } + """ + stringtie \\ + $bam \\ + $strandedness \\ + $reference \\ + -o ${prefix}.transcripts.gtf \\ + -A ${prefix}.gene.abundance.txt \\ + $coverage \\ + $ballgown \\ + -p $task.cpus \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + stringtie: \$(stringtie --version 2>&1) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.transcripts.gtf + touch ${prefix}.gene.abundance.txt + touch ${prefix}.coverage.gtf + touch ${prefix}.ballgown + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + stringtie: \$(stringtie --version 2>&1) + END_VERSIONS + """ +} diff --git a/modules/nf-core/stringtie/stringtie/meta.yml b/modules/nf-core/stringtie/stringtie/meta.yml new file mode 100644 index 00000000..e55b2abf --- /dev/null +++ b/modules/nf-core/stringtie/stringtie/meta.yml @@ -0,0 +1,79 @@ +name: stringtie_stringtie +description: Transcript assembly and quantification for RNA-Seq +keywords: + - transcript + - assembly + - quantification + - gtf +tools: + - stringtie2: + description: | + Transcript assembly and quantification for RNA-Seq + homepage: https://ccb.jhu.edu/software/stringtie/index.shtml + documentation: https://ccb.jhu.edu/software/stringtie/index.shtml?t=manual + licence: ["MIT"] + identifier: biotools:stringtie +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: | + BAM file of aligned RNA-Seq reads. + - - annotation_gtf: + type: file + description: | + Annotation gtf file (optional). +output: + - transcript_gtf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.transcripts.gtf": + type: file + description: transcript gtf + pattern: "*.{transcripts.gtf}" + - abundance: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.abundance.txt": + type: file + description: abundance + pattern: "*.{abundance.txt}" + - coverage_gtf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.coverage.gtf": + type: file + description: coverage gtf + pattern: "*.{coverage.gtf}" + - ballgown: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.ballgown": + type: file + description: for running ballgown + pattern: "*.{ballgown}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" +maintainers: + - "@drpatelh" diff --git a/modules/nf-core/stringtie/stringtie/tests/main.nf.test b/modules/nf-core/stringtie/stringtie/tests/main.nf.test new file mode 100644 index 00000000..2204e849 --- /dev/null +++ b/modules/nf-core/stringtie/stringtie/tests/main.nf.test @@ -0,0 +1,213 @@ +nextflow_process { + + name "Test Process STRINGTIE_STRINGTIE" + script "../main.nf" + process "STRINGTIE_STRINGTIE" + config "./nextflow.config" + tag "modules" + tag "modules_nfcore" + tag "stringtie" + tag "stringtie/stringtie" + + test("sarscov2 [bam] - forward strandedness") { + + when { + process { + """ + input[0] = [ + [ id:'test', strandedness:'forward' ], // meta map + [ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam", checkIfExists: true) ] + ] + input[1] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.abundance, + process.out.transcript_gtf, + process.out.versions + ).match() } + ) + } + } + + 
test("sarscov2 [bam] - forward strandedness + reference annotation") { + + when { + process { + """ + input[0] = [ + [ id:'test', strandedness:'forward' ], // meta map + [ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam", checkIfExists: true) ] + ] + input[1] = file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gtf", checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.abundance, + process.out.ballgown, + process.out.transcript_gtf, + process.out.versions + ).match() } + ) + } + } + + test("sarscov2 [bam] - reverse strandedness") { + + when { + process { + """ + input[0] = [ + [ id:'test', strandedness:'reverse' ], // meta map + [ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam", checkIfExists: true) ] + ] + input[1] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.abundance, + process.out.transcript_gtf, + process.out.versions + ).match() } + ) + } + } + + test("sarscov2 [bam] - reverse strandedness + reference annotation") { + + when { + process { + """ + input[0] = [ + [ id:'test', strandedness:'reverse' ], // meta map + [ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam", checkIfExists: true) ] + ] + input[1] = file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gtf", checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.abundance, + process.out.ballgown, + process.out.transcript_gtf, + process.out.versions + ).match() } + ) + } + } + + test("sarscov2 [bam] - forward strandedness - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', strandedness:'forward' ], // meta map + [ file(params.modules_testdata_base_path + 
"genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam", checkIfExists: true) ] + ] + input[1] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 [bam] - forward strandedness + reference annotation - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', strandedness:'forward' ], // meta map + [ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam", checkIfExists: true) ] + ] + input[1] = file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gtf", checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 [bam] - reverse strandedness - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', strandedness:'reverse' ], // meta map + [ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam", checkIfExists: true) ] + ] + input[1] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 [bam] - reverse strandedness + reference annotation - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', strandedness:'reverse' ], // meta map + [ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam", checkIfExists: true) ] + ] + input[1] = file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gtf", checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/stringtie/stringtie/tests/main.nf.test.snap b/modules/nf-core/stringtie/stringtie/tests/main.nf.test.snap new file mode 100644 index 00000000..d4645de3 
--- /dev/null +++ b/modules/nf-core/stringtie/stringtie/tests/main.nf.test.snap @@ -0,0 +1,508 @@ +{ + "sarscov2 [bam] - forward strandedness + reference annotation": { + "content": [ + [ + [ + { + "id": "test", + "strandedness": "forward" + }, + "test.gene.abundance.txt:md5,7d8bce7f2a922e367cedccae7267c22e" + ] + ], + [ + [ + { + "id": "test", + "strandedness": "forward" + }, + [ + "e2t.ctab:md5,e981c0038295ae54b63cedb1083f1540", + "e_data.ctab:md5,6b4cf69bc03f3f69890f972a0e8b7471", + "i2t.ctab:md5,8a117c8aa4334b4c2d4711932b006fb4", + "i_data.ctab:md5,be3abe09740603213f83d50dcf81427f", + "t_data.ctab:md5,3b66c065da73ae0dd41cc332eff6a818" + ] + ] + ], + [ + [ + { + "id": "test", + "strandedness": "forward" + }, + "test.transcripts.gtf:md5,37154e7bda96544f24506ee902bb561d" + ] + ], + [ + "versions.yml:md5,06593ea00cc35bf06f2de2753e0c3913" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-18T09:56:50.294157199" + }, + "sarscov2 [bam] - forward strandedness": { + "content": [ + [ + [ + { + "id": "test", + "strandedness": "forward" + }, + "test.gene.abundance.txt:md5,d6f5c8cadb8458f1df0427cf790246e3" + ] + ], + [ + [ + { + "id": "test", + "strandedness": "forward" + }, + "test.transcripts.gtf:md5,6087dfc9700a52d9e4a1ae3fcd1d1dfd" + ] + ], + [ + "versions.yml:md5,06593ea00cc35bf06f2de2753e0c3913" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-18T09:56:39.4249133" + }, + "sarscov2 [bam] - forward strandedness - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "strandedness": "forward" + }, + "test.transcripts.gtf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "strandedness": "forward" + }, + "test.gene.abundance.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "strandedness": "forward" + }, + "test.coverage.gtf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + 
"strandedness": "forward" + }, + "test.ballgown:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + "versions.yml:md5,06593ea00cc35bf06f2de2753e0c3913" + ], + "abundance": [ + [ + { + "id": "test", + "strandedness": "forward" + }, + "test.gene.abundance.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "ballgown": [ + [ + { + "id": "test", + "strandedness": "forward" + }, + "test.ballgown:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "coverage_gtf": [ + [ + { + "id": "test", + "strandedness": "forward" + }, + "test.coverage.gtf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "transcript_gtf": [ + [ + { + "id": "test", + "strandedness": "forward" + }, + "test.transcripts.gtf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,06593ea00cc35bf06f2de2753e0c3913" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-18T09:57:23.008470065" + }, + "sarscov2 [bam] - forward strandedness + reference annotation - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "strandedness": "forward" + }, + "test.transcripts.gtf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "strandedness": "forward" + }, + "test.gene.abundance.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "strandedness": "forward" + }, + "test.coverage.gtf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "strandedness": "forward" + }, + "test.ballgown:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + "versions.yml:md5,06593ea00cc35bf06f2de2753e0c3913" + ], + "abundance": [ + [ + { + "id": "test", + "strandedness": "forward" + }, + "test.gene.abundance.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "ballgown": [ + [ + { + "id": "test", + "strandedness": "forward" + }, + "test.ballgown:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "coverage_gtf": [ + [ + { + "id": "test", + "strandedness": 
"forward" + }, + "test.coverage.gtf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "transcript_gtf": [ + [ + { + "id": "test", + "strandedness": "forward" + }, + "test.transcripts.gtf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,06593ea00cc35bf06f2de2753e0c3913" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-18T09:57:33.622824981" + }, + "sarscov2 [bam] - reverse strandedness + reference annotation - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "strandedness": "reverse" + }, + "test.transcripts.gtf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "strandedness": "reverse" + }, + "test.gene.abundance.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "strandedness": "reverse" + }, + "test.coverage.gtf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "strandedness": "reverse" + }, + "test.ballgown:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + "versions.yml:md5,06593ea00cc35bf06f2de2753e0c3913" + ], + "abundance": [ + [ + { + "id": "test", + "strandedness": "reverse" + }, + "test.gene.abundance.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "ballgown": [ + [ + { + "id": "test", + "strandedness": "reverse" + }, + "test.ballgown:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "coverage_gtf": [ + [ + { + "id": "test", + "strandedness": "reverse" + }, + "test.coverage.gtf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "transcript_gtf": [ + [ + { + "id": "test", + "strandedness": "reverse" + }, + "test.transcripts.gtf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,06593ea00cc35bf06f2de2753e0c3913" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-18T09:57:55.803421433" + }, + "sarscov2 [bam] - reverse strandedness - stub": { + "content": [ + { + "0": [ + [ + { 
+ "id": "test", + "strandedness": "reverse" + }, + "test.transcripts.gtf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "strandedness": "reverse" + }, + "test.gene.abundance.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "strandedness": "reverse" + }, + "test.coverage.gtf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "strandedness": "reverse" + }, + "test.ballgown:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + "versions.yml:md5,06593ea00cc35bf06f2de2753e0c3913" + ], + "abundance": [ + [ + { + "id": "test", + "strandedness": "reverse" + }, + "test.gene.abundance.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "ballgown": [ + [ + { + "id": "test", + "strandedness": "reverse" + }, + "test.ballgown:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "coverage_gtf": [ + [ + { + "id": "test", + "strandedness": "reverse" + }, + "test.coverage.gtf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "transcript_gtf": [ + [ + { + "id": "test", + "strandedness": "reverse" + }, + "test.transcripts.gtf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,06593ea00cc35bf06f2de2753e0c3913" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-18T09:57:44.825389635" + }, + "sarscov2 [bam] - reverse strandedness + reference annotation": { + "content": [ + [ + [ + { + "id": "test", + "strandedness": "reverse" + }, + "test.gene.abundance.txt:md5,7385b870b955dae2c2ab78a70cf05cce" + ] + ], + [ + [ + { + "id": "test", + "strandedness": "reverse" + }, + [ + "e2t.ctab:md5,e981c0038295ae54b63cedb1083f1540", + "e_data.ctab:md5,879b6696029d19c4737b562e9d149218", + "i2t.ctab:md5,8a117c8aa4334b4c2d4711932b006fb4", + "i_data.ctab:md5,be3abe09740603213f83d50dcf81427f", + "t_data.ctab:md5,3b66c065da73ae0dd41cc332eff6a818" + ] + ] + ], + [ + [ + { + "id": "test", + "strandedness": "reverse" + }, + 
"test.transcripts.gtf:md5,fbabb4e3888bbede67f11f692e484880" + ] + ], + [ + "versions.yml:md5,06593ea00cc35bf06f2de2753e0c3913" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-18T09:57:11.793664242" + }, + "sarscov2 [bam] - reverse strandedness": { + "content": [ + [ + [ + { + "id": "test", + "strandedness": "reverse" + }, + "test.gene.abundance.txt:md5,d6f5c8cadb8458f1df0427cf790246e3" + ] + ], + [ + [ + { + "id": "test", + "strandedness": "reverse" + }, + "test.transcripts.gtf:md5,01d6da00a3c458420841e57427297183" + ] + ], + [ + "versions.yml:md5,06593ea00cc35bf06f2de2753e0c3913" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-18T09:57:01.166309777" + } +} \ No newline at end of file diff --git a/modules/nf-core/stringtie/stringtie/tests/nextflow.config b/modules/nf-core/stringtie/stringtie/tests/nextflow.config new file mode 100644 index 00000000..e3aaa099 --- /dev/null +++ b/modules/nf-core/stringtie/stringtie/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: 'STRINGTIE_STRINGTIE' { + ext.args = '' + } +} diff --git a/modules/nf-core/stringtie/stringtie/tests/tags.yml b/modules/nf-core/stringtie/stringtie/tests/tags.yml new file mode 100644 index 00000000..da9b051c --- /dev/null +++ b/modules/nf-core/stringtie/stringtie/tests/tags.yml @@ -0,0 +1,2 @@ +stringtie/stringtie: + - modules/nf-core/stringtie/stringtie/** diff --git a/modules/nf-core/ucsc/gtftogenepred/environment.yml b/modules/nf-core/ucsc/gtftogenepred/environment.yml new file mode 100644 index 00000000..5c4f6c2f --- /dev/null +++ b/modules/nf-core/ucsc/gtftogenepred/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::ucsc-gtftogenepred=447 diff --git a/modules/nf-core/ucsc/gtftogenepred/main.nf b/modules/nf-core/ucsc/gtftogenepred/main.nf new file mode 100644 index 00000000..afbb5f3f --- /dev/null +++ b/modules/nf-core/ucsc/gtftogenepred/main.nf 
@@ -0,0 +1,54 @@ +process UCSC_GTFTOGENEPRED { + tag "${meta.id}" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ucsc-gtftogenepred:447--h954228d_0': + 'biocontainers/ucsc-gtftogenepred:447--h954228d_0' }" + + input: + tuple val(meta), path(gtf) + + output: + tuple val(meta), path("*.genepred"), emit: genepred + tuple val(meta), path("*.refflat") , emit: refflat , optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def gen_refflat = args.contains("-genePredExt") && args.contains("-geneNameAsName2") ? "true" : "false" + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '447' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + """ + gtfToGenePred \\ + $args \\ + $gtf \\ + ${prefix}.genepred + + if [ "${gen_refflat}" == "true" ] ; then + awk 'BEGIN { OFS="\\t"} {print \$12, \$1, \$2, \$3, \$4, \$5, \$6, \$7, \$8, \$9, \$10}' ${prefix}.genepred > ${prefix}.refflat + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ucsc: $VERSION + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '447' + """ + touch ${prefix}.genepred + touch ${prefix}.refflat + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ucsc: $VERSION + END_VERSIONS + """ +} diff --git a/modules/nf-core/ucsc/gtftogenepred/meta.yml b/modules/nf-core/ucsc/gtftogenepred/meta.yml new file mode 100644 index 00000000..cf04154d --- /dev/null +++ b/modules/nf-core/ucsc/gtftogenepred/meta.yml @@ -0,0 +1,56 @@ +name: ucsc_gtftogenepred +description: compute average score of bigwig over bed file +keywords: + - gtf + - genepred + - refflat + - ucsc + - gtftogenepred +tools: + - ucsc: + description: Convert GTF 
files to GenePred format + homepage: http://hgdownload.cse.ucsc.edu/admin/exe/ + licence: ["varies; see http://genome.ucsc.edu/license"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - gtf: + type: file + description: GTF file + pattern: "*.{gtf}" +output: + - genepred: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.genepred": + type: file + description: genepred file + pattern: "*.{genepred}" + - refflat: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.refflat": + type: file + description: refflat file + pattern: "*.{refflat}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@BarryDigby" + - "@anoronh4" +maintainers: + - "@BarryDigby" + - "@anoronh4" diff --git a/modules/nf-core/ucsc/gtftogenepred/tests/main.nf.test b/modules/nf-core/ucsc/gtftogenepred/tests/main.nf.test new file mode 100644 index 00000000..e0396a63 --- /dev/null +++ b/modules/nf-core/ucsc/gtftogenepred/tests/main.nf.test @@ -0,0 +1,36 @@ + +nextflow_process { + + name "Test Process UCSC_GTFTOGENEPRED" + script "../main.nf" + process "UCSC_GTFTOGENEPRED" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "ucsc" + tag "ucsc/gtftogenepred" + + test("test-ucsc-gtftogenepred") { + + when { + process { + """ + input[0] = [ + [ id: 'test' ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gtf', checkIfExists: true) ] + ] + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + +} diff --git a/modules/nf-core/ucsc/gtftogenepred/tests/main.nf.test.snap b/modules/nf-core/ucsc/gtftogenepred/tests/main.nf.test.snap new file mode 
100644 index 00000000..f021f823 --- /dev/null +++ b/modules/nf-core/ucsc/gtftogenepred/tests/main.nf.test.snap @@ -0,0 +1,51 @@ +{ + "test-ucsc-gtftogenepred": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.genepred:md5,779e4749efaf38da3443ddfde30cc76c" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.refflat:md5,4101802f41d4cf7ee2667587da11bf42" + ] + ], + "2": [ + "versions.yml:md5,fd95365619a316eb451190365b1b799e" + ], + "genepred": [ + [ + { + "id": "test" + }, + "test.genepred:md5,779e4749efaf38da3443ddfde30cc76c" + ] + ], + "refflat": [ + [ + { + "id": "test" + }, + "test.refflat:md5,4101802f41d4cf7ee2667587da11bf42" + ] + ], + "versions": [ + "versions.yml:md5,fd95365619a316eb451190365b1b799e" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-23T08:55:50.58172" + } +} \ No newline at end of file diff --git a/modules/nf-core/ucsc/gtftogenepred/tests/nextflow.config b/modules/nf-core/ucsc/gtftogenepred/tests/nextflow.config new file mode 100644 index 00000000..889bb6ce --- /dev/null +++ b/modules/nf-core/ucsc/gtftogenepred/tests/nextflow.config @@ -0,0 +1,8 @@ +process { + withName: UCSC_GTFTOGENEPRED { + ext.args = [ + "-genePredExt", + "-geneNameAsName2" + ].join(' ').trim() + } +} diff --git a/nextflow.config b/nextflow.config index be7a4aee..e9f2f6c4 100644 --- a/nextflow.config +++ b/nextflow.config @@ -1,177 +1,358 @@ /* - * ------------------------------------------------- - * nf-core/rnafusion Nextflow config file - * ------------------------------------------------- - * Default config options for all environments. 
- */ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + nf-core/rnafusion Nextflow config file +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Default config options for all compute environments +---------------------------------------------------------------------------------------- +*/ // Global default params, used in configs params { - - // Options: Tool versions - arriba_version = '1.1.0' - ericscript_version = '0.5.5' - fusioncatcher_version = '1.00' - fusion_inspector_version = '1.3.1' - pizzly_version = '0.37.3' - squid_version = '1.5' - star_fusion_version = '1.6.0' - - // Options: Building STAR-star_index - star_index = false - read_length = 100 - - // Options: References - arriba = false - star_fusion = false - fusioncatcher = false - fusion_inspector = false - ericscript = false - pizzly = false - squid = false - - // Options: Arriba - arriba_opt = false - arriba_vis = false - - // Options: STAR-Fusion - star_fusion_opt = false - - // Options: FusionCatcher - fusioncatcher_opt = false - - // Options: Pizzly - pizzly_k = 31 - - // Options: fusion-report - fusion_report_opt = false - - // Defaults - reads = "data/*{1,2}.fastq.gz" - singleEnd = false - clusterOptions = false - awsqueue = false - awsregion = 'eu-west-1' - readPaths = null - debug = false - - // Options: download-references.nf - fusion_report = false - cosmic_usr = false - cosmic_passwd = false - star_fusion_ensembl = false - - // Shared default variables across different scripts - download_db = false - igenomes = true - igenomes_base = "./iGenomes" - outdir = './results' - tracedir = "${params.outdir}/pipeline_info" - - // Boilerplate options - name = false - multiqc_config = "$baseDir/assets/multiqc_config.yaml" - email = false - maxMultiqcEmailFileSize = 25.MB - plaintext_email = false - monochrome_logs = false - help = false - genome = false - custom_config_version = 'master' - custom_config_base = 
"https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" - hostnames = false - config_profile_description = false - config_profile_contact = false - config_profile_url = false -} -// Container slug. Stable releases should specify release tag! -// Developmental code should specify :dev -process.container = 'nfcore/rnafusion:dev' + // Input options + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/rnafusion/testdata/human/samplesheet_valid.csv' + references_only = false + cosmic_username = null + cosmic_passwd = null + qiagen = false + + // MultiQC options + multiqc_config = null + multiqc_title = null + multiqc_logo = null + max_multiqc_email_size = '25.MB' + multiqc_methods_description = null + + // Genome + genome = 'GRCh38' + genomes_base = "${params.outdir}/references" + genome_gencode_version = 46 + read_length = 100 + starfusion_build = true + genomes = [:] + fusion_annot_lib = "https://github.com/FusionAnnotator/CTAT_HumanFusionLib/releases/download/v0.3.0/fusion_lib.Mar2021.dat.gz" // path to dat.gz CTAT genome lib // TODO: Update to latest with s3 link when available + species = "human" + + // Filtering + tools_cutoff = 1 + + // Trimming + fastp_trim = false + trim_tail = null + adapter_fasta = [] + + // Compression + cram = [] + + // Alignment options + star_ignore_sjdbgtf = false + seq_center = null + seq_platform = null + fusioncatcher_limitSjdbInsertNsj = 2000000 + fusioninspector_limitSjdbInsertNsj = 1000000 + + // Enable or disable tools + all = false + arriba = false + ctatsplicing = false + fusioncatcher = false + starindex = false + starfusion = false + stringtie = false + fusionreport = false + fusioninspector_only = false + + // Skip steps + skip_qc = false + skip_vis = false + skip_vcf = false + skip_salmon_index = false + + // Download references option + download_refs = false + + // Path to references + fasta = 
"${params.genomes_base}/gencode/Homo_sapiens_${params.genome}_${params.genome_gencode_version}_dna_primary_assembly.fa" + fai = "${params.genomes_base}/gencode/Homo_sapiens_${params.genome}_${params.genome_gencode_version}_dna_primary_assembly.fa.fai" + gtf = "${params.genomes_base}/gencode/Homo_sapiens_${params.genome}_${params.genome_gencode_version}.gtf" + refflat = "${params.genomes_base}/gencode/Homo_sapiens_${params.genome}_${params.genome_gencode_version}.gtf.refflat" + rrna_intervals = "${params.genomes_base}/gencode/Homo_sapiens_${params.genome}_${params.genome_gencode_version}.interval_list" + gencode_ref = "${params.genomes_base}/gencode" + no_cosmic = false + arriba_ref_blacklist = "${params.genomes_base}/arriba/blacklist_hg38_GRCh38_v2.4.0.tsv.gz" + arriba_ref_cytobands = "${params.genomes_base}/arriba/cytobands_hg38_GRCh38_v2.4.0.tsv" + arriba_ref_known_fusions = "${params.genomes_base}/arriba/known_fusions_hg38_GRCh38_v2.4.0.tsv.gz" + arriba_ref_protein_domains = "${params.genomes_base}/arriba/protein_domains_hg38_GRCh38_v2.4.0.gff3" + fusioncatcher_ref = "${params.genomes_base}/fusioncatcher/human_v${params.genome_gencode_version}" + hgnc_ref = "${params.genomes_base}/hgnc/hgnc_complete_set.txt" + hgnc_date = "${params.genomes_base}/hgnc/HGNC-DB-timestamp.txt" + salmon_index = "${params.genomes_base}/salmon/salmon" + starfusion_ref = "${params.genomes_base}/starfusion/ctat_genome_lib_build_dir" + starindex_ref = "${params.genomes_base}/star" + fusionreport_ref = "${params.genomes_base}/fusion_report_db" + + + // Internal file presence checks + salmon_index_stub_check = "${params.genomes_base}/salmon/salmon/complete_ref_lens.bin" + starindex_ref_stub_check = "${params.genomes_base}/star/star/Genome" + fusionreport_ref_stub_check = "${params.genomes_base}/fusion_report_db/mitelman.db" + fusioncatcher_ref_stub_check = "${params.genomes_base}/fusioncatcher/human_v${params.genome_gencode_version}/ensembl_fully_overlapping_genes.txt" + 
starfusion_ref_stub_check = "${params.genomes_base}/starfusion/Pfam-A.hmm" + + // Path to fusion outputs + arriba_fusions = null + starfusion_fusions = null + fusioncatcher_fusions = null + fusioninspector_fusions = null + whitelist = null + + // Boilerplate options + outdir = null + publish_dir_mode = 'copy' + email = null + email_on_fail = null + plaintext_email = false + monochrome_logs = false + hook_url = null + help = false + help_full = false + show_hidden = false + version = false + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/' + + // Config options + config_profile_name = null + config_profile_description = null + + custom_config_version = 'master' + custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" + config_profile_contact = null + config_profile_url = null + + // Schema validation default options + validate_params = true +} // Load base.config by default for all pipelines includeConfig 'conf/base.config' -// Load igenomes.config if required -if(params.igenomes){ - includeConfig 'conf/igenomes.config' +profiles { + debug { + dumpHashes = true + process.beforeScript = 'echo $HOSTNAME' + cleanup = false + nextflow.enable.configProcessNamesValidation = true + } + conda { + conda.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + conda.channels = ['conda-forge', 'bioconda'] + apptainer.enabled = false + } + mamba { + conda.enabled = true + conda.useMamba = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + } + docker { + docker.enabled = true + conda.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + docker.runOptions = '-u $(id -u):$(id 
-g)' + } + arm { + docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + conda.enabled = false + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + } + podman { + podman.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + } + shifter { + shifter.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + } + charliecloud { + charliecloud.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + apptainer.enabled = false + } + apptainer { + apptainer.enabled = true + apptainer.autoMounts = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + wave { + apptainer.ociAutoPull = true + singularity.ociAutoPull = true + wave.enabled = true + wave.freeze = true + wave.strategy = 'conda,container' + } + test { + includeConfig 'conf/test.config' + } + test_build { + includeConfig 'conf/test_build.config' + } + test_cosmic { + includeConfig 'conf/test_cosmic.config' + } + test_full { + includeConfig 'conf/test_full.config' + } + + gitpod { + executor.name = 'local' + executor.cpus = 4 + executor.memory = 8.GB + } } // Load nf-core custom profiles from different Institutions -try { - includeConfig "${params.custom_config_base}/nfcore_custom.config" -} catch (Exception e) { - System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}/nfcore_custom.config") -} +includeConfig !System.getenv('NXF_OFFLINE') && 
params.custom_config_base ? "${params.custom_config_base}/nfcore_custom.config" : "/dev/null" -profiles { - awsbatch { includeConfig 'conf/awsbatch.config' } - conda { process.conda = "$baseDir/environment.yml" } - debug { process.beforeScript = 'echo $HOSTNAME' } - docker { docker.enabled = true } - singularity { singularity.enabled = true } - test { includeConfig 'conf/test.config' } +// Load nf-core/rnafusion custom profiles from different institutions. +includeConfig !System.getenv('NXF_OFFLINE') && params.custom_config_base ? "${params.custom_config_base}/pipeline/rnafusion.config" : "/dev/null" + +// Set default registry for Apptainer, Docker, Podman, Charliecloud and Singularity independent of -profile +// Will not be used unless Apptainer / Docker / Podman / Charliecloud / Singularity are enabled +// Set to your registry if you have a mirror of containers +apptainer.registry = 'quay.io' +docker.registry = 'quay.io' +podman.registry = 'quay.io' +singularity.registry = 'quay.io' +charliecloud.registry = 'quay.io' + +// Export these variables to prevent local Python/R libraries from conflicting with those in the container +// The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container. +// See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable. 
+ +env { + PYTHONNOUSERSITE = 1 + R_PROFILE_USER = "/.Rprofile" + R_ENVIRON_USER = "/.Renviron" + JULIA_DEPOT_PATH = "/usr/local/share/julia" } -// Capture exit codes from upstream processes when piping -process.shell = ['/bin/bash', '-euo', 'pipefail'] +// Set bash options +process.shell = """\ +bash +set -e # Exit if a tool returns a non-zero status/exit code +set -u # Treat unset variables and parameters as an error +set -o pipefail # Returns the status of the last command to exit with a non-zero status or zero if all successfully execute +set -C # No clobber - prevent output redirection from overwriting files. +""" + +// Disable process selector warnings by default. Use debug profile to enable warnings. +nextflow.enable.configProcessNamesValidation = false + +def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') timeline { - enabled = true - file = "${params.tracedir}/execution_timeline.html" + enabled = true + file = "${params.outdir}/pipeline_info/execution_timeline_${trace_timestamp}.html" } report { - enabled = true - file = "${params.tracedir}/execution_report.html" + enabled = true + file = "${params.outdir}/pipeline_info/execution_report_${trace_timestamp}.html" } trace { - enabled = true - file = "${params.tracedir}/execution_trace.txt" + enabled = true + file = "${params.outdir}/pipeline_info/execution_trace_${trace_timestamp}.txt" } dag { - enabled = true - file = "${params.tracedir}/pipeline_dag.svg" + enabled = true + file = "${params.outdir}/pipeline_info/pipeline_dag_${trace_timestamp}.html" } manifest { - name = 'nf-core/rnafusion' - author = 'Martin Proks' - homePage = 'https://github.com/nf-core/rnafusion' - description = 'Nextflow rnafusion analysis pipeline, part of the nf-core community.' 
- mainScript = 'main.nf' - nextflowVersion = '>=0.32.0' - version = '1.0.2' + name = 'nf-core/rnafusion' + author = """Martin Proks, Annick Renevey""" + homePage = 'https://github.com/nf-core/rnafusion' + description = """Nextflow rnafusion analysis pipeline, part of the nf-core community.""" + mainScript = 'main.nf' + nextflowVersion = '!>=24.04.2' + version = '4.0.0dev' + doi = '' +} + +// Nextflow plugins +plugins { + id 'nf-schema@2.1.1' // Validation of pipeline parameters and creation of an input channel from a sample sheet +} + +validation { + defaultIgnoreParams = ["genomes"] + help { + enabled = true + command = "nextflow run $manifest.name -profile <docker/singularity/.../institute> --input samplesheet.csv --outdir <OUTDIR>" + fullParameter = "help_full" + showHiddenParameter = "show_hidden" + beforeText = """ +-\033[2m----------------------------------------------------\033[0m- + \033[0;32m,--.\033[0;30m/\033[0;32m,-.\033[0m +\033[0;34m ___ __ __ __ ___ \033[0;32m/,-._.--~\'\033[0m +\033[0;34m |\\ | |__ __ / ` / \\ |__) |__ \033[0;33m} {\033[0m +\033[0;34m | \\| | \\__, \\__/ | \\ |___ \033[0;32m\\`-._,-`-,\033[0m + \033[0;32m`._,._,\'\033[0m +\033[0;35m ${manifest.name} ${manifest.version}\033[0m +-\033[2m----------------------------------------------------\033[0m- +""" + afterText = """${manifest.doi ? "* The pipeline\n" : ""}${manifest.doi.tokenize(",").collect { " https://doi.org/${it.trim().replace('https://doi.org/','')}"}.join("\n")}${manifest.doi ? 
"\n" : ""} +* The nf-core framework + https://doi.org/10.1038/s41587-020-0439-x + +* Software dependencies + https://github.com/${manifest.name}/blob/master/CITATIONS.md +""" + } + summary { + beforeText = validation.help.beforeText + afterText = validation.help.afterText + } } -// Function to ensure that resource requirements don't go beyond -// a maximum limit -def check_max(obj, type) { - if(type == 'memory'){ - try { - if(obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) - return params.max_memory as nextflow.util.MemoryUnit - else - return obj - } catch (all) { - println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj" - return obj - } - } else if(type == 'time'){ - try { - if(obj.compareTo(params.max_time as nextflow.util.Duration) == 1) - return params.max_time as nextflow.util.Duration - else - return obj - } catch (all) { - println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj" - return obj - } - } else if(type == 'cpus'){ - try { - return Math.min( obj, params.max_cpus as int ) - } catch (all) { - println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! 
Using default value: $obj" - return obj - } - } -} \ No newline at end of file +// Load modules.config for DSL2 module specific options +includeConfig 'conf/modules.config' diff --git a/nextflow_schema.json b/nextflow_schema.json new file mode 100644 index 00000000..3b54697c --- /dev/null +++ b/nextflow_schema.json @@ -0,0 +1,575 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://raw.githubusercontent.com/nf-core/rnafusion/master/nextflow_schema.json", + "title": "nf-core/rnafusion pipeline parameters", + "description": "Nextflow rnafusion analysis pipeline, part of the nf-core community.", + "type": "object", + "$defs": { + "skip_steps": { + "title": "Skip steps", + "type": "object", + "description": "Skip analysis steps", + "default": "", + "properties": { + "skip_qc": { + "type": "boolean", + "description": "Skip QC steps" + }, + "skip_vcf": { + "type": "boolean", + "description": "Skip vcf creation step" + }, + "skip_vis": { + "type": "boolean", + "description": "Skip visualisation steps" + }, + "skip_salmon_index": { + "type": "boolean", + "description": "Skip salmon index generation step", + "hidden": true + } + }, + "fa_icon": "fas fa-fast-forward" + }, + "input_output_options": { + "title": "Input/output options", + "type": "object", + "fa_icon": "fas fa-terminal", + "description": "Define where the pipeline should find input data and save output data.", + "required": ["genomes_base", "outdir"], + "properties": { + "input": { + "type": "string", + "format": "file-path", + "schema": "assets/schema_input.json", + "mimetype": "text/csv", + "pattern": "^\\S+\\.csv$", + "description": "Path to comma-separated file containing information about the samples in the experiment.", + "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. 
See [usage docs](https://nf-co.re/rnafusion/usage#samplesheet-input).", + "fa_icon": "fas fa-file-csv" + }, + "outdir": { + "type": "string", + "format": "directory-path", + "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.", + "fa_icon": "fas fa-folder-open" + }, + "email": { + "type": "string", + "description": "Email address for completion summary.", + "fa_icon": "fas fa-envelope", + "help_text": "Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.", + "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$" + }, + "multiqc_title": { + "type": "string", + "description": "MultiQC report title. Printed as page header, used for filename if not otherwise specified.", + "fa_icon": "fas fa-file-signature" + }, + "cosmic_username": { + "type": "string", + "fa_icon": "far fa-file-code", + "description": "COSMIC username" + }, + "cosmic_passwd": { + "type": "string", + "fa_icon": "far fa-file-code", + "description": "COSMIC password" + }, + "genomes_base": { + "type": "string", + "fa_icon": "far fa-file-code", + "description": "Path to reference folder" + }, + "genome_gencode_version": { + "type": "integer", + "fa_icon": "far fa-file-code", + "description": "gencode version" + }, + "starfusion_build": { + "type": "boolean", + "fa_icon": "far fa-file-code", + "description": "If set, starfusion references are built from scratch instead of downloaded (default)" + }, + "read_length": { + "type": "integer", + "fa_icon": "far fa-file-code", + "description": "Read length", + "default": 100 + }, + "all": { + "type": "boolean", + "fa_icon": "far fa-file-code", + "description": "Build or run all references/analyses" + }, + "arriba": { + "type": "boolean", + "fa_icon": "far 
fa-file-code", + "description": "Build or run arriba references/analyses" + }, + "arriba_ref_blacklist": { + "type": "string", + "fa_icon": "far fa-file-code", + "description": "Path to arriba reference blacklist" + }, + "arriba_ref_cytobands": { + "type": "string", + "fa_icon": "far fa-file-code", + "description": "Path to arriba reference cytobands" + }, + "arriba_ref_known_fusions": { + "type": "string", + "fa_icon": "far fa-file-code", + "description": "Path to arriba reference known fusions" + }, + "arriba_ref_protein_domains": { + "type": "string", + "fa_icon": "far fa-file-code", + "description": "Path to arriba reference protein domain" + }, + "arriba_fusions": { + "type": "string", + "fa_icon": "far fa-file-code", + "description": "Path to arriba output" + }, + "download_refs": { + "type": "boolean", + "fa_icon": "far fa-file-code", + "description": "Download references instead of building them (for fusioncatcher and starfusion)" + }, + "ctatsplicing": { + "type": "boolean", + "fa_icon": "far fa-file-code", + "description": "Run CTAT-splicing to detect aberrant cancer splicing introns. Needs --arriba and/or --starfusion to run." 
+ }, + "gencode_ref": { + "type": "string", + "fa_icon": "far fa-file-code", + "description": "Path to gencode references" + }, + "fusioncatcher": { + "type": "boolean", + "fa_icon": "far fa-file-code", + "description": "Build or run fusioncatcher references/analyses" + }, + "fusioncatcher_fusions": { + "type": "string", + "fa_icon": "far fa-file-code", + "description": "Path to fusioncatcher output" + }, + "fusioncatcher_limitSjdbInsertNsj": { + "type": "integer", + "fa_icon": "far fa-file-code", + "description": "Use limitSjdbInsertNsj with int for fusioncatcher" + }, + "fusioncatcher_ref": { + "type": "string", + "fa_icon": "far fa-file-code", + "description": "Path to fusioncatcher references" + }, + "fusioncatcher_ref_stub_check": { + "type": "string", + "fa_icon": "far fa-file-code", + "description": "Path to file in fusioncatcher references" + }, + "fusioninspector_limitSjdbInsertNsj": { + "type": "integer", + "fa_icon": "far fa-file-code", + "description": "Use limitSjdbInsertNsj with int for fusioninspector STAR process" + }, + "fusioninspector_only": { + "type": "boolean", + "fa_icon": "far fa-file-code", + "description": "Skip fusion-report. 
--fusioninspector_fusions PATH needed to provide a fusion list as input" + }, + "fusioninspector_fusions": { + "type": "string", + "fa_icon": "far fa-file-code", + "description": "Path to a fusion list file built with format GENE1--GENE2" + }, + "fusionreport": { + "type": "boolean", + "fa_icon": "far fa-file-code", + "description": "Build fusionreport references" + }, + "fusionreport_ref": { + "type": "string", + "fa_icon": "far fa-file-code", + "description": "Path to fusionreport references" + }, + "fusionreport_ref_stub_check": { + "type": "string", + "fa_icon": "far fa-file-code", + "description": "Path to file in fusionreport references" + }, + "hgnc_ref": { + "type": "string", + "fa_icon": "far fa-file-code", + "description": "Path to HGNC database file" + }, + "hgnc_date": { + "type": "string", + "fa_icon": "far fa-file-code", + "description": "Path to HGNC timestamp file for database retrieval" + }, + "qiagen": { + "type": "boolean", + "fa_icon": "far fa-file-code", + "description": "Use QIAGEN instead of SANGER to download COSMIC database" + }, + "salmon_index": { + "type": "string", + "fa_icon": "far fa-file-code", + "description": "Path to salmon index" + }, + "salmon_index_stub_check": { + "type": "string", + "fa_icon": "far fa-file-code", + "description": "Path to file in salmon index" + }, + "starfusion": { + "type": "boolean", + "fa_icon": "far fa-file-code", + "description": "Build or run starfusion references/analyses" + }, + "starfusion_fusions": { + "type": "string", + "fa_icon": "far fa-file-code", + "description": "Path to starfusion output" + }, + "starfusion_ref": { + "type": "string", + "fa_icon": "far fa-file-code", + "description": "Path to starfusion references" + }, + "starfusion_ref_stub_check": { + "type": "string", + "fa_icon": "far fa-file-code", + "description": "Path to file in starfusion references" + }, + "starindex": { + "type": "boolean", + "fa_icon": "far fa-file-code", + "description": "Build or run starindex 
references/analyses" + }, + "starindex_ref": { + "type": "string", + "fa_icon": "far fa-file-code", + "description": "Path to starindex references" + }, + "starindex_ref_stub_check": { + "type": "string", + "fa_icon": "far fa-file-code", + "description": "Path to file in starindex references" + }, + "stringtie": { + "type": "boolean", + "fa_icon": "far fa-file-code", + "description": "Run stringtie analysis" + }, + "tools_cutoff": { + "type": "integer", + "fa_icon": "far fa-file-code", + "description": "Discard fusions identified by fewer than INT tools" + }, + "whitelist": { + "type": "string", + "fa_icon": "far fa-file-code", + "description": "Path to fusions to add to the input of fusioninspector" + } + } + }, + "read_trimming_options": { + "title": "Read trimming options", + "type": "object", + "fa_icon": "fas fa-cut", + "description": "Options to adjust read trimming criteria.", + "properties": { + "fastp_trim": { + "type": "boolean", + "description": "Perform fastp trimming of reads, default: false", + "fa_icon": "fas fa-cut" + }, + "trim_tail": { + "type": "integer", + "description": "Perform tail trimming of reads, default: null", + "fa_icon": "fas fa-cut" + }, + "adapter_fasta": { + "type": "string", + "description": "Path to adapter fasta file: default: []", + "fa_icon": "fas fa-cut" + } + } + }, + "compression_options": { + "title": "Alignment compression options", + "type": "object", + "fa_icon": "fas fa-cut", + "description": "Option to compress BAM files to CRAM.", + "properties": { + "cram": { + "type": "string", + "description": "List of tools for which to compress BAM file to CRAM, default: [], options: arriba, starfusion. 
Leave no space between options", + "fa_icon": "fas fa-cut" + } + } + }, + "reference_genome_options": { + "title": "Reference genome options", + "type": "object", + "fa_icon": "fas fa-dna", + "description": "Reference genome related files and options required for the workflow.", + "properties": { + "references_only": { + "type": "boolean", + "description": "Skip running the analysis, only builds the references", + "fa_icon": "fas fa-book" + }, + "fasta": { + "type": "string", + "format": "file-path", + "mimetype": "text/plain", + "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$", + "description": "Path to FASTA genome file.", + "fa_icon": "far fa-file-code" + }, + "fai": { + "type": "string", + "format": "file-path", + "mimetype": "text/plain", + "pattern": "^\\S+\\.fn?ai(\\.gz)?$", + "description": "Path to FASTA genome index file.", + "fa_icon": "far fa-file-code" + }, + "genome": { + "type": "string", + "description": "Name of iGenomes reference.", + "fa_icon": "fas fa-book" + }, + "gtf": { + "type": "string", + "format": "file-path", + "mimetype": "text/plain", + "pattern": "^\\S+\\.gtf?(\\.gz)?$", + "description": "Path to GTF genome file.", + "fa_icon": "far fa-file-code" + }, + "refflat": { + "type": "string", + "format": "file-path", + "mimetype": "text/plain", + "pattern": "^\\S+\\.refflat?$", + "description": "Path to refFlat annotation file.", + "fa_icon": "far fa-file-code" + }, + "rrna_intervals": { + "type": "string", + "format": "file-path", + "mimetype": "text/plain", + "pattern": "^\\S+\\.interval_list?$", + "description": "Path to ribosomal interval list.", + "fa_icon": "far fa-file-code" + }, + "no_cosmic": { + "type": "boolean", + "fa_icon": "far fa-file-code", + "description": "Avoid using Cosmic DB (for example in clinical case applications where a paid license applies)." 
+ }, + "fusion_annot_lib": { + "type": "string", + "description": "Path to Fusion Annotation Library to be used in STARFUSION_BUILD.", + "fa_icon": "far fa-file-code" + }, + "species": { + "type": "string", + "description": "Which species dfam should automatically download, default: human.", + "fa_icon": "far fa-file-code" + } + } + }, + "institutional_config_options": { + "title": "Institutional config options", + "type": "object", + "fa_icon": "fas fa-university", + "description": "Parameters used to describe centralised config profiles. These should not be edited.", + "help_text": "The centralised nf-core configuration profiles use a handful of pipeline parameters to describe themselves. This information is then printed to the Nextflow log when you run a pipeline. You should not need to change these values when you run a pipeline.", + "properties": { + "custom_config_version": { + "type": "string", + "description": "Git commit id for Institutional configs.", + "default": "master", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "custom_config_base": { + "type": "string", + "description": "Base directory for Institutional configs.", + "default": "https://raw.githubusercontent.com/nf-core/configs/master", + "hidden": true, + "help_text": "If you're running offline, Nextflow will not be able to fetch the institutional config files from the internet. If you don't need them, then this is not a problem. 
If you do need them, you should download the files from the repo and tell Nextflow where to find them with this parameter.", + "fa_icon": "fas fa-users-cog" + }, + "config_profile_name": { + "type": "string", + "description": "Institutional config name.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "config_profile_description": { + "type": "string", + "description": "Institutional config description.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "config_profile_contact": { + "type": "string", + "description": "Institutional config contact information.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "config_profile_url": { + "type": "string", + "description": "Institutional config URL link.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + } + } + }, + "generic_options": { + "title": "Generic options", + "type": "object", + "fa_icon": "fas fa-file-import", + "description": "Less common options for the pipeline, typically set in a config file.", + "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.", + "properties": { + "version": { + "type": "boolean", + "description": "Display version and exit.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "publish_dir_mode": { + "type": "string", + "default": "copy", + "description": "Method used to save pipeline results to output directory.", + "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. 
See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", + "fa_icon": "fas fa-copy", + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "hidden": true + }, + "email_on_fail": { + "type": "string", + "description": "Email address for completion summary, only when pipeline fails.", + "fa_icon": "fas fa-exclamation-triangle", + "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$", + "help_text": "An email address to send a summary email to when the pipeline is completed - ONLY sent if the pipeline does not exit successfully.", + "hidden": true + }, + "plaintext_email": { + "type": "boolean", + "description": "Send plain-text email instead of HTML.", + "fa_icon": "fas fa-remove-format", + "hidden": true + }, + "max_multiqc_email_size": { + "type": "string", + "description": "File size limit when attaching MultiQC reports to summary emails.", + "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$", + "default": "25.MB", + "fa_icon": "fas fa-file-upload", + "hidden": true + }, + "monochrome_logs": { + "type": "boolean", + "description": "Do not use coloured log outputs.", + "fa_icon": "fas fa-palette", + "hidden": true + }, + "hook_url": { + "type": "string", + "description": "Incoming hook URL for messaging service", + "fa_icon": "fas fa-people-group", + "help_text": "Incoming hook URL for messaging service. Currently, MS Teams and Slack are supported.", + "hidden": true + }, + "multiqc_config": { + "type": "string", + "format": "file-path", + "description": "Custom config file to supply to MultiQC.", + "fa_icon": "fas fa-cog", + "hidden": true + }, + "multiqc_logo": { + "type": "string", + "description": "Custom logo file to supply to MultiQC. 
File name must also be set in the MultiQC config file", + "fa_icon": "fas fa-image", + "hidden": true + }, + "multiqc_methods_description": { + "type": "string", + "description": "Custom MultiQC yaml file containing HTML including a methods description.", + "fa_icon": "fas fa-cog" + }, + "validate_params": { + "type": "boolean", + "description": "Boolean whether to validate parameters against the schema at runtime", + "default": true, + "fa_icon": "fas fa-check-square", + "hidden": true + }, + "pipelines_testdata_base_path": { + "type": "string", + "fa_icon": "far fa-check-circle", + "description": "Base URL or local path to location of pipeline test dataset files", + "default": "https://raw.githubusercontent.com/nf-core/test-datasets/", + "hidden": true + }, + "seq_center": { + "type": "string", + "description": "Sequencing center", + "hidden": true, + "fa_icon": "fas fa-toolbox", + "help_text": "This will be reported in the BAM header as CN" + }, + "seq_platform": { + "type": "string", + "description": "Sequencing platform", + "hidden": true, + "fa_icon": "fas fa-toolbox", + "help_text": "This will be reported in the BAM header as PL."
+ }, + "star_ignore_sjdbgtf": { + "type": "boolean", + "description": "Whether to ignore the GTF in STAR alignment", + "hidden": true, + "fa_icon": "fas fa-toolbox", + "help_text": "Setting false will use GTF file for STAR alignment" + } + } + } + }, + "allOf": [ + { + "$ref": "#/$defs/input_output_options" + }, + { + "$ref": "#/$defs/skip_steps" + }, + { + "$ref": "#/$defs/read_trimming_options" + }, + { + "$ref": "#/$defs/compression_options" + }, + { + "$ref": "#/$defs/reference_genome_options" + }, + { + "$ref": "#/$defs/institutional_config_options" + }, + { + "$ref": "#/$defs/generic_options" + } + ] +} diff --git a/nf-test.config b/nf-test.config new file mode 100644 index 00000000..3ee9cc13 --- /dev/null +++ b/nf-test.config @@ -0,0 +1,14 @@ +config { + // location for all nf-tests + testsDir "tests" + + // nf-test directory including temporary files for each test + workDir ".nf-test" + + // location of an optional nextflow.config file specific for executing tests + configFile "tests/nextflow.config" + + plugins { + load "nft-utils@0.0.3" + } +} diff --git a/subworkflows/local/arriba_workflow/main.nf b/subworkflows/local/arriba_workflow/main.nf new file mode 100644 index 00000000..84ba3604 --- /dev/null +++ b/subworkflows/local/arriba_workflow/main.nf @@ -0,0 +1,111 @@ +include { ARRIBA_ARRIBA } from '../../../modules/nf-core/arriba/arriba/main' +include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_FOR_ARRIBA } from '../../../modules/nf-core/samtools/index/main' +include { SAMTOOLS_SORT as SAMTOOLS_SORT_FOR_ARRIBA } from '../../../modules/nf-core/samtools/sort/main' +include { SAMTOOLS_VIEW as SAMTOOLS_VIEW_FOR_ARRIBA } from '../../../modules/nf-core/samtools/view/main' +include { STAR_ALIGN as STAR_FOR_ARRIBA } from '../../../modules/nf-core/star/align/main' + +include { CTATSPLICING_WORKFLOW } from '../ctatsplicing_workflow' + +workflow ARRIBA_WORKFLOW { + take: + reads // channel [ meta, [ fastqs ] ] + ch_gtf // channel [ meta, path_gtf ] + ch_fasta // channel [ 
meta, path_fasta ] + ch_starindex_ref // channel [ meta, path_index ] + ch_arriba_ref_blacklist // channel [ meta, path_blacklist ] + ch_arriba_ref_cytobands // channel [ meta, path_cytobands ] + ch_arriba_ref_known_fusions // channel [ meta, path_known_fusions ] + ch_arriba_ref_protein_domains // channel [ meta, path_proteins ] + ch_starfusion_ref // channel [ meta, path_starfusion_ref ] + arriba // boolean + all // boolean + fusioninspector_only // boolean + star_ignore_sjdbgtf // boolean + ctatsplicing // boolean + seq_center // string + arriba_fusions // path + cram // array + + main: + + def ch_versions = Channel.empty() + def ch_cram_index = Channel.empty() + def ch_dummy_file = file("$projectDir/assets/dummy_file_arriba.txt", checkIfExists: true) + + if (( arriba || all ) && !fusioninspector_only) { + + STAR_FOR_ARRIBA( + reads, + ch_starindex_ref, + ch_gtf, + star_ignore_sjdbgtf, + '', + seq_center + ) + ch_versions = ch_versions.mix(STAR_FOR_ARRIBA.out.versions) + + if ( ctatsplicing || all ) { + CTATSPLICING_WORKFLOW( + STAR_FOR_ARRIBA.out.spl_junc_tab, + STAR_FOR_ARRIBA.out.junction, + STAR_FOR_ARRIBA.out.bam, + ch_starfusion_ref + ) + ch_versions = ch_versions.mix(CTATSPLICING_WORKFLOW.out.versions) + } + + if ( arriba_fusions ) { + + ch_arriba_fusions = reads.combine( Channel.value( file( arriba_fusions, checkIfExists: true ) ) ) + .map { it -> [ it[0], it[2] ] } + ch_arriba_fusion_fail = ch_dummy_file + + } else { + + ARRIBA_ARRIBA ( + STAR_FOR_ARRIBA.out.bam, + ch_fasta, + ch_gtf, + ch_arriba_ref_blacklist, + ch_arriba_ref_known_fusions, + ch_arriba_ref_cytobands, + ch_arriba_ref_protein_domains + ) + + ch_versions = ch_versions.mix(ARRIBA_ARRIBA.out.versions) + + ch_arriba_fusions = ARRIBA_ARRIBA.out.fusions + ch_arriba_fusion_fail = ARRIBA_ARRIBA.out.fusions_fail.map{ it -> return it[1] } + } + + if ( cram.contains('arriba') ) { + + SAMTOOLS_SORT_FOR_ARRIBA(STAR_FOR_ARRIBA.out.bam, ch_fasta) + ch_versions = 
ch_versions.mix(SAMTOOLS_SORT_FOR_ARRIBA.out.versions ) + + SAMTOOLS_VIEW_FOR_ARRIBA(SAMTOOLS_SORT_FOR_ARRIBA.out.bam.map { meta, bam -> [ meta, bam, [] ] }, ch_fasta, []) + ch_versions = ch_versions.mix(SAMTOOLS_VIEW_FOR_ARRIBA.out.versions ) + + SAMTOOLS_INDEX_FOR_ARRIBA(SAMTOOLS_VIEW_FOR_ARRIBA.out.cram) + ch_versions = ch_versions.mix(SAMTOOLS_INDEX_FOR_ARRIBA.out.versions ) + + // Join cram and index files + ch_cram_index = SAMTOOLS_VIEW_FOR_ARRIBA.out.cram.join(SAMTOOLS_INDEX_FOR_ARRIBA.out.crai) + } + + } else { + + ch_arriba_fusions = reads + .combine(Channel.value( file(ch_dummy_file, checkIfExists: true ) ) ) + .map { it -> [ it[0], it[2] ] } + + ch_arriba_fusion_fail = ch_dummy_file + } + + emit: + fusions = ch_arriba_fusions // channel [ meta, path_fusions ] + fusions_fail = ch_arriba_fusion_fail // channel [ path, fusions_failed ] + cram_index = ch_cram_index // channel [ meta, cram, crai ] + versions = ch_versions // channel [ versions ] + } + diff --git a/subworkflows/local/arriba_workflow/tests/main.nf.test b/subworkflows/local/arriba_workflow/tests/main.nf.test new file mode 100644 index 00000000..e49a3768 --- /dev/null +++ b/subworkflows/local/arriba_workflow/tests/main.nf.test @@ -0,0 +1,390 @@ +nextflow_workflow { + + name "Test Subworkflow ARRIBA_WORKFLOW" + script "../main.nf" + workflow "ARRIBA_WORKFLOW" + tag "subworkflow" + tag "arriba" + tag "arriba/arriba" + tag "samtools" + tag "samtools/index" + tag "samtools/sort" + tag "samtools/view" + tag "star" + tag "star/genomegenerate" + tag "star/align" + + + // Test #1 Indexing + test("ARRIBA_WORKFLOW - Homo sapiens - FASTQs chr4") { + + setup { + // Create genome index for STAR + run("STAR_GENOMEGENERATE") { + script "../../../../modules/nf-core/star/genomegenerate/main.nf" + process { + """ + // FASTA + input[0] = Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/ensembl/Homo_sapiens.GRCh38.102.chr4.1700000-54900000.fa", 
checkIfExists: true + ) + .map{ [[id: it.getName() ], it ]} + + // GTF + input[1] = Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/ensembl/Homo_sapiens.GRCh38.102.chr4.gtf", checkIfExists: true + ) + .map{ [[id: it.getName() ], it ]} + """ + } + } + } + + when { + workflow { + """ + // ch_reads + input[0] = Channel.of( + [ + [ id: "test_fastqs" ], + [ + file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/human/reads_1.fq.gz", checkIfExists: true), + file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/human/reads_2.fq.gz", checkIfExists: true) + ] + ] ) + + // ch_gtf + input[1] = + Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/ensembl/Homo_sapiens.GRCh38.102.chr4.gtf", checkIfExists: true + ) + .map{ [ [ id: it.name ], it ] } + + // ch_fasta + input[2] = + Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/ensembl/Homo_sapiens.GRCh38.102.chr4.1700000-54900000.fa", checkIfExists: true + ) + .map{ [ [ id: it.name ], it ] } + + // ch_starindex_ref + input[3] = STAR_GENOMEGENERATE.out.index + + // ch_arriba_ref_blacklist + input[4] = + Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/arriba/blacklist_hg38_GRCh38_v2.4.0.tsv.gz", checkIfExists: true + ) + .map{ [ [ id: it.name ], it ] } + + // ch_arriba_ref_known_fusions + input[5] = + Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/arriba/known_fusions_hg38_GRCh38_v2.4.0.tsv.gz", checkIfExists: true + ) + .map{ [ [ id: it.name ], it ] } + + // ch_arriba_ref_cytobands + input[6] = + Channel.fromPath( + 
"https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/arriba/cytobands_hg38_GRCh38_v2.4.0.tsv", checkIfExists: true + ) + .map{ [ [ id: it.name ], it ] } + + // ch_arriba_ref_protein_domains + input[7] = + Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/arriba/protein_domains_hg38_GRCh38_v2.4.0.gff3", checkIfExists: true + ) + .map{ [ [ id: it.name ], it ] } + + // arriba (boolean) + input[8] = true + + // all (boolean) + input[9] = true + + // fusioninspector_only (boolean) + input[10] = false + + // star_ignore_sjdbgtf (boolean) + input[11] = false + + // seq_center (string) + input[12] = 'test_center' + + // arriba_fusions (path) + input[13] = null + + // cram (array) + input[14] = [ 'arriba' ] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { with(workflow.out) { + assert snapshot( + file(fusions[0][1]), + file(fusions_fail[0]), + file(cram_index[0][1]).name, + file(cram_index[0][2]).name, + versions.collect{ file(it) } + ).match() + } + } + ) + } + } + + + // Test #2 With arriba_fusions file + test("ARRIBA_WORKFLOW - Homo sapiens - FASTQs chr4 - External fusion file") { + + setup { + // Create genome index for STAR + run("STAR_GENOMEGENERATE") { + script "../../../../modules/nf-core/star/genomegenerate/main.nf" + process { + """ + // FASTA + input[0] = Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/ensembl/Homo_sapiens.GRCh38.102.chr4.1700000-54900000.fa", checkIfExists: true + ) + .map{ [[id: it.getName() ], it ]} + + // GTF + input[1] = Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/ensembl/Homo_sapiens.GRCh38.102.chr4.gtf", checkIfExists: true + ) + .map{ [[id: it.getName() ], it ]} + """ + } + } + } + + when { + workflow { + """ + // ch_reads + input[0] = Channel.of( + [ + [ id: 
"test_fastqs" ], + [ + file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/human/reads_1.fq.gz", checkIfExists: true), + file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/human/reads_2.fq.gz", checkIfExists: true) + ] + ] ) + + // ch_gtf + input[1] = + Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/ensembl/Homo_sapiens.GRCh38.102.chr4.gtf", checkIfExists: true + ) + .map{ [ [ id: it.name ], it ] } + + // ch_fasta + input[2] = + Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/ensembl/Homo_sapiens.GRCh38.102.chr4.1700000-54900000.fa", checkIfExists: true + ) + .map{ [ [ id: it.name ], it ] } + + // ch_starindex_ref + input[3] = STAR_GENOMEGENERATE.out.index + + // ch_arriba_ref_blacklist + input[4] = + Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/arriba/blacklist_hg38_GRCh38_v2.4.0.tsv.gz", checkIfExists: true + ) + .map{ [ [ id: it.name ], it ] } + + // ch_arriba_ref_known_fusions + input[5] = + Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/arriba/known_fusions_hg38_GRCh38_v2.4.0.tsv.gz", checkIfExists: true + ) + .map{ [ [ id: it.name ], it ] } + + // ch_arriba_ref_cytobands + input[6] = + Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/arriba/cytobands_hg38_GRCh38_v2.4.0.tsv", checkIfExists: true + ) + .map{ [ [ id: it.name ], it ] } + + // ch_arriba_ref_protein_domains + input[7] = + Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/arriba/protein_domains_hg38_GRCh38_v2.4.0.gff3", checkIfExists: true + ) + .map{ [ [ id: it.name ], it ] } + + // arriba (boolean) + input[8] 
= true + + // all (boolean) + input[9] = true + + // fusioninspector_only (boolean) + input[10] = false + + // star_ignore_sjdbgtf (boolean) + input[11] = false + + // seq_center (string) + input[12] = 'test_center' + + // arriba_fusions (string path) + input[13] = "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/arriba/test_fastqs.arriba.fusions.tsv" + + // cram (array) + input[14] = [ 'arriba' ] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { with(workflow.out) { + assert snapshot( + fusions[0].size() == 2, + fusions_fail.size() == 1, + file(cram_index[0][1]).name, + file(cram_index[0][2]).name, + versions.collect{ file(it) } + ).match() + } + } + ) + } + } + + // TEST #3 WITHOUT INDEXING + test("ARRIBA_WORKFLOW - Homo sapiens - FASTQs chr4 - cram = []") { + + setup { + // Create genome index for STAR + run("STAR_GENOMEGENERATE") { + script "../../../../modules/nf-core/star/genomegenerate/main.nf" + process { + """ + // FASTA + input[0] = Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/ensembl/Homo_sapiens.GRCh38.102.chr4.1700000-54900000.fa", checkIfExists: true + ) + .map{ [[id: it.getName() ], it ]} + + // GTF + input[1] = Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/ensembl/Homo_sapiens.GRCh38.102.chr4.gtf", checkIfExists: true + ) + .map{ [[id: it.getName() ], it ]} + """ + } + } + } + + when { + workflow { + """ + // ch_reads + input[0] = Channel.of( + [ + [ id: "test_fastqs" ], + [ + file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/human/reads_1.fq.gz", checkIfExists: true), + file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/human/reads_2.fq.gz", checkIfExists: true) + ] + ] ) + + // ch_gtf + input[1] = + Channel.fromPath( + 
"https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/ensembl/Homo_sapiens.GRCh38.102.chr4.gtf", checkIfExists: true + ) + .map{ [ [ id: it.name ], it ] } + + // ch_fasta + input[2] = + Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/ensembl/Homo_sapiens.GRCh38.102.chr4.1700000-54900000.fa", checkIfExists: true + ) + .map{ [ [ id: it.name ], it ] } + + // ch_starindex_ref + input[3] = STAR_GENOMEGENERATE.out.index + + // ch_arriba_ref_blacklist + input[4] = + Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/arriba/blacklist_hg38_GRCh38_v2.4.0.tsv.gz", checkIfExists: true + ) + .map{ [ [ id: it.name ], it ] } + + // ch_arriba_ref_known_fusions + input[5] = + Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/arriba/known_fusions_hg38_GRCh38_v2.4.0.tsv.gz", checkIfExists: true + ) + .map{ [ [ id: it.name ], it ] } + + // ch_arriba_ref_cytobands + input[6] = + Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/arriba/cytobands_hg38_GRCh38_v2.4.0.tsv", checkIfExists: true + ) + .map{ [ [ id: it.name ], it ] } + + // ch_arriba_ref_protein_domains + input[7] = + Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/arriba/protein_domains_hg38_GRCh38_v2.4.0.gff3", checkIfExists: true + ) + .map{ [ [ id: it.name ], it ] } + + // arriba (boolean) + input[8] = true + + // all (boolean) + input[9] = true + + // fusioninspector_only (boolean) + input[10] = false + + // star_ignore_sjdbgtf (boolean) + input[11] = false + + // seq_center (string) + input[12] = 'test_center' + + // arriba_fusions (path) + input[13] = null + + // cram (array) + input[14] = [ ] + """ + } + } + + then { + assertAll( + { assert 
workflow.success }, + { with(workflow.out) { + assert snapshot( + file(fusions[0][1]), + file(fusions_fail[0]), + cram_index.size() == 0, + versions.collect{ file(it) } + ).match() + } + } + ) + } + } + +} diff --git a/subworkflows/local/arriba_workflow/tests/main.nf.test.snap b/subworkflows/local/arriba_workflow/tests/main.nf.test.snap new file mode 100644 index 00000000..2057827a --- /dev/null +++ b/subworkflows/local/arriba_workflow/tests/main.nf.test.snap @@ -0,0 +1,57 @@ +{ + "ARRIBA_WORKFLOW - Homo sapiens - FASTQs chr4 - External fusion file": { + "content": [ + true, + true, + "test_fastqs_star_for_arriba_sorted.cram", + "test_fastqs_star_for_arriba_sorted.cram.crai", + [ + "versions.yml:md5,439bbb92ff0a83f1e278fc396e9d8ce9", + "versions.yml:md5,85458747b55f37c1a5afd39ee7a3a4aa", + "versions.yml:md5,bfc5d96804f2991c7f7c705f1ddf81ec", + "versions.yml:md5,f4f64d3f1fd867d5afa51e03f7cf2824" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-11T15:53:59.18258718" + }, + "ARRIBA_WORKFLOW - Homo sapiens - FASTQs chr4": { + "content": [ + "test_fastqs.arriba.fusions.tsv:md5,8f39789c4428e81eb9a8d0e54c34c43d", + "test_fastqs.arriba.fusions.discarded.tsv:md5,b804c1ed5b01d34163f5c0b2f6810f98", + "test_fastqs_star_for_arriba_sorted.cram", + "test_fastqs_star_for_arriba_sorted.cram.crai", + [ + "versions.yml:md5,24030f38976402fad0861e6ec99ee6b6", + "versions.yml:md5,439bbb92ff0a83f1e278fc396e9d8ce9", + "versions.yml:md5,85458747b55f37c1a5afd39ee7a3a4aa", + "versions.yml:md5,bfc5d96804f2991c7f7c705f1ddf81ec", + "versions.yml:md5,f4f64d3f1fd867d5afa51e03f7cf2824" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-11T15:43:48.053656601" + }, + "ARRIBA_WORKFLOW - Homo sapiens - FASTQs chr4 - cram = []": { + "content": [ + "test_fastqs.arriba.fusions.tsv:md5,8f39789c4428e81eb9a8d0e54c34c43d", + "test_fastqs.arriba.fusions.discarded.tsv:md5,b804c1ed5b01d34163f5c0b2f6810f98", + true, + [ + 
"versions.yml:md5,24030f38976402fad0861e6ec99ee6b6", + "versions.yml:md5,439bbb92ff0a83f1e278fc396e9d8ce9" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-11T16:07:37.079418154" + } +} \ No newline at end of file diff --git a/subworkflows/local/build_references.nf b/subworkflows/local/build_references.nf new file mode 100644 index 00000000..56fd8eb2 --- /dev/null +++ b/subworkflows/local/build_references.nf @@ -0,0 +1,187 @@ +/* +======================================================================================== + IMPORT LOCAL MODULES/SUBWORKFLOWS +======================================================================================== +*/ + +include { GENCODE_DOWNLOAD } from '../../modules/local/gencode_download/main' +include { FUSIONCATCHER_BUILD } from '../../modules/local/fusioncatcher/build/main' +include { FUSIONREPORT_DOWNLOAD } from '../../modules/local/fusionreport/download/main' +include { HGNC_DOWNLOAD } from '../../modules/local/hgnc/main' +include { STARFUSION_BUILD } from '../../modules/local/starfusion/build/main' +include { GTF_TO_REFFLAT } from '../../modules/local/uscs/custom_gtftogenepred/main' +include { GET_RRNA_TRANSCRIPTS } from '../../modules/local/get_rrna_transcript/main' + +/* +======================================================================================== + IMPORT NF-CORE MODULES/SUBWORKFLOWS +======================================================================================== +*/ +include { ARRIBA_DOWNLOAD } from '../../modules/nf-core/arriba/download/main' +include { SAMTOOLS_FAIDX } from '../../modules/nf-core/samtools/faidx/main' +include { STAR_GENOMEGENERATE } from '../../modules/nf-core/star/genomegenerate/main' +include { GATK4_CREATESEQUENCEDICTIONARY } from '../../modules/nf-core/gatk4/createsequencedictionary/main' +include { GATK4_BEDTOINTERVALLIST } from '../../modules/nf-core/gatk4/bedtointervallist/main' +include { SALMON_INDEX } from 
'../../modules/nf-core/salmon/index/main' +include { GFFREAD } from '../../modules/nf-core/gffread/main' + +/* +======================================================================================== + RUN MAIN WORKFLOW +======================================================================================== +*/ + +workflow BUILD_REFERENCES { + + main: + ch_versions = Channel.empty() + + if (!file(params.fasta).exists() || file(params.fasta).isEmpty() || + !file(params.gtf).exists() || file(params.gtf).isEmpty()){ + GENCODE_DOWNLOAD(params.genome_gencode_version, params.genome) + ch_versions = ch_versions.mix(GENCODE_DOWNLOAD.out.versions) + ch_fasta = GENCODE_DOWNLOAD.out.fasta.map { that -> [[id:that.Name], that] } + ch_gtf = GENCODE_DOWNLOAD.out.gtf.map { that -> [[id:that.Name], that] } + } else { + ch_fasta = Channel.fromPath(params.fasta).map { that -> [[id:that.Name], that] } + ch_gtf = Channel.fromPath(params.gtf).map { that -> [[id:that.Name], that] } + } + + if (!file(params.fai).exists() || file(params.fai).isEmpty()){ + SAMTOOLS_FAIDX(ch_fasta, [[],[]]) + ch_versions = ch_versions.mix(SAMTOOLS_FAIDX.out.versions) + ch_fai = SAMTOOLS_FAIDX.out.fai + } else { + ch_fai = Channel.fromPath(params.fai).map { that -> [[id:that.Name], that] } + } + + if ((!file(params.hgnc_ref).exists() || file(params.hgnc_ref).isEmpty() || + !file(params.hgnc_date).exists() || file(params.hgnc_date).isEmpty()) && !params.skip_vcf){ + HGNC_DOWNLOAD( ) + ch_versions = ch_versions.mix(HGNC_DOWNLOAD.out.versions) + ch_hgnc_ref = HGNC_DOWNLOAD.out.hgnc_ref + ch_hgnc_date = HGNC_DOWNLOAD.out.hgnc_date + } else { + ch_hgnc_ref = Channel.fromPath(params.hgnc_ref).map { that -> [[id:that.Name], that] } + ch_hgnc_date = Channel.fromPath(params.hgnc_date).map { that -> [[id:that.Name], that] } + } + + if (!file(params.rrna_intervals).exists() || file(params.rrna_intervals).isEmpty()){ + GATK4_CREATESEQUENCEDICTIONARY(ch_fasta) + ch_versions = 
ch_versions.mix(GATK4_CREATESEQUENCEDICTIONARY.out.versions) + GET_RRNA_TRANSCRIPTS(ch_gtf) + ch_versions = ch_versions.mix(GET_RRNA_TRANSCRIPTS.out.versions) + GATK4_BEDTOINTERVALLIST(GET_RRNA_TRANSCRIPTS.out.bed, GATK4_CREATESEQUENCEDICTIONARY.out.dict ) + ch_versions = ch_versions.mix(GATK4_BEDTOINTERVALLIST.out.versions) + ch_rrna_interval = GATK4_BEDTOINTERVALLIST.out.interval_list + } else { + ch_rrna_interval = Channel.fromPath(params.rrna_intervals).map { that -> [[id:that.Name], that] } + } + + if (!file(params.refflat).exists() || file(params.refflat).isEmpty()){ + GTF_TO_REFFLAT(ch_gtf) + ch_versions = ch_versions.mix(GTF_TO_REFFLAT.out.versions) + ch_refflat = GTF_TO_REFFLAT.out.refflat.map { that -> [[id:that.Name], that] } + } else { + ch_refflat = Channel.fromPath(params.refflat).map { that -> [[id:that.Name], that] } + } + + if (!file(params.salmon_index).exists() || file(params.salmon_index).isEmpty() || + !file(params.salmon_index_stub_check).exists() || file(params.salmon_index_stub_check).isEmpty()){ // add condition for qc + GFFREAD(ch_gtf, ch_fasta.map{ it -> it[1] }) + ch_versions = ch_versions.mix(GFFREAD.out.versions) + SALMON_INDEX(ch_fasta.map{ it -> it[1] }, GFFREAD.out.gffread_fasta.map{ it -> it[1] }) + ch_versions = ch_versions.mix(SALMON_INDEX.out.versions) + ch_salmon_index = SALMON_INDEX.out.index + } else { + ch_salmon_index = Channel.fromPath({params.salmon_index}) + } + + if ((params.starindex || params.all || params.starfusion || params.arriba) && + (!file(params.starindex_ref).exists() || file(params.starindex_ref).isEmpty() || + !file(params.starindex_ref_stub_check).exists() || file(params.starindex_ref_stub_check).isEmpty() )) { + STAR_GENOMEGENERATE(ch_fasta, ch_gtf) + ch_versions = ch_versions.mix(STAR_GENOMEGENERATE.out.versions) + ch_starindex_ref = STAR_GENOMEGENERATE.out.index + } else { + ch_starindex_ref = Channel.fromPath(params.starindex_ref).map { that -> [[id:that.Name], that] } + } + + if ((params.arriba || 
params.all) && + (!file(params.arriba_ref_blacklist).exists() || file(params.arriba_ref_blacklist).isEmpty() || + !file(params.arriba_ref_known_fusions).exists() || file(params.arriba_ref_known_fusions).isEmpty() || + !file(params.arriba_ref_protein_domains).exists() || file(params.arriba_ref_protein_domains).isEmpty())) { + ARRIBA_DOWNLOAD(params.genome) + ch_versions = ch_versions.mix(ARRIBA_DOWNLOAD.out.versions) + ch_arriba_ref_blacklist = ARRIBA_DOWNLOAD.out.blacklist + ch_arriba_ref_cytobands = ARRIBA_DOWNLOAD.out.cytobands + ch_arriba_ref_known_fusions = ARRIBA_DOWNLOAD.out.known_fusions + ch_arriba_ref_protein_domains = ARRIBA_DOWNLOAD.out.protein_domains + } else { + ch_arriba_ref_blacklist = Channel.fromPath(params.arriba_ref_blacklist) + ch_arriba_ref_cytobands = Channel.fromPath(params.arriba_ref_cytobands) + ch_arriba_ref_known_fusions = Channel.fromPath(params.arriba_ref_known_fusions) + ch_arriba_ref_protein_domains = Channel.fromPath(params.arriba_ref_protein_domains) + } + + + if ((params.fusioncatcher || params.all) && + (!file(params.fusioncatcher_ref).exists() || file(params.fusioncatcher_ref).isEmpty() || + !file(params.fusioncatcher_ref_stub_check).exists() || file(params.fusioncatcher_ref_stub_check).isEmpty() )) { + FUSIONCATCHER_BUILD(params.genome_gencode_version) + ch_versions = ch_versions.mix(FUSIONCATCHER_BUILD.out.versions) + ch_fusioncatcher_ref = FUSIONCATCHER_BUILD.out.reference + } + else { + ch_fusioncatcher_ref = Channel.fromPath(params.fusioncatcher_ref) + } + + + if ((params.starfusion || params.all) && + (!file(params.starfusion_ref).exists() || file(params.starfusion_ref).isEmpty() || + !file(params.starfusion_ref_stub_check).exists() || file(params.starfusion_ref_stub_check).isEmpty() )) { + STARFUSION_BUILD(ch_fasta, ch_gtf, params.fusion_annot_lib, params.species) + ch_versions = ch_versions.mix(STARFUSION_BUILD.out.versions) + ch_starfusion_ref = STARFUSION_BUILD.out.reference + } + else { + ch_starfusion_ref = 
Channel.fromPath(params.starfusion_ref) + } + + + if ((params.fusionreport || params.all) && + (!file(params.fusionreport_ref).exists() || file(params.fusionreport_ref).isEmpty() || + !file(params.fusionreport_ref_stub_check).exists() || file(params.fusionreport_ref_stub_check).isEmpty())) { + if (!params.no_cosmic && (!params.cosmic_username || !params.cosmic_passwd)) { exit 1, 'COSMIC username and/or password missing' } + FUSIONREPORT_DOWNLOAD() + ch_versions = ch_versions.mix(FUSIONREPORT_DOWNLOAD.out.versions) + ch_fusionreport_ref = FUSIONREPORT_DOWNLOAD.out.fusionreport_ref + } else { + ch_fusionreport_ref = Channel.fromPath(params.fusionreport_ref).map { that -> [[id:that.Name], that] } + } + + emit: + ch_fasta + ch_gtf + ch_fai + ch_hgnc_ref + ch_hgnc_date + ch_rrna_interval + ch_refflat + ch_salmon_index + ch_starindex_ref + ch_arriba_ref_blacklist + ch_arriba_ref_cytobands + ch_arriba_ref_known_fusions + ch_arriba_ref_protein_domains + ch_fusioncatcher_ref + ch_starfusion_ref + ch_fusionreport_ref + versions = ch_versions +} + +/* +======================================================================================== + THE END +======================================================================================== +*/ diff --git a/subworkflows/local/ctatsplicing_workflow/main.nf b/subworkflows/local/ctatsplicing_workflow/main.nf new file mode 100644 index 00000000..8b279033 --- /dev/null +++ b/subworkflows/local/ctatsplicing_workflow/main.nf @@ -0,0 +1,31 @@ +include { CTATSPLICING_STARTOCANCERINTRONS } from '../../../modules/local/ctatsplicing/startocancerintrons' + +workflow CTATSPLICING_WORKFLOW { + take: + split_junctions // [ val(meta), path(split_junctions.SJ.out.tab) ] + junctions // [ val(meta), path(junctions.Chimeric.out.junction) ] + aligned_bams // [ val(meta), path(aligned_bams.Aligned.sortedByCoord.out.bam) ] + ctat_genome_lib // [ val(meta2), path(path/to/ctat_genome_lib) ] + + main: + def ch_versions = Channel.empty() + + if 
(params.ctatsplicing || params.all) {
+        def ch_ctatsplicing_input = split_junctions
+            .join(junctions, failOnMismatch:true, failOnDuplicate:true)
+            .join(aligned_bams, failOnMismatch:true, failOnDuplicate:true)
+            .map { meta, split_junction, junction, bam ->
+                [ meta, split_junction, junction, bam, [] ]
+            }
+
+        CTATSPLICING_STARTOCANCERINTRONS(
+            ch_ctatsplicing_input,
+            ctat_genome_lib
+        )
+        ch_versions = ch_versions.mix(CTATSPLICING_STARTOCANCERINTRONS.out.versions.first())
+
+    }
+
+    emit:
+    versions = ch_versions
+}
diff --git a/subworkflows/local/fusioncatcher_workflow.nf b/subworkflows/local/fusioncatcher_workflow.nf
new file mode 100644
index 00000000..97ed55de
--- /dev/null
+++ b/subworkflows/local/fusioncatcher_workflow.nf
@@ -0,0 +1,34 @@
+include { FUSIONCATCHER } from '../../modules/local/fusioncatcher/detect/main'
+
+// Emits one [ meta, fusions ] item per sample: a user-supplied file, the FUSIONCATCHER output, or a dummy placeholder file.
+workflow FUSIONCATCHER_WORKFLOW {
+    take:
+    reads // assumed to be [ meta, reads ] as passed to FUSIONCATCHER below - TODO confirm against caller
+
+    main:
+    ch_versions = Channel.empty()
+    ch_dummy_file = file("$baseDir/assets/dummy_file_fusioncatcher.txt", checkIfExists: true)
+
+    if ((params.fusioncatcher || params.all) && !params.fusioninspector_only) {
+        if (params.fusioncatcher_fusions){
+            ch_fusioncatcher_fusions = reads.combine(Channel.value(file(params.fusioncatcher_fusions, checkIfExists:true)))
+                .map { it -> [ it[0], it[2] ] } // combine appends the file after the reads item: keep meta (0) and file (2), as STARFUSION_WORKFLOW does
+        } else {
+            FUSIONCATCHER (
+                reads,
+                params.fusioncatcher_ref
+            )
+            ch_fusioncatcher_fusions = FUSIONCATCHER.out.fusions
+            ch_versions = ch_versions.mix(FUSIONCATCHER.out.versions)
+        }
+    }
+    else {
+        ch_fusioncatcher_fusions = reads.combine(Channel.value(file(ch_dummy_file, checkIfExists:true)))
+            .map { it -> [ it[0], it[2] ] } // tool not run: pair meta with the dummy file (index 2), not with the reads (index 1)
+    }
+
+    emit:
+    fusions = ch_fusioncatcher_fusions
+    versions = ch_versions
+    }
+
diff --git a/subworkflows/local/fusioninspector_workflow.nf b/subworkflows/local/fusioninspector_workflow.nf
new file mode 100644
index 00000000..f521ae8c
--- /dev/null
+++ b/subworkflows/local/fusioninspector_workflow.nf
@@ -0,0 +1,65 @@
+include { AGAT_CONVERTSPGFF2TSV } from
'../../modules/nf-core/agat/convertspgff2tsv/main' +include { ARRIBA_VISUALISATION } from '../../modules/local/arriba/visualisation/main' +include { CAT_CAT } from '../../modules/nf-core/cat/cat/main' +include { VCF_COLLECT } from '../../modules/local/vcf_collect/main' +include { FUSIONINSPECTOR } from '../../modules/local/fusioninspector/main' + +workflow FUSIONINSPECTOR_WORKFLOW { + take: + reads + fusion_list + fusion_list_filtered + fusionreport_out + fusionreport_csv + bam_sorted_indexed + ch_gtf + ch_arriba_ref_protein_domains + ch_arriba_ref_cytobands + ch_hgnc_ref + ch_hgnc_date + + main: + ch_versions = Channel.empty() + ch_arriba_visualisation = Channel.empty() + index ="${params.starfusion_ref}" + + ch_fusion_list = ( params.tools_cutoff > 1 ? fusion_list_filtered : fusion_list ) + .branch{ + no_fusions: it[1].size() == 0 + fusions: it[1].size() > 0 + } + + if (params.whitelist) { + ch_whitelist = ch_fusion_list.fusions.combine(Channel.value(file(params.whitelist, checkIfExists:true))) + .map { meta, fusions, whitelist -> [ meta, [fusions, whitelist] ] } + + CAT_CAT(ch_whitelist) // fusioninspector takes care of possible duplicates + ch_versions = ch_versions.mix(CAT_CAT.out.versions) + ch_reads_fusion = reads.join(CAT_CAT.out.file_out ) + } + else { + ch_reads_fusion = reads.join(ch_fusion_list.fusions ) + } + + FUSIONINSPECTOR( ch_reads_fusion, index) + ch_versions = ch_versions.mix(FUSIONINSPECTOR.out.versions) + + AGAT_CONVERTSPGFF2TSV(FUSIONINSPECTOR.out.out_gtf) + ch_versions = ch_versions.mix(AGAT_CONVERTSPGFF2TSV.out.versions) + + fusion_data = FUSIONINSPECTOR.out.tsv_coding_effect.join(AGAT_CONVERTSPGFF2TSV.out.tsv).join(fusionreport_out).join(fusionreport_csv) + VCF_COLLECT(fusion_data, ch_hgnc_ref, ch_hgnc_date) + ch_versions = ch_versions.mix(VCF_COLLECT.out.versions) + + if ((params.starfusion || params.all || params.stringtie) && !params.fusioninspector_only && !params.skip_vis) { + ch_bam_sorted_indexed_fusions = 
bam_sorted_indexed.join(FUSIONINSPECTOR.out.tsv)
+        ARRIBA_VISUALISATION(ch_bam_sorted_indexed_fusions, ch_gtf, ch_arriba_ref_protein_domains, ch_arriba_ref_cytobands)
+        ch_versions = ch_versions.mix(ARRIBA_VISUALISATION.out.versions)
+        ch_arriba_visualisation = ARRIBA_VISUALISATION.out.pdf
+    }
+
+    emit:
+    ch_arriba_visualisation
+    versions = ch_versions
+}
+
diff --git a/subworkflows/local/fusionreport_workflow.nf b/subworkflows/local/fusionreport_workflow.nf
new file mode 100644
index 00000000..04019e12
--- /dev/null
+++ b/subworkflows/local/fusionreport_workflow.nf
@@ -0,0 +1,44 @@
+include { FUSIONREPORT } from '../../modules/local/fusionreport/detect/main'
+
+// Runs FUSIONREPORT on the joined per-sample caller outputs, or forwards params.fusioninspector_fusions when params.fusioninspector_only is set.
+workflow FUSIONREPORT_WORKFLOW {
+    take:
+    reads
+    fusionreport_ref
+    arriba_fusions
+    starfusion_fusions
+    fusioncatcher_fusions
+
+    main:
+    ch_versions = Channel.empty()
+    ch_report = Channel.empty()
+    ch_csv = Channel.empty()
+
+    if (!params.fusioninspector_only) {
+        reads_fusions = reads
+            .join(arriba_fusions, failOnMismatch:true, failOnDuplicate:true)
+            .join(starfusion_fusions, failOnMismatch:true, failOnDuplicate:true)
+            .join(fusioncatcher_fusions, failOnMismatch:true, failOnDuplicate:true)
+
+        FUSIONREPORT(reads_fusions, fusionreport_ref, params.tools_cutoff)
+        ch_fusion_list = FUSIONREPORT.out.fusion_list
+        ch_fusion_list_filtered = FUSIONREPORT.out.fusion_list_filtered
+        ch_versions = ch_versions.mix(FUSIONREPORT.out.versions)
+        ch_report = FUSIONREPORT.out.report
+        ch_csv = FUSIONREPORT.out.csv
+    } else {
+        ch_fusion_list = reads.combine(Channel.value(file(params.fusioninspector_fusions, checkIfExists:true)))
+            .map { it -> [ it[0], it[2] ] } // combine appends the file after the reads item (assumed [ meta, reads ]): keep meta and the fusion list, not the reads
+
+        ch_fusion_list_filtered = ch_fusion_list // no FUSIONREPORT run here, so the filtered list is the supplied list
+    }
+
+    emit:
+    versions = ch_versions
+    fusion_list = ch_fusion_list
+    fusion_list_filtered = ch_fusion_list_filtered
+    report = ch_report.ifEmpty(null)
+    csv = ch_csv.ifEmpty(null)
+
+}
+
diff --git a/subworkflows/local/qc_workflow/main.nf b/subworkflows/local/qc_workflow/main.nf
new file mode 100644
index
00000000..4b635242
--- /dev/null
+++ b/subworkflows/local/qc_workflow/main.nf
@@ -0,0 +1,39 @@
+//
+// Collect QC metrics from sorted BAMs: Picard RNA-seq metrics, GATK4 MarkDuplicates metrics and Picard insert-size metrics
+//
+
+include { PICARD_COLLECTRNASEQMETRICS } from '../../../modules/nf-core/picard/collectrnaseqmetrics'
+include { GATK4_MARKDUPLICATES } from '../../../modules/nf-core/gatk4/markduplicates'
+include { PICARD_COLLECTINSERTSIZEMETRICS } from '../../../modules/nf-core/picard/collectinsertsizemetrics'
+
+workflow QC_WORKFLOW {
+    take:
+    ch_bam_sorted // channel [ meta, bam ]
+    ch_refflat // channel [ meta, refflat ]
+    ch_fasta // channel [ meta, fasta ]
+    ch_fai // channel [ meta, fai ]
+    ch_rrna_interval // channel [ meta, interval ]
+
+    main:
+    ch_versions = Channel.empty() // collects the versions output of every process called below
+
+    PICARD_COLLECTRNASEQMETRICS(ch_bam_sorted, ch_refflat.map{ meta, refflat -> [ refflat ] }, ch_fasta.map{ meta, fasta -> [ fasta ] }, ch_rrna_interval.map{ meta, intervals -> [ intervals ] }.ifEmpty([]) ) // meta is dropped from the reference channels; ifEmpty([]) covers chromosomes or annotations without rRNA genes
+    ch_versions = ch_versions.mix(PICARD_COLLECTRNASEQMETRICS.out.versions)
+    ch_rnaseq_metrics = PICARD_COLLECTRNASEQMETRICS.out.metrics
+
+    GATK4_MARKDUPLICATES(ch_bam_sorted, ch_fasta.map { meta, fasta -> [ fasta ]}, ch_fai.map { meta, fasta_fai -> [ fasta_fai ]}) // fasta and fai are passed without their meta map
+    ch_versions = ch_versions.mix(GATK4_MARKDUPLICATES.out.versions)
+    ch_duplicate_metrics = GATK4_MARKDUPLICATES.out.metrics
+
+    PICARD_COLLECTINSERTSIZEMETRICS(ch_bam_sorted)
+    ch_versions = ch_versions.mix(PICARD_COLLECTINSERTSIZEMETRICS.out.versions)
+    ch_insertsize_metrics = PICARD_COLLECTINSERTSIZEMETRICS.out.metrics
+
+    emit:
+    versions = ch_versions // channel [ path ]
+    rnaseq_metrics = ch_rnaseq_metrics // channel [ meta, path ]
+    duplicate_metrics = ch_duplicate_metrics // channel [ meta, path ]
+    insertsize_metrics = ch_insertsize_metrics // channel [ meta, path ]
+
+}
+
diff --git a/subworkflows/local/qc_workflow/test/main.nf.test b/subworkflows/local/qc_workflow/test/main.nf.test
new file mode 100644
index
00000000..cfb48e06
--- /dev/null
+++ b/subworkflows/local/qc_workflow/test/main.nf.test
@@ -0,0 +1,127 @@
+nextflow_workflow {
+
+    name "Test Subworkflow QC_WORKFLOW"
+    script "../main.nf"
+    config "./nextflow.config"
+    workflow "QC_WORKFLOW"
+    tag "qc"
+    tag "subworkflow"
+
+    test("QC_WORKFLOW - Homo sapiens chr22") {
+
+        // Build the refflat and rRNA interval-list inputs from the test GTF before the workflow runs
+        setup {
+
+            // Create refflat reference
+            run("UCSC_GTFTOGENEPRED") {
+                script "../../../../modules/nf-core/ucsc/gtftogenepred/main.nf"
+                process {
+                    """
+                    input[0] =
+                        Channel.fromPath(
+                            "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/modules/data/genomics/homo_sapiens/genome/genome.gtf",
+                            checkIfExists: true
+                        )
+                        .map{ [ [id:it.Name], it ] }
+                    """
+                }
+            }
+
+            // Filter GTF to extract rRNA genes
+            run("RRNATRANSCRIPTS") {
+                script "../../../../modules/nf-core/rrnatranscripts/main.nf"
+                process {
+                    """
+                    input[0] = Channel.fromPath("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/modules/data/genomics/homo_sapiens/genome/genome.gtf", checkIfExists: true)
+                    """
+                }
+            }
+
+            // Convert rRNA GTF to BED
+            run("BEDOPS_CONVERT2BED") {
+                script "../../../../modules/nf-core/bedops/convert2bed/main.nf"
+                process {
+                    """
+                    input[0] = RRNATRANSCRIPTS.out.rrna_gtf.map{ it -> [ [id:it.Name], it ] }
+                    """
+                }
+            }
+
+            // Convert rRNA BED to interval list (the necessary file)
+            run("GATK4_BEDTOINTERVALLIST") {
+                script "../../../../modules/nf-core/gatk4/bedtointervallist/main.nf"
+                process {
+                    """
+                    input[0] = BEDOPS_CONVERT2BED.out.bed
+                    input[1] = Channel.of(
+                        [
+                            [id: 'chr22_dic'],
+                            file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/modules/data/genomics/homo_sapiens/genome/genome.dic", checkIfExists: true)
+                        ]
+                    )
+                    """
+                }
+            }
+
+        }
+
+        when {
+            // Params to activate modules ext.when condition
+            params {
+                skip_qc = false
+                fusioninspector_only = false
+                starfusion = true
+                all = true
+            }
+
+            workflow { // NOTE(review): six inputs are set below, but QC_WORKFLOW in ../main.nf declares five take: channels (no GTF) - confirm they are in sync
+                """
+                // ch_bam_sorted
+                input[0] = Channel.of(
+                    [
+                        [id: "chr22_bam"],
+                        file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/modules/data/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam", checkIfExists: true)
+                    ])
+
+                // ch_chrgtf
+                input[1] = Channel.of(
+                    [
+                        [ id: "chr22_gtf" ],
+                        file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/modules/data/genomics/homo_sapiens/genome/genome.gtf", checkIfExists: true)
+                    ])
+
+                // ch_refflat
+                input[2] = UCSC_GTFTOGENEPRED.out.refflat
+
+                // ch_fasta
+                input[3] = Channel.of(
+                    [
+                        [ id: "test_ref" ],
+                        file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/modules/data/genomics/homo_sapiens/genome/genome.fasta", checkIfExists: true)
+                    ] )
+
+                // ch_fai
+                input[4] = Channel.of(
+                    [
+                        [ id: "test_ref" ],
+                        file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/modules/data/genomics/homo_sapiens/genome/genome.fasta.fai", checkIfExists: true)
+                    ] )
+
+                // ch rRNA interval list
+                input[5] = GATK4_BEDTOINTERVALLIST.out.interval_list
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert workflow.success },
+                { assert snapshot(file( workflow.out.versions[0] )).match('versions' ) },
+                { assert snapshot(file( workflow.out.rnaseq_metrics[0][1] ).readLines()[4..-1]).md5().match('rnaseq_metrics' ) },
+                { assert snapshot(file( workflow.out.duplicate_metrics[0][1] ).readLines()[4..-1]).md5().match('duplicate_metrics' ) },
+                { assert snapshot(file( workflow.out.insertsize_metrics[0][1] ).readLines()[4..-1]).md5().match('insertsize_metrics') }
+            )
+        }
+    }
+
+}
diff --git a/subworkflows/local/qc_workflow/test/main.nf.test.snap b/subworkflows/local/qc_workflow/test/main.nf.test.snap
new file mode 100644
index 00000000..b56c12b9
--- /dev/null
+++ b/subworkflows/local/qc_workflow/test/main.nf.test.snap
@@ -0,0 +1,36 @@
+{
+  "duplicate_metrics": {
+    "content": "651d8a4702f9f9871e94afbce3e50e34",
+    "meta": {
+      "nf-test": "0.9.0",
+      "nextflow": "24.10.2"
+    },
+    "timestamp": "2024-12-04T19:29:09.711230835"
+  },
+
"versions": { + "content": [ + "versions.yml:md5,3f13b395c67e317f74194b3b6c89f139" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-04T19:29:09.686297468" + }, + "rnaseq_metrics": { + "content": "84a348c3735ed2f6c47f346eeed661f4", + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-04T19:29:09.704632753" + }, + "insertsize_metrics": { + "content": "160db81b19843c4d46fe74ac61f9f013", + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-04T19:29:09.717929716" + } +} \ No newline at end of file diff --git a/subworkflows/local/qc_workflow/test/nextflow.config b/subworkflows/local/qc_workflow/test/nextflow.config new file mode 100644 index 00000000..498c456f --- /dev/null +++ b/subworkflows/local/qc_workflow/test/nextflow.config @@ -0,0 +1,10 @@ +process { + withName: PICARD_COLLECTRNASEQMETRICS { + ext.args = "--STRAND_SPECIFICITY SECOND_READ_TRANSCRIPTION_STRAND" + } + + withName: GATK4_BEDTOINTERVALLIST { + ext.args = "--KEEP_LENGTH_ZERO_INTERVALS true" + } + +} diff --git a/subworkflows/local/starfusion_workflow/main.nf b/subworkflows/local/starfusion_workflow/main.nf new file mode 100644 index 00000000..938832c9 --- /dev/null +++ b/subworkflows/local/starfusion_workflow/main.nf @@ -0,0 +1,80 @@ +include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_FOR_STARFUSION } from '../../../modules/nf-core/samtools/index/main' +include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_FOR_STARFUSION_CRAM } from '../../../modules/nf-core/samtools/index/main' +include { SAMTOOLS_VIEW as SAMTOOLS_VIEW_FOR_STARFUSION } from '../../../modules/nf-core/samtools/view/main' +include { STAR_ALIGN as STAR_FOR_STARFUSION } from '../../../modules/nf-core/star/align/main' +include { STARFUSION } from '../../../modules/local/starfusion/detect/main' +include { CTATSPLICING_WORKFLOW } from '../ctatsplicing_workflow' + +workflow STARFUSION_WORKFLOW { + take: + reads + ch_gtf + ch_starindex_ref + ch_fasta + 
ch_starfusion_ref
+
+    main:
+    def ch_versions = Channel.empty()
+    def ch_align = Channel.empty()
+    def ch_starfusion_fusions = Channel.empty()
+    def bam_sorted_indexed = Channel.empty()
+    def ch_star_stats = Channel.empty() // default so the emit below is defined on every branch, including params.starfusion_fusions
+    def ch_star_gene_count = Channel.empty() // same default as ch_star_stats
+
+    ch_dummy_file = file("$baseDir/assets/dummy_file_starfusion.txt", checkIfExists: true)
+
+    if ((params.starfusion || params.all || params.stringtie) && !params.fusioninspector_only) {
+        if (params.starfusion_fusions){
+            ch_starfusion_fusions = reads.combine(Channel.value(file(params.starfusion_fusions, checkIfExists:true)))
+                .map { it -> [ it[0], it[2] ] } // keep meta and the supplied fusions file; no alignment is run on this branch
+        } else {
+            STAR_FOR_STARFUSION( reads, ch_starindex_ref, ch_gtf, params.star_ignore_sjdbgtf, '', params.seq_center ?: '')
+            ch_versions = ch_versions.mix(STAR_FOR_STARFUSION.out.versions)
+            ch_align = STAR_FOR_STARFUSION.out.bam_sorted // TODO: This does not seem to be captured and used as the output is bam_sorted_aligned and not bam_sorted
+
+
+            SAMTOOLS_INDEX_FOR_STARFUSION(STAR_FOR_STARFUSION.out.bam_sorted)
+            ch_versions = ch_versions.mix(SAMTOOLS_INDEX_FOR_STARFUSION.out.versions)
+            bam_sorted_indexed = STAR_FOR_STARFUSION.out.bam_sorted.join(SAMTOOLS_INDEX_FOR_STARFUSION.out.bai)
+            reads_junction = reads.join(STAR_FOR_STARFUSION.out.junction ) // TODO: This join is not needed as STARFUSION can simply read from the junction file: https://github.com/STAR-Fusion/STAR-Fusion/wiki#alternatively-kickstart-mode-running-star-yourself-and-then-running-star-fusion-using-the-existing-outputs
+
+            if (params.ctatsplicing || params.all) {
+                CTATSPLICING_WORKFLOW(
+                    STAR_FOR_STARFUSION.out.spl_junc_tab,
+                    STAR_FOR_STARFUSION.out.junction,
+                    STAR_FOR_STARFUSION.out.bam,
+                    ch_starfusion_ref
+                )
+                ch_versions = ch_versions.mix(CTATSPLICING_WORKFLOW.out.versions)
+            }
+
+            if (params.cram.contains('starfusion')){
+                SAMTOOLS_VIEW_FOR_STARFUSION (bam_sorted_indexed, ch_fasta, [] )
+                ch_versions = ch_versions.mix(SAMTOOLS_VIEW_FOR_STARFUSION.out.versions)
+
+                SAMTOOLS_INDEX_FOR_STARFUSION_CRAM (SAMTOOLS_VIEW_FOR_STARFUSION.out.cram)
+                ch_versions = ch_versions.mix(SAMTOOLS_INDEX_FOR_STARFUSION_CRAM.out.versions)
+            }
+            if (params.starfusion || params.all){
+                STARFUSION( reads_junction, ch_starfusion_ref.map { it -> it[1] })
+                ch_versions = ch_versions.mix(STARFUSION.out.versions)
+                ch_starfusion_fusions = STARFUSION.out.fusions
+            }
+
+            ch_star_stats = STAR_FOR_STARFUSION.out.log_final
+            ch_star_gene_count = STAR_FOR_STARFUSION.out.read_per_gene_tab
+        }
+    }
+    else {
+        ch_starfusion_fusions = reads.combine(Channel.value(file(ch_dummy_file, checkIfExists:true)))
+            .map { it -> [ it[0], it[2] ] } // tool not run: pair meta with the dummy file; the STAR channels keep their empty defaults
+    }
+    emit:
+    fusions = ch_starfusion_fusions
+    star_stats = ch_star_stats
+    star_gene_count = ch_star_gene_count
+    ch_bam_sorted = ch_align
+    ch_bam_sorted_indexed = bam_sorted_indexed
+    versions = ch_versions
+    }
+
diff --git a/subworkflows/local/starfusion_workflow/tests/main.nf.test b/subworkflows/local/starfusion_workflow/tests/main.nf.test
new file mode 100644
index 00000000..ac0e7798
--- /dev/null
+++ b/subworkflows/local/starfusion_workflow/tests/main.nf.test
@@ -0,0 +1,115 @@
+nextflow_workflow {
+
+    name "Test Subworkflow STARFUSION_WORKFLOW"
+    script "../main.nf"
+    workflow "STARFUSION_WORKFLOW"
+    tag "subworkflow"
+    tag "star"
+    tag "star/genomegenerate"
+    tag "star/align"
+
+
+    test("STARFUSION_WORKFLOW - Homo sapiens - FASTQs minigenome") {
+        config './nextflow.config'
+
+        setup {
+            run("STAR_GENOMEGENERATE") {
+                script "../../../../modules/nf-core/star/genomegenerate/main.nf"
+                process {
+                    """
+                    // FASTA
+                    input[0] = Channel.fromPath(
+                        "https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/minigenome.fa", checkIfExists: true
+                    )
+                    .map{ [[id: it.getName() ], it ]}
+
+                    // GTF
+                    input[1] = Channel.fromPath(
+                        "https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/minigenome.gtf", checkIfExists: true
+                    )
+                    .map{ [[id: it.getName() ], it ]}
+                    """
+                }
+            }
+
+            run("STARFUSION_BUILD") {
+                script
"../../../../modules/local/starfusion/build/main.nf" + process { + """ + input[0] = [ + [ id:'minigenome fasta' ], + file("https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/minigenome.fa") + ] + input[1] = [ + [ id:'minigenome gtf' ], + file("https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/minigenome.gtf") + ] + + input [2] = file("https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/CTAT_HumanFusionLib.mini.dat.gz") + input [3] = "human" + """ + } + } + } + + when { + workflow { + """ + // ch_reads + input[0] = Channel.of( + [ + [ id: "test_fastqs" ], + [ + file("https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/v0.0.1/rnaseq_1.fastq.gz", checkIfExists: true), + file("https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/v0.0.1/rnaseq_2.fastq.gz", checkIfExists: true) + ] + ] ) + + // ch_gtf + input[1] = + Channel.fromPath( + "https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/minigenome.gtf", checkIfExists: true + ) + .map{ [ [ id: it.name ], it ] } + + // ch_starindex_ref + input[2] = STAR_GENOMEGENERATE.out.index + + // ch_fasta + input[3] = + Channel.fromPath( + "https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/minigenome.fa", checkIfExists: true + ) + .map{ [ [ id: it.name ], it ] } + + // ch_starfusion_ref + input[4] = STARFUSION_BUILD.out.reference + + """ + } + params { + starfusion = true + cram = 'starfusion' + outdir = "$outputDir" + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.fusions, + file(workflow.out.star_stats[0][1]).name, + workflow.out.star_gene_count, + workflow.out.ch_bam_sorted, + workflow.out.ch_bam_sorted_indexed, + workflow.out.versions + ).match() } + ) + } + } + + + + +} diff --git a/subworkflows/local/starfusion_workflow/tests/main.nf.test.snap b/subworkflows/local/starfusion_workflow/tests/main.nf.test.snap new file mode 100644 index 00000000..5bc897e7 --- /dev/null +++ 
b/subworkflows/local/starfusion_workflow/tests/main.nf.test.snap @@ -0,0 +1,38 @@ +{ + "STARFUSION_WORKFLOW - Homo sapiens - FASTQs minigenome": { + "content": [ + [ + [ + { + "id": "test_fastqs" + }, + "test_fastqs.starfusion.fusion_predictions.tsv:md5,abe17134a231642edf9351e4964e8a97" + ] + ], + "test_fastqs.Log.final.out", + [ + [ + { + "id": "test_fastqs" + }, + "test_fastqs.ReadsPerGene.out.tab:md5,8e0d42deeea09924d5c7ba3147bbfd78" + ] + ], + [ + + ], + [ + + ], + [ + "versions.yml:md5,afde4f2fd6056df81e322b3c35ab7a8a", + "versions.yml:md5,e53f1ec32bc78a33f99892e42274833a" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.3" + }, + "timestamp": "2024-12-23T18:25:52.838411738" + } +} \ No newline at end of file diff --git a/subworkflows/local/starfusion_workflow/tests/nextflow.config b/subworkflows/local/starfusion_workflow/tests/nextflow.config new file mode 100644 index 00000000..718add69 --- /dev/null +++ b/subworkflows/local/starfusion_workflow/tests/nextflow.config @@ -0,0 +1,37 @@ +process { + withName: 'STAR_FOR_STARFUSION' { + ext.args = '--twopassMode Basic \ + --outReadsUnmapped None \ + --readFilesCommand zcat \ + --outSAMtype BAM SortedByCoordinate \ + --outSAMstrandField intronMotif \ + --outSAMunmapped Within \ + --chimSegmentMin 12 \ + --chimJunctionOverhangMin 8 \ + --chimOutJunctionFormat 1 \ + --alignSJDBoverhangMin 10 \ + --alignMatesGapMax 100000 \ + --alignIntronMax 100000 \ + --alignSJstitchMismatchNmax 5 -1 5 5 \ + --chimMultimapScoreRange 3 \ + --chimScoreJunctionNonGTAG -4 \ + --chimMultimapNmax 20 \ + --chimNonchimScoreDropMin 10 \ + --peOverlapNbasesMin 12 \ + --peOverlapMMp 0.1 \ + --alignInsertionFlush Right \ + --alignSplicedMateMapLminOverLmate 0 \ + --alignSplicedMateMapLmin 30 \ + --chimOutType Junctions \ + --quantMode GeneCounts' + } + + withName: 'SAMTOOLS_INDEX_FOR_STARFUSION_CRAM' { + ext.prefix = { "${meta.id}.star_for_starfusion.Aligned.sortedByCoord.out" } + } + + withName: 'SAMTOOLS_VIEW_FOR_STARFUSION' { 
+ ext.args = { "--output-fmt cram" } + ext.prefix = { "${meta.id}.star_for_starfusion.Aligned.sortedByCoord.out" } + } +} diff --git a/subworkflows/local/stringtie_workflow.nf b/subworkflows/local/stringtie_workflow.nf new file mode 100644 index 00000000..d66ec0ee --- /dev/null +++ b/subworkflows/local/stringtie_workflow.nf @@ -0,0 +1,36 @@ +include { STRINGTIE_STRINGTIE } from '../../modules/nf-core/stringtie/stringtie/main' +include { STRINGTIE_MERGE } from '../../modules/nf-core/stringtie/merge/main' + + +workflow STRINGTIE_WORKFLOW { + take: + bam_sorted + ch_gtf + + main: + ch_versions = Channel.empty() + ch_stringtie_gtf = Channel.empty() + + if ((params.stringtie || params.all) && !params.fusioninspector_only) { + STRINGTIE_STRINGTIE(bam_sorted, ch_gtf.map { meta, gtf -> [ gtf ]}) + ch_versions = ch_versions.mix(STRINGTIE_STRINGTIE.out.versions) + + STRINGTIE_STRINGTIE + .out + .transcript_gtf + .map { it -> it[1] } + .set { stringtie_gtf } + ch_versions = ch_versions.mix(STRINGTIE_STRINGTIE.out.versions) + + + STRINGTIE_MERGE (stringtie_gtf, ch_gtf.map { meta, gtf -> [ gtf ]}) + ch_versions = ch_versions.mix(STRINGTIE_MERGE.out.versions) + ch_stringtie_gtf = STRINGTIE_MERGE.out.gtf + } + + emit: + stringtie_gtf = ch_stringtie_gtf.ifEmpty(null) + versions = ch_versions + + } + diff --git a/subworkflows/local/stringtie_workflow/main.nf b/subworkflows/local/stringtie_workflow/main.nf new file mode 100644 index 00000000..25ff26ee --- /dev/null +++ b/subworkflows/local/stringtie_workflow/main.nf @@ -0,0 +1,36 @@ +include { STRINGTIE_STRINGTIE } from '../../../modules/nf-core/stringtie/stringtie/main' +include { STRINGTIE_MERGE } from '../../../modules/nf-core/stringtie/merge/main' + + +workflow STRINGTIE_WORKFLOW { + take: + bam_sorted // channel: [meta, bam] + ch_chrgtf // channel: [meta, gtf] + + main: + ch_versions = Channel.empty() + ch_stringtie_gtf = Channel.empty() + + if ((params.stringtie || params.all) && !params.fusioninspector_only) { + 
STRINGTIE_STRINGTIE(bam_sorted, ch_chrgtf.map { meta, gtf -> [ gtf ]}) + ch_versions = ch_versions.mix(STRINGTIE_STRINGTIE.out.versions) + + STRINGTIE_STRINGTIE + .out + .transcript_gtf + .map { it -> it[1] } + .set { stringtie_gtf } + ch_versions = ch_versions.mix(STRINGTIE_STRINGTIE.out.versions) + + + STRINGTIE_MERGE (stringtie_gtf, ch_chrgtf.map { meta, gtf -> [ gtf ]}) + ch_versions = ch_versions.mix(STRINGTIE_MERGE.out.versions) + ch_stringtie_gtf = STRINGTIE_MERGE.out.gtf + } + + emit: + stringtie_gtf = ch_stringtie_gtf.ifEmpty(null) // channel: [meta, gtf] + versions = ch_versions // channel: [ path(versions.yml) ] + + } + diff --git a/subworkflows/local/stringtie_workflow/tests/main.nf.test b/subworkflows/local/stringtie_workflow/tests/main.nf.test new file mode 100644 index 00000000..da423ed5 --- /dev/null +++ b/subworkflows/local/stringtie_workflow/tests/main.nf.test @@ -0,0 +1,70 @@ +nextflow_workflow { + + name "Test STRINGTIE_WORKFLOW" + script "../main.nf" + workflow "STRINGTIE_WORKFLOW" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "stringtie" + + test("Should run stringtie workflow with stringtie = true") { + + when { + workflow { + """ + input[0] = Channel.of([ + [ id:'test', strandedness:'reverse' ], // meta map + file("${params.test_data_base}/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam", checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'test' ], // meta map + file("${params.test_data_base}/genomics/homo_sapiens/genome/genome.gtf", checkIfExists: true) + ]) + """ + } + params { + stringtie = true + outdir = "$outputDir" + test_data_base = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data" + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() }, + ) + } + } + + test("Should run stringtie workflow with params.all = true") { + + when { + workflow { + """ + input[0] = Channel.of([ + [ id:'test', strandedness:'reverse' ], // meta map + 
file("${params.test_data_base}/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam", checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'test' ], // meta map + file("${params.test_data_base}/genomics/homo_sapiens/genome/genome.gtf", checkIfExists: true) + ]) + """ + } + params { + all = true + outdir = "$outputDir" + test_data_base = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data" + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() }, + ) + } + } + +} diff --git a/subworkflows/local/stringtie_workflow/tests/main.nf.test.snap b/subworkflows/local/stringtie_workflow/tests/main.nf.test.snap new file mode 100644 index 00000000..791c85e4 --- /dev/null +++ b/subworkflows/local/stringtie_workflow/tests/main.nf.test.snap @@ -0,0 +1,56 @@ +{ + "Should run stringtie workflow with params.all = true": { + "content": [ + { + "0": [ + "stringtie.merged.gtf:md5,6da479298d73d5b3216d4e1576a2bdf4" + ], + "1": [ + "versions.yml:md5,16bf76d2f56399280213cfa721195317", + "versions.yml:md5,16bf76d2f56399280213cfa721195317", + "versions.yml:md5,be2acf8efb04dc33562c9d00df7c3a50" + ], + "stringtie_gtf": [ + "stringtie.merged.gtf:md5,6da479298d73d5b3216d4e1576a2bdf4" + ], + "versions": [ + "versions.yml:md5,16bf76d2f56399280213cfa721195317", + "versions.yml:md5,16bf76d2f56399280213cfa721195317", + "versions.yml:md5,be2acf8efb04dc33562c9d00df7c3a50" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-27T17:51:10.73612885" + }, + "Should run stringtie workflow with stringtie = true": { + "content": [ + { + "0": [ + "stringtie.merged.gtf:md5,6da479298d73d5b3216d4e1576a2bdf4" + ], + "1": [ + "versions.yml:md5,16bf76d2f56399280213cfa721195317", + "versions.yml:md5,16bf76d2f56399280213cfa721195317", + "versions.yml:md5,be2acf8efb04dc33562c9d00df7c3a50" + ], + "stringtie_gtf": [ + "stringtie.merged.gtf:md5,6da479298d73d5b3216d4e1576a2bdf4" + ], + "versions": 
[ + "versions.yml:md5,16bf76d2f56399280213cfa721195317", + "versions.yml:md5,16bf76d2f56399280213cfa721195317", + "versions.yml:md5,be2acf8efb04dc33562c9d00df7c3a50" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-27T17:50:57.437102535" + } +} \ No newline at end of file diff --git a/subworkflows/local/trim_workflow/main.nf b/subworkflows/local/trim_workflow/main.nf new file mode 100644 index 00000000..d548c8ae --- /dev/null +++ b/subworkflows/local/trim_workflow/main.nf @@ -0,0 +1,46 @@ + + +include { FASTP } from '../../../modules/nf-core/fastp/main' +include { FASTQC as FASTQC_FOR_FASTP } from '../../../modules/nf-core/fastqc/main' + +workflow TRIM_WORKFLOW { + + take: + reads // channel [ meta, [ fastq files ] ] + adapter_fasta // channel [ path ] + fastp_trim // boolean + + main: + ch_versions = Channel.empty() + ch_fastp_html = Channel.empty() + ch_fastp_json = Channel.empty() + ch_fastqc_trimmed = Channel.empty() + + if ( fastp_trim ) { + FASTP(reads, adapter_fasta.ifEmpty( [] ), false, false, false) + ch_versions = ch_versions.mix(FASTP.out.versions) + + FASTQC_FOR_FASTP(FASTP.out.reads) + ch_versions = ch_versions.mix(FASTQC_FOR_FASTP.out.versions) + + ch_reads_all = FASTP.out.reads + ch_reads_fusioncatcher = ch_reads_all + ch_fastp_html = FASTP.out.html + ch_fastp_json = FASTP.out.json + ch_fastqc_trimmed = FASTQC_FOR_FASTP.out.zip + + } + else { + ch_reads_all = reads + ch_reads_fusioncatcher = reads + } + + emit: + ch_reads_all // Channel [ meta, [reads] ] + ch_reads_fusioncatcher // Channel [ meta, [reads] ] + ch_fastp_html // Channel [ meta, path_html ] + ch_fastp_json // Channel [ meta, path_json ] + ch_fastqc_trimmed // Channel [ meta, path_zip ] + versions = ch_versions // Channel [ versions ] + } + diff --git a/subworkflows/local/trim_workflow/tests/main.nf.test b/subworkflows/local/trim_workflow/tests/main.nf.test new file mode 100644 index 00000000..2f7568d6 --- /dev/null +++ 
b/subworkflows/local/trim_workflow/tests/main.nf.test
@@ -0,0 +1,125 @@
+nextflow_workflow {
+
+    name "Test Subworkflow TRIM_WORKFLOW"
+    script "../main.nf"
+    workflow "TRIM_WORKFLOW"
+    tag "qc"
+    tag "subworkflow"
+    tag "fastqc"
+    tag "fastp"
+
+    test("TRIM_WORKFLOW - Homo sapiens - FASTQs - fastp == true") {
+
+        when {
+
+            workflow {
+                """
+                // ch_reads
+                input[0] = Channel.of(
+                    [
+                        [id: "test_fastq"],
+                        [
+                            file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz", checkIfExists: true),
+                            file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/modules/data/genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz", checkIfExists: true)
+                        ]
+                    ])
+                input[1] = Channel.empty()
+                input[2] = true
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert workflow.success },
+                { with(workflow.out) {
+                    assert snapshot(
+                        versions.collect{ file(it) },
+                        ch_reads_all[0][1].collect { file(it) },
+                        ch_reads_fusioncatcher[0][1].collect { file(it) },
+                        file(ch_fastp_html[0][1]).name,
+                        file(ch_fastp_json[0][1]),
+                        ch_fastqc_trimmed[0][1].collect { file(it).name }
+                    ).match() }
+                }
+            )
+        }
+    }
+
+    test("TRIM_WORKFLOW - Homo sapiens - FASTQs - fastp == true + fasp_adaptors") {
+
+        when {
+
+            workflow {
+                """
+                // ch_reads
+                input[0] = Channel.of(
+                    [
+                        [id: "test_fastq"],
+                        [
+                            file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz", checkIfExists: true),
+                            file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/modules/data/genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz", checkIfExists: true)
+                        ]
+                    ])
+                input[1] = Channel.fromPath('https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/modules/data/delete_me/fastp/adapters.fasta', checkIfExists: true)
+                input[2] = true
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert workflow.success },
+                { with(workflow.out) {
+                    assert
snapshot( + versions.collect{ file(it) }, + ch_reads_all[0][1].collect { file(it) }, + ch_reads_fusioncatcher[0][1].collect { file(it) }, + file(ch_fastp_html[0][1]).name, + file(ch_fastp_json[0][1]), + ch_fastqc_trimmed[0][1].collect { file(it).name } + ).match() } + } + ) + } + } + + test("TRIM_WORKFLOW - Homo sapiens - FASTQs - fastp == false") { + + when { + + workflow { + """ + // ch_reads + input[0] = Channel.of( + [ + [id: "test_fastq"], + [ + file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz", checkIfExists: true), + file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/modules/data/genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz", checkIfExists: true) + ] + ]) + input[1] = Channel.empty() + input[2] = false + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { with(workflow.out) { + assert snapshot( + versions.size() == 0, + ch_reads_all[0][1].size() == 2, + ch_reads_fusioncatcher[0][1].size() == 2, + ch_fastp_html.size() == 0, + ch_fastp_json.size() == 0, + ch_fastqc_trimmed.size() == 0 + ).match() } + } + ) + } + } + +} diff --git a/subworkflows/local/trim_workflow/tests/main.nf.test.snap b/subworkflows/local/trim_workflow/tests/main.nf.test.snap new file mode 100644 index 00000000..ae9f9e2e --- /dev/null +++ b/subworkflows/local/trim_workflow/tests/main.nf.test.snap @@ -0,0 +1,71 @@ +{ + "TRIM_WORKFLOW - Homo sapiens - FASTQs - fastp == true": { + "content": [ + [ + "versions.yml:md5,16187796d989b6260f572247e7dc0fc6", + "versions.yml:md5,ea42abe9875f41f8362a55ee7533f102" + ], + [ + "test_fastq_1.fastp.fastq.gz:md5,0c436583301dea48755a5252a2675b64", + "test_fastq_2.fastp.fastq.gz:md5,f7f38138255e63b33286b819b6177612" + ], + [ + "test_fastq_1.fastp.fastq.gz:md5,0c436583301dea48755a5252a2675b64", + "test_fastq_2.fastp.fastq.gz:md5,f7f38138255e63b33286b819b6177612" + ], + "test_fastq.fastp.html", + 
"test_fastq.fastp.json:md5,62066ad48c3d5981045cdd43e354cb2b", + [ + "test_fastq_trimmed_1_fastqc.zip", + "test_fastq_trimmed_2_fastqc.zip" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-05T16:21:52.926289296" + }, + "TRIM_WORKFLOW - Homo sapiens - FASTQs - fastp == false": { + "content": [ + true, + true, + true, + true, + true, + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-05T16:22:45.877168833" + }, + "TRIM_WORKFLOW - Homo sapiens - FASTQs - fastp == true + fasp_adaptors": { + "content": [ + [ + "versions.yml:md5,16187796d989b6260f572247e7dc0fc6", + "versions.yml:md5,ea42abe9875f41f8362a55ee7533f102" + ], + [ + "test_fastq_1.fastp.fastq.gz:md5,adc67a7b4d0bf3520866d7599a4ba814", + "test_fastq_2.fastp.fastq.gz:md5,9ee7d6c5230442970997477464255e67" + ], + [ + "test_fastq_1.fastp.fastq.gz:md5,adc67a7b4d0bf3520866d7599a4ba814", + "test_fastq_2.fastp.fastq.gz:md5,9ee7d6c5230442970997477464255e67" + ], + "test_fastq.fastp.html", + "test_fastq.fastp.json:md5,feb3483311bfa4ded60146f1cbc13fd5", + [ + "test_fastq_trimmed_1_fastqc.zip", + "test_fastq_trimmed_2_fastqc.zip" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-05T16:22:26.29488483" + } +} \ No newline at end of file diff --git a/subworkflows/local/utils_nfcore_rnafusion_pipeline/main.nf b/subworkflows/local/utils_nfcore_rnafusion_pipeline/main.nf new file mode 100644 index 00000000..96279699 --- /dev/null +++ b/subworkflows/local/utils_nfcore_rnafusion_pipeline/main.nf @@ -0,0 +1,298 @@ +// +// Subworkflow with functionality specific to the nf-core/rnafusion pipeline +// + +import groovy.json.JsonSlurper + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +include { UTILS_NFSCHEMA_PLUGIN 
} from '../../nf-core/utils_nfschema_plugin' +include { paramsSummaryMap } from 'plugin/nf-schema' +include { samplesheetToList } from 'plugin/nf-schema' +include { completionEmail } from '../../nf-core/utils_nfcore_pipeline' +include { completionSummary } from '../../nf-core/utils_nfcore_pipeline' +include { imNotification } from '../../nf-core/utils_nfcore_pipeline' +include { UTILS_NFCORE_PIPELINE } from '../../nf-core/utils_nfcore_pipeline' +include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUBWORKFLOW TO INITIALISE PIPELINE +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow PIPELINE_INITIALISATION { + + take: + version // boolean: Display version and exit + validate_params // boolean: Boolean whether to validate parameters against the schema at runtime + nextflow_cli_args // array: List of positional nextflow CLI args + outdir // string: The output directory where the results will be saved + + main: + + ch_versions = Channel.empty() + + // + // Print version and exit if required and dump pipeline parameters to JSON file + // + UTILS_NEXTFLOW_PIPELINE ( + version, + true, + outdir, + workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1 + ) + + // + // Validate parameters and generate parameter summary to stdout + // + UTILS_NFSCHEMA_PLUGIN ( + workflow, + validate_params, + null + ) + + // + // Check config provided to the pipeline + // + UTILS_NFCORE_PIPELINE ( + nextflow_cli_args + ) + + // + // Custom validation for pipeline parameters + // + validateInputParameters() + + // + // Create channel from input file provided through params.input + // + + Channel + .fromList(samplesheetToList(params.input, "${projectDir}/assets/schema_input.json")) + .map { + meta, fastq_1, fastq_2, strandedness -> + if (!fastq_2) { + return [ meta.id, meta + [ single_end:true ], [ 
fastq_1 ], strandedness ] + } else { + return [ meta.id, meta + [ single_end:false ], [ fastq_1, fastq_2 ], strandedness ] + } + } + .groupTuple() + .map { samplesheet -> + validateInputSamplesheet(samplesheet) + } + .map { + meta, fastqs -> + return [ meta, fastqs.flatten() ] + } + .set { ch_samplesheet } + + emit: + samplesheet = ch_samplesheet + versions = ch_versions +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUBWORKFLOW FOR PIPELINE COMPLETION +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow PIPELINE_COMPLETION { + + take: + email // string: email address + email_on_fail // string: email address sent on pipeline failure + plaintext_email // boolean: Send plain-text email instead of HTML + outdir // path: Path to output directory where results will be published + monochrome_logs // boolean: Disable ANSI colour codes in log output + hook_url // string: hook URL for notifications + multiqc_report // string: Path to MultiQC report + + main: + summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") + + // + // Completion email and summary + // + workflow.onComplete { + if (email || email_on_fail) { + completionEmail( + summary_params, + email, + email_on_fail, + plaintext_email, + outdir, + monochrome_logs, + multiqc_report.toList() + ) + } + + completionSummary(monochrome_logs) + if (hook_url) { + imNotification(summary_params, hook_url) + } + } + + workflow.onError { + log.error "Pipeline failed. 
Please refer to troubleshooting docs: https://nf-co.re/docs/usage/troubleshooting" + } +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + FUNCTIONS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// Check and validate pipeline parameters +// +def validateInputParameters() { + genomeExistsError() + + if (params.no_cosmic) { + log.warn("Skipping COSMIC DB download from `FUSIONREPORT_DOWNLOAD` and skip using it in `FUSIONREPORT`") + } + + if (params.starfusion_build && !params.fusion_annot_lib) { + error("No fusion annotation library provided. `STARFUSION_BUILD` is unable to run.") + } + +} + +// +// Validate channels from input samplesheet +// +def validateInputSamplesheet(input) { + def (metas, fastqs) = input[1..2] + + // Check that multiple runs of the same sample are of the same strandedness + def strandedness_ok = metas.collect{ it.strandedness }.unique().size == 1 + if (!strandedness_ok) { + error("Please check input samplesheet -> Multiple runs of a sample must have the same strandedness!: ${metas[0].id}") + } + + // Check that multiple runs of the same sample are of the same datatype i.e. single-end / paired-end + def endedness_ok = metas.collect{ meta -> meta.single_end }.unique().size == 1 + if (!endedness_ok) { + error("Please check input samplesheet -> Multiple runs of a sample must be of the same datatype i.e. single-end or paired-end: ${metas[0].id}") + } + + return [ metas[0], fastqs ] +} + +// +// Get attribute from genome config file e.g. 
fasta +// +def getGenomeAttribute(attribute) { + if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { + if (params.genomes[ params.genome ].containsKey(attribute)) { + return params.genomes[ params.genome ][ attribute ] + } + } + return null +} + +// +// Exit pipeline if incorrect --genome key provided +// +def genomeExistsError() { + if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { + def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + + " Currently, the available genome keys are:\n" + + " ${params.genomes.keySet().join(", ")}\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + error(error_string) + } +} +// +// Generate methods description for MultiQC +// +def toolCitationText() { + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def citation_text = [ + "Tools used in the workflow included:", + "FastQC (Andrews 2010),", + "MultiQC (Ewels et al. 2016)", + "." + ].join(' ').trim() + + return citation_text +} + +def toolBibliographyText() { + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "
<li>Author (2023) Pub name, Journal, DOI</li>" : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def reference_text = [ + "
<li>Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).</li>", + "
<li>Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354</li>" + ].join(' ').trim() + + return reference_text +} + +def methodsDescriptionText(mqc_methods_yaml) { + // Convert to a named map so can be used as with familar NXF ${workflow} variable syntax in the MultiQC YML file + def meta = [:] + meta.workflow = workflow.toMap() + meta["manifest_map"] = workflow.manifest.toMap() + + // Pipeline DOI + if (meta.manifest_map.doi) { + // Using a loop to handle multiple DOIs + // Removing `https://doi.org/` to handle pipelines using DOIs vs DOI resolvers + // Removing ` ` since the manifest.doi is a string and not a proper list + def temp_doi_ref = "" + def manifest_doi = meta.manifest_map.doi.tokenize(",") + manifest_doi.each { doi_ref -> + temp_doi_ref += "(doi: ${doi_ref.replace("https://doi.org/", "").replace(" ", "")}), " + } + meta["doi_text"] = temp_doi_ref.substring(0, temp_doi_ref.length() - 2) + } else meta["doi_text"] = "" + meta["nodoi_text"] = meta.manifest_map.doi ? "" : "
  • If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used.
  • " + + // Tool references + meta["tool_citations"] = "" + meta["tool_bibliography"] = "" + + // nf-core: Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled! + // meta["tool_citations"] = toolCitationText().replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") + // meta["tool_bibliography"] = toolBibliographyText() + + def methods_text = mqc_methods_yaml.text + + def engine = new groovy.text.SimpleTemplateEngine() + def description_html = engine.createTemplate(methods_text).make(meta) + + return description_html.toString() +} + +// +// Function to generate an error if contigs in genome fasta file > 512 Mbp +// +def checkMaxContigSize(fai_file) { + def max_size = 512000000 + fai_file.eachLine { line -> + def lspl = line.split('\t') + def chrom = lspl[0] + def size = lspl[1] + if (size.toInteger() > max_size) { + def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " Contig longer than ${max_size}bp found in reference genome!\n\n" + + " ${chrom}: ${size}\n\n" + + " Provide the '--bam_csi_index' parameter to use a CSI instead of BAI index.\n\n" + + " Please see:\n" + + " https://github.com/nf-core/rnaseq/issues/744\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + error(error_string) + } + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/main.nf b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf new file mode 100644 index 00000000..d6e593e8 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf @@ -0,0 +1,126 @@ +// +// Subworkflow with functionality that may be useful for any Nextflow pipeline +// + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUBWORKFLOW DEFINITION +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow UTILS_NEXTFLOW_PIPELINE { + take: + print_version // boolean: print 
version + dump_parameters // boolean: dump parameters + outdir // path: base directory used to publish pipeline results + check_conda_channels // boolean: check conda channels + + main: + + // + // Print workflow version and exit on --version + // + if (print_version) { + log.info("${workflow.manifest.name} ${getWorkflowVersion()}") + System.exit(0) + } + + // + // Dump pipeline parameters to a JSON file + // + if (dump_parameters && outdir) { + dumpParametersToJSON(outdir) + } + + // + // When running with Conda, warn if channels have not been set-up appropriately + // + if (check_conda_channels) { + checkCondaChannels() + } + + emit: + dummy_emit = true +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + FUNCTIONS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// Generate version string +// +def getWorkflowVersion() { + def version_string = "" as String + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 
'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string +} + +// +// Dump pipeline parameters to a JSON file +// +def dumpParametersToJSON(outdir) { + def timestamp = new java.util.Date().format('yyyy-MM-dd_HH-mm-ss') + def filename = "params_${timestamp}.json" + def temp_pf = new File(workflow.launchDir.toString(), ".${filename}") + def jsonStr = groovy.json.JsonOutput.toJson(params) + temp_pf.text = groovy.json.JsonOutput.prettyPrint(jsonStr) + + nextflow.extension.FilesEx.copyTo(temp_pf.toPath(), "${outdir}/pipeline_info/params_${timestamp}.json") + temp_pf.delete() +} + +// +// When running with -profile conda, warn if channels have not been set-up appropriately +// +def checkCondaChannels() { + def parser = new org.yaml.snakeyaml.Yaml() + def channels = [] + try { + def config = parser.load("conda config --show channels".execute().text) + channels = config.channels + } + catch (NullPointerException e) { + log.debug(e) + log.warn("Could not verify conda channel configuration.") + return null + } + catch (IOException e) { + log.debug(e) + log.warn("Could not verify conda channel configuration.") + return null + } + + // Check that all channels are present + // This channel list is ordered by required channel priority. + def required_channels_in_order = ['conda-forge', 'bioconda'] + def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean + + // Check that they are in the right order + def channel_priority_violation = required_channels_in_order != channels.findAll { ch -> ch in required_channels_in_order } + + if (channels_missing | channel_priority_violation) { + log.warn """\ + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + There is a problem with your Conda configuration! 
+ You will need to set-up the conda-forge and bioconda channels correctly. + Please refer to https://bioconda.github.io/ + The observed channel order is + ${channels} + but the following channel order is required: + ${required_channels_in_order} + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + """.stripIndent(true) + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml b/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml new file mode 100644 index 00000000..e5c3a0a8 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml @@ -0,0 +1,38 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NEXTFLOW_PIPELINE" +description: Subworkflow with functionality that may be useful for any Nextflow pipeline +keywords: + - utility + - pipeline + - initialise + - version +components: [] +input: + - print_version: + type: boolean + description: | + Print the version of the pipeline and exit + - dump_parameters: + type: boolean + description: | + Dump the parameters of the pipeline to a JSON file + - output_directory: + type: directory + description: Path to output dir to write JSON file to. + pattern: "results/" + - check_conda_channel: + type: boolean + description: | + Check if the conda channel priority is correct. 
+output: + - dummy_emit: + type: boolean + description: | + Dummy emit to make nf-core subworkflows lint happy +authors: + - "@adamrtalbot" + - "@drpatelh" +maintainers: + - "@adamrtalbot" + - "@drpatelh" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test new file mode 100644 index 00000000..68718e4f --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test @@ -0,0 +1,54 @@ + +nextflow_function { + + name "Test Functions" + script "subworkflows/nf-core/utils_nextflow_pipeline/main.nf" + config "subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config" + tag 'subworkflows' + tag 'utils_nextflow_pipeline' + tag 'subworkflows/utils_nextflow_pipeline' + + test("Test Function getWorkflowVersion") { + + function "getWorkflowVersion" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function dumpParametersToJSON") { + + function "dumpParametersToJSON" + + when { + function { + """ + // define inputs of the function here. 
Example: + input[0] = "$outputDir" + """.stripIndent() + } + } + + then { + assertAll( + { assert function.success } + ) + } + } + + test("Test Function checkCondaChannels") { + + function "checkCondaChannels" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap new file mode 100644 index 00000000..e3f0baf4 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap @@ -0,0 +1,20 @@ +{ + "Test Function getWorkflowVersion": { + "content": [ + "v9.9.9" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:05.308243" + }, + "Test Function checkCondaChannels": { + "content": null, + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:12.425833" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test new file mode 100644 index 00000000..02dbf094 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test @@ -0,0 +1,113 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NEXTFLOW_PIPELINE" + script "../main.nf" + config "subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config" + workflow "UTILS_NEXTFLOW_PIPELINE" + tag 'subworkflows' + tag 'utils_nextflow_pipeline' + tag 'subworkflows/utils_nextflow_pipeline' + + test("Should run no inputs") { + + when { + workflow { + """ + print_version = false + dump_parameters = false + outdir = null + check_conda_channels = false + + input[0] = print_version + input[1] = dump_parameters + input[2] = outdir + input[3] = check_conda_channels + """ + } + } + + then { + assertAll( + { assert 
workflow.success } + ) + } + } + + test("Should print version") { + + when { + workflow { + """ + print_version = true + dump_parameters = false + outdir = null + check_conda_channels = false + + input[0] = print_version + input[1] = dump_parameters + input[2] = outdir + input[3] = check_conda_channels + """ + } + } + + then { + expect { + with(workflow) { + assert success + assert "nextflow_workflow v9.9.9" in stdout + } + } + } + } + + test("Should dump params") { + + when { + workflow { + """ + print_version = false + dump_parameters = true + outdir = 'results' + check_conda_channels = false + + input[0] = false + input[1] = true + input[2] = outdir + input[3] = false + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should not create params JSON if no output directory") { + + when { + workflow { + """ + print_version = false + dump_parameters = true + outdir = null + check_conda_channels = false + + input[0] = false + input[1] = true + input[2] = outdir + input[3] = false + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config new file mode 100644 index 00000000..a09572e5 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config @@ -0,0 +1,9 @@ +manifest { + name = 'nextflow_workflow' + author = """nf-core""" + homePage = 'https://127.0.0.1' + description = """Dummy pipeline""" + nextflowVersion = '!>=23.04.0' + version = '9.9.9' + doi = 'https://doi.org/10.5281/zenodo.5070524' +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml new file mode 100644 index 00000000..f8476112 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nextflow_pipeline: + - 
subworkflows/nf-core/utils_nextflow_pipeline/** diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf new file mode 100644 index 00000000..bfd25876 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf @@ -0,0 +1,419 @@ +// +// Subworkflow with utility functions specific to the nf-core pipeline template +// + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUBWORKFLOW DEFINITION +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow UTILS_NFCORE_PIPELINE { + take: + nextflow_cli_args + + main: + valid_config = checkConfigProvided() + checkProfileProvided(nextflow_cli_args) + + emit: + valid_config +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + FUNCTIONS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// Warn if a -profile or Nextflow config has not been provided to run the pipeline +// +def checkConfigProvided() { + def valid_config = true as Boolean + if (workflow.profile == 'standard' && workflow.configFiles.size() <= 1) { + log.warn( + "[${workflow.manifest.name}] You are attempting to run the pipeline without any custom configuration!\n\n" + "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" + " (1) Using an existing pipeline profile e.g. `-profile docker` or `-profile singularity`\n" + " (2) Using an existing nf-core/configs for your Institution e.g. `-profile crick` or `-profile uppmax`\n" + " (3) Using your own local custom config e.g. 
`-c /path/to/your/custom.config`\n\n" + "Please refer to the quick start section and usage docs for the pipeline.\n " + ) + valid_config = false + } + return valid_config +} + +// +// Exit pipeline if --profile contains spaces +// +def checkProfileProvided(nextflow_cli_args) { + if (workflow.profile.endsWith(',')) { + error( + "The `-profile` option cannot end with a trailing comma, please remove it and re-run the pipeline!\n" + "HINT: A common mistake is to provide multiple values separated by spaces e.g. `-profile test, docker`.\n" + ) + } + if (nextflow_cli_args[0]) { + log.warn( + "nf-core pipelines do not accept positional arguments. The positional argument `${nextflow_cli_args[0]}` has been detected.\n" + "HINT: A common mistake is to provide multiple values separated by spaces e.g. `-profile test, docker`.\n" + ) + } +} + +// +// Generate workflow version string +// +def getWorkflowVersion() { + def version_string = "" as String + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 
'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string +} + +// +// Get software versions for pipeline +// +def processVersionsFromYAML(yaml_file) { + def yaml = new org.yaml.snakeyaml.Yaml() + def versions = yaml.load(yaml_file).collectEntries { k, v -> [k.tokenize(':')[-1], v] } + return yaml.dumpAsMap(versions).trim() +} + +// +// Get workflow version for pipeline +// +def workflowVersionToYAML() { + return """ + Workflow: + ${workflow.manifest.name}: ${getWorkflowVersion()} + Nextflow: ${workflow.nextflow.version} + """.stripIndent().trim() +} + +// +// Get channel of software versions used in pipeline in YAML format +// +def softwareVersionsToYAML(ch_versions) { + return ch_versions.unique().map { version -> processVersionsFromYAML(version) }.unique().mix(Channel.of(workflowVersionToYAML())) +} + +// +// Get workflow summary for MultiQC +// +def paramsSummaryMultiqc(summary_params) { + def summary_section = '' + summary_params + .keySet() + .each { group -> + def group_params = summary_params.get(group) + // This gets the parameters of that particular group + if (group_params) { + summary_section += "

    ${group}

    \n" + summary_section += "
    \n" + group_params + .keySet() + .sort() + .each { param -> + summary_section += "
    ${param}
    ${group_params.get(param) ?: 'N/A'}
    \n" + } + summary_section += "
    \n" + } + } + + def yaml_file_text = "id: '${workflow.manifest.name.replace('/', '-')}-summary'\n" as String + yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" + yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" + yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" + yaml_file_text += "plot_type: 'html'\n" + yaml_file_text += "data: |\n" + yaml_file_text += "${summary_section}" + + return yaml_file_text +} + +// +// ANSII colours used for terminal logging +// +def logColours(monochrome_logs=true) { + def colorcodes = [:] as Map + + // Reset / Meta + colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" + colorcodes['bold'] = monochrome_logs ? '' : "\033[1m" + colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" + colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m" + colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" + colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" + colorcodes['hidden'] = monochrome_logs ? '' : "\033[8m" + + // Regular Colors + colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" + colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" + colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" + colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" + colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" + colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" + colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" + colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" + + // Bold + colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" + colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" + colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" + colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" + colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" + colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" + colorcodes['bcyan'] = monochrome_logs ? 
'' : "\033[1;36m" + colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" + + // Underline + colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" + colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" + colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" + colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" + colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" + colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" + colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" + colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" + + // High Intensity + colorcodes['iblack'] = monochrome_logs ? '' : "\033[0;90m" + colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" + colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" + colorcodes['iyellow'] = monochrome_logs ? '' : "\033[0;93m" + colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" + colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" + colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m" + colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" + + // Bold High Intensity + colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" + colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" + colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" + colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m" + colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" + colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" + colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" + colorcodes['biwhite'] = monochrome_logs ? 
'' : "\033[1;97m" + + return colorcodes +} + +// Return a single report from an object that may be a Path or List +// +def getSingleReport(multiqc_reports) { + if (multiqc_reports instanceof Path) { + return multiqc_reports + } else if (multiqc_reports instanceof List) { + if (multiqc_reports.size() == 0) { + log.warn("[${workflow.manifest.name}] No reports found from process 'MULTIQC'") + return null + } else if (multiqc_reports.size() == 1) { + return multiqc_reports.first() + } else { + log.warn("[${workflow.manifest.name}] Found multiple reports from process 'MULTIQC', will use only one") + return multiqc_reports.first() + } + } else { + return null + } +} + +// +// Construct and send completion email +// +def completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs=true, multiqc_report=null) { + + // Set up the e-mail variables + def subject = "[${workflow.manifest.name}] Successful: ${workflow.runName}" + if (!workflow.success) { + subject = "[${workflow.manifest.name}] FAILED: ${workflow.runName}" + } + + def summary = [:] + summary_params + .keySet() + .sort() + .each { group -> + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['Date Started'] = workflow.start + misc_fields['Date Completed'] = workflow.complete + misc_fields['Pipeline script file path'] = workflow.scriptFile + misc_fields['Pipeline script hash ID'] = workflow.scriptId + if (workflow.repository) { + misc_fields['Pipeline repository Git URL'] = workflow.repository + } + if (workflow.commitId) { + misc_fields['Pipeline repository Git Commit'] = workflow.commitId + } + if (workflow.revision) { + misc_fields['Pipeline Git branch/tag'] = workflow.revision + } + misc_fields['Nextflow Version'] = workflow.nextflow.version + misc_fields['Nextflow Build'] = workflow.nextflow.build + misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp + + def email_fields = [:] + email_fields['version'] = getWorkflowVersion() + 
email_fields['runName'] = workflow.runName + email_fields['success'] = workflow.success + email_fields['dateComplete'] = workflow.complete + email_fields['duration'] = workflow.duration + email_fields['exitStatus'] = workflow.exitStatus + email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + email_fields['errorReport'] = (workflow.errorReport ?: 'None') + email_fields['commandLine'] = workflow.commandLine + email_fields['projectDir'] = workflow.projectDir + email_fields['summary'] = summary << misc_fields + + // On success try attach the multiqc report + def mqc_report = getSingleReport(multiqc_report) + + // Check if we are only sending emails on failure + def email_address = email + if (!email && email_on_fail && !workflow.success) { + email_address = email_on_fail + } + + // Render the TXT template + def engine = new groovy.text.GStringTemplateEngine() + def tf = new File("${workflow.projectDir}/assets/email_template.txt") + def txt_template = engine.createTemplate(tf).make(email_fields) + def email_txt = txt_template.toString() + + // Render the HTML template + def hf = new File("${workflow.projectDir}/assets/email_template.html") + def html_template = engine.createTemplate(hf).make(email_fields) + def email_html = html_template.toString() + + // Render the sendmail template + def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? 
params.max_multiqc_email_size : 0) as MemoryUnit + def smail_fields = [email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "${workflow.projectDir}", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes()] + def sf = new File("${workflow.projectDir}/assets/sendmail_template.txt") + def sendmail_template = engine.createTemplate(sf).make(smail_fields) + def sendmail_html = sendmail_template.toString() + + // Send the HTML e-mail + def colors = logColours(monochrome_logs) as Map + if (email_address) { + try { + if (plaintext_email) { + throw new org.codehaus.groovy.GroovyException('Send plaintext e-mail, not HTML') + } + // Try to send HTML e-mail using sendmail + def sendmail_tf = new File(workflow.launchDir.toString(), ".sendmail_tmp.html") + sendmail_tf.withWriter { w -> w << sendmail_html } + ['sendmail', '-t'].execute() << sendmail_html + log.info("-${colors.purple}[${workflow.manifest.name}]${colors.green} Sent summary e-mail to ${email_address} (sendmail)-") + } + catch (Exception msg) { + log.debug(msg.toString()) + log.debug("Trying with mail instead of sendmail") + // Catch failures and try with plaintext + def mail_cmd = ['mail', '-s', subject, '--content-type=text/html', email_address] + mail_cmd.execute() << email_html + log.info("-${colors.purple}[${workflow.manifest.name}]${colors.green} Sent summary e-mail to ${email_address} (mail)-") + } + } + + // Write summary e-mail HTML to a file + def output_hf = new File(workflow.launchDir.toString(), ".pipeline_report.html") + output_hf.withWriter { w -> w << email_html } + nextflow.extension.FilesEx.copyTo(output_hf.toPath(), "${outdir}/pipeline_info/pipeline_report.html") + output_hf.delete() + + // Write summary e-mail TXT to a file + def output_tf = new File(workflow.launchDir.toString(), ".pipeline_report.txt") + output_tf.withWriter { w -> w << email_txt } + nextflow.extension.FilesEx.copyTo(output_tf.toPath(), 
"${outdir}/pipeline_info/pipeline_report.txt") + output_tf.delete() +} + +// +// Print pipeline summary on completion +// +def completionSummary(monochrome_logs=true) { + def colors = logColours(monochrome_logs) as Map + if (workflow.success) { + if (workflow.stats.ignoredCount == 0) { + log.info("-${colors.purple}[${workflow.manifest.name}]${colors.green} Pipeline completed successfully${colors.reset}-") + } + else { + log.info("-${colors.purple}[${workflow.manifest.name}]${colors.yellow} Pipeline completed successfully, but with errored process(es) ${colors.reset}-") + } + } + else { + log.info("-${colors.purple}[${workflow.manifest.name}]${colors.red} Pipeline completed with errors${colors.reset}-") + } +} + +// +// Construct and send a notification to a web server as JSON e.g. Microsoft Teams and Slack +// +def imNotification(summary_params, hook_url) { + def summary = [:] + summary_params + .keySet() + .sort() + .each { group -> + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['start'] = workflow.start + misc_fields['complete'] = workflow.complete + misc_fields['scriptfile'] = workflow.scriptFile + misc_fields['scriptid'] = workflow.scriptId + if (workflow.repository) { + misc_fields['repository'] = workflow.repository + } + if (workflow.commitId) { + misc_fields['commitid'] = workflow.commitId + } + if (workflow.revision) { + misc_fields['revision'] = workflow.revision + } + misc_fields['nxf_version'] = workflow.nextflow.version + misc_fields['nxf_build'] = workflow.nextflow.build + misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp + + def msg_fields = [:] + msg_fields['version'] = getWorkflowVersion() + msg_fields['runName'] = workflow.runName + msg_fields['success'] = workflow.success + msg_fields['dateComplete'] = workflow.complete + msg_fields['duration'] = workflow.duration + msg_fields['exitStatus'] = workflow.exitStatus + msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + msg_fields['errorReport'] = 
(workflow.errorReport ?: 'None') + msg_fields['commandLine'] = workflow.commandLine.replaceFirst(/ +--hook_url +[^ ]+/, "") + msg_fields['projectDir'] = workflow.projectDir + msg_fields['summary'] = summary << misc_fields + + // Render the JSON template + def engine = new groovy.text.GStringTemplateEngine() + // Different JSON depending on the service provider + // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format + def json_path = hook_url.contains("hooks.slack.com") ? "slackreport.json" : "adaptivecard.json" + def hf = new File("${workflow.projectDir}/assets/${json_path}") + def json_template = engine.createTemplate(hf).make(msg_fields) + def json_message = json_template.toString() + + // POST + def post = new URL(hook_url).openConnection() + post.setRequestMethod("POST") + post.setDoOutput(true) + post.setRequestProperty("Content-Type", "application/json") + post.getOutputStream().write(json_message.getBytes("UTF-8")) + def postRC = post.getResponseCode() + if (!postRC.equals(200)) { + log.warn(post.getErrorStream().getText()) + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml b/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml new file mode 100644 index 00000000..d08d2434 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml @@ -0,0 +1,24 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NFCORE_PIPELINE" +description: Subworkflow with utility functions specific to the nf-core pipeline template +keywords: + - utility + - pipeline + - initialise + - version +components: [] +input: + - nextflow_cli_args: + type: list + description: | + Nextflow CLI positional arguments +output: + - success: + type: boolean + description: | + Dummy output to indicate success +authors: + - "@adamrtalbot" +maintainers: + - "@adamrtalbot" + - "@maxulysse" diff --git 
a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test new file mode 100644 index 00000000..f117040c --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test @@ -0,0 +1,126 @@ + +nextflow_function { + + name "Test Functions" + script "../main.nf" + config "subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "utils_nfcore_pipeline" + tag "subworkflows/utils_nfcore_pipeline" + + test("Test Function checkConfigProvided") { + + function "checkConfigProvided" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function checkProfileProvided") { + + function "checkProfileProvided" + + when { + function { + """ + input[0] = [] + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function without logColours") { + + function "logColours" + + when { + function { + """ + input[0] = true + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function with logColours") { + function "logColours" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function getSingleReport with a single file") { + function "getSingleReport" + + when { + function { + """ + input[0] = file(params.modules_testdata_base_path + '/generic/tsv/test.tsv', checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert function.result.contains("test.tsv") } + ) + } + } + + test("Test Function getSingleReport with multiple files") { + function "getSingleReport" + + when { + function { + """ + 
input[0] = [ + file(params.modules_testdata_base_path + '/generic/tsv/test.tsv', checkIfExists: true), + file(params.modules_testdata_base_path + '/generic/tsv/network.tsv', checkIfExists: true), + file(params.modules_testdata_base_path + '/generic/tsv/expression.tsv', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert function.result.contains("test.tsv") }, + { assert !function.result.contains("network.tsv") }, + { assert !function.result.contains("expression.tsv") } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap new file mode 100644 index 00000000..02c67014 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap @@ -0,0 +1,136 @@ +{ + "Test Function checkProfileProvided": { + "content": null, + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:03.360873" + }, + "Test Function checkConfigProvided": { + "content": [ + true + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:59.729647" + }, + "Test Function without logColours": { + "content": [ + { + "reset": "", + "bold": "", + "dim": "", + "underlined": "", + "blink": "", + "reverse": "", + "hidden": "", + "black": "", + "red": "", + "green": "", + "yellow": "", + "blue": "", + "purple": "", + "cyan": "", + "white": "", + "bblack": "", + "bred": "", + "bgreen": "", + "byellow": "", + "bblue": "", + "bpurple": "", + "bcyan": "", + "bwhite": "", + "ublack": "", + "ured": "", + "ugreen": "", + "uyellow": "", + "ublue": "", + "upurple": "", + "ucyan": "", + "uwhite": "", + "iblack": "", + "ired": "", + "igreen": "", + "iyellow": "", + "iblue": "", + "ipurple": "", + "icyan": "", + "iwhite": "", + "biblack": "", + "bired": "", + "bigreen": "", + "biyellow": "", + "biblue": "", + "bipurple": "", + 
"bicyan": "", + "biwhite": "" + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:17.969323" + }, + "Test Function with logColours": { + "content": [ + { + "reset": "\u001b[0m", + "bold": "\u001b[1m", + "dim": "\u001b[2m", + "underlined": "\u001b[4m", + "blink": "\u001b[5m", + "reverse": "\u001b[7m", + "hidden": "\u001b[8m", + "black": "\u001b[0;30m", + "red": "\u001b[0;31m", + "green": "\u001b[0;32m", + "yellow": "\u001b[0;33m", + "blue": "\u001b[0;34m", + "purple": "\u001b[0;35m", + "cyan": "\u001b[0;36m", + "white": "\u001b[0;37m", + "bblack": "\u001b[1;30m", + "bred": "\u001b[1;31m", + "bgreen": "\u001b[1;32m", + "byellow": "\u001b[1;33m", + "bblue": "\u001b[1;34m", + "bpurple": "\u001b[1;35m", + "bcyan": "\u001b[1;36m", + "bwhite": "\u001b[1;37m", + "ublack": "\u001b[4;30m", + "ured": "\u001b[4;31m", + "ugreen": "\u001b[4;32m", + "uyellow": "\u001b[4;33m", + "ublue": "\u001b[4;34m", + "upurple": "\u001b[4;35m", + "ucyan": "\u001b[4;36m", + "uwhite": "\u001b[4;37m", + "iblack": "\u001b[0;90m", + "ired": "\u001b[0;91m", + "igreen": "\u001b[0;92m", + "iyellow": "\u001b[0;93m", + "iblue": "\u001b[0;94m", + "ipurple": "\u001b[0;95m", + "icyan": "\u001b[0;96m", + "iwhite": "\u001b[0;97m", + "biblack": "\u001b[1;90m", + "bired": "\u001b[1;91m", + "bigreen": "\u001b[1;92m", + "biyellow": "\u001b[1;93m", + "biblue": "\u001b[1;94m", + "bipurple": "\u001b[1;95m", + "bicyan": "\u001b[1;96m", + "biwhite": "\u001b[1;97m" + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:21.714424" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test new file mode 100644 index 00000000..8940d32d --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test @@ -0,0 +1,29 @@ +nextflow_workflow { + + name "Test Workflow 
UTILS_NFCORE_PIPELINE" + script "../main.nf" + config "subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config" + workflow "UTILS_NFCORE_PIPELINE" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "utils_nfcore_pipeline" + tag "subworkflows/utils_nfcore_pipeline" + + test("Should run without failures") { + + when { + workflow { + """ + input[0] = [] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap new file mode 100644 index 00000000..859d1030 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap @@ -0,0 +1,19 @@ +{ + "Should run without failures": { + "content": [ + { + "0": [ + true + ], + "valid_config": [ + true + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:25.726491" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config b/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config new file mode 100644 index 00000000..d0a926bf --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config @@ -0,0 +1,9 @@ +manifest { + name = 'nextflow_workflow' + author = """nf-core""" + homePage = 'https://127.0.0.1' + description = """Dummy pipeline""" + nextflowVersion = '!>=23.04.0' + version = '9.9.9' + doi = 'https://doi.org/10.5281/zenodo.5070524' +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml new file mode 100644 index 00000000..ac8523c9 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nfcore_pipeline: + - subworkflows/nf-core/utils_nfcore_pipeline/** diff --git 
a/subworkflows/nf-core/utils_nfschema_plugin/main.nf b/subworkflows/nf-core/utils_nfschema_plugin/main.nf new file mode 100644 index 00000000..4994303e --- /dev/null +++ b/subworkflows/nf-core/utils_nfschema_plugin/main.nf @@ -0,0 +1,46 @@ +// +// Subworkflow that uses the nf-schema plugin to validate parameters and render the parameter summary +// + +include { paramsSummaryLog } from 'plugin/nf-schema' +include { validateParameters } from 'plugin/nf-schema' + +workflow UTILS_NFSCHEMA_PLUGIN { + + take: + input_workflow // workflow: the workflow object used by nf-schema to get metadata from the workflow + validate_params // boolean: validate the parameters + parameters_schema // string: path to the parameters JSON schema. + // this has to be the same as the schema given to `validation.parametersSchema` + // when this input is empty it will automatically use the configured schema or + // "${projectDir}/nextflow_schema.json" as default. This input should not be empty + // for meta pipelines + + main: + + // + // Print parameter summary to stdout. 
This will display the parameters + // that differ from the default given in the JSON schema + // + if(parameters_schema) { + log.info paramsSummaryLog(input_workflow, parameters_schema:parameters_schema) + } else { + log.info paramsSummaryLog(input_workflow) + } + + // + // Validate the parameters using nextflow_schema.json or the schema + // given via the validation.parametersSchema configuration option + // + if(validate_params) { + if(parameters_schema) { + validateParameters(parameters_schema:parameters_schema) + } else { + validateParameters() + } + } + + emit: + dummy_emit = true +} + diff --git a/subworkflows/nf-core/utils_nfschema_plugin/meta.yml b/subworkflows/nf-core/utils_nfschema_plugin/meta.yml new file mode 100644 index 00000000..f7d9f028 --- /dev/null +++ b/subworkflows/nf-core/utils_nfschema_plugin/meta.yml @@ -0,0 +1,35 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "utils_nfschema_plugin" +description: Run nf-schema to validate parameters and create a summary of changed parameters +keywords: + - validation + - JSON schema + - plugin + - parameters + - summary +components: [] +input: + - input_workflow: + type: object + description: | + The workflow object of the used pipeline. + This object contains meta data used to create the params summary log + - validate_params: + type: boolean + description: Validate the parameters and error if invalid. + - parameters_schema: + type: string + description: | + Path to the parameters JSON schema. + This has to be the same as the schema given to the `validation.parametersSchema` config + option. When this input is empty it will automatically use the configured schema or + "${projectDir}/nextflow_schema.json" as default. The schema should not be given in this way + for meta pipelines. 
+output: + - dummy_emit: + type: boolean + description: Dummy emit to make nf-core subworkflows lint happy +authors: + - "@nvnieuwk" +maintainers: + - "@nvnieuwk" diff --git a/subworkflows/nf-core/utils_nfschema_plugin/tests/main.nf.test b/subworkflows/nf-core/utils_nfschema_plugin/tests/main.nf.test new file mode 100644 index 00000000..8fb30164 --- /dev/null +++ b/subworkflows/nf-core/utils_nfschema_plugin/tests/main.nf.test @@ -0,0 +1,117 @@ +nextflow_workflow { + + name "Test Subworkflow UTILS_NFSCHEMA_PLUGIN" + script "../main.nf" + workflow "UTILS_NFSCHEMA_PLUGIN" + + tag "subworkflows" + tag "subworkflows_nfcore" + tag "subworkflows/utils_nfschema_plugin" + tag "plugin/nf-schema" + + config "./nextflow.config" + + test("Should run nothing") { + + when { + + params { + test_data = '' + } + + workflow { + """ + validate_params = false + input[0] = workflow + input[1] = validate_params + input[2] = "" + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should validate params") { + + when { + + params { + test_data = '' + outdir = null + } + + workflow { + """ + validate_params = true + input[0] = workflow + input[1] = validate_params + input[2] = "" + """ + } + } + + then { + assertAll( + { assert workflow.failed }, + { assert workflow.stdout.any { it.contains('ERROR ~ Validation of pipeline parameters failed!') } } + ) + } + } + + test("Should run nothing - custom schema") { + + when { + + params { + test_data = '' + } + + workflow { + """ + validate_params = false + input[0] = workflow + input[1] = validate_params + input[2] = "${projectDir}/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json" + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should validate params - custom schema") { + + when { + + params { + test_data = '' + outdir = null + } + + workflow { + """ + validate_params = true + input[0] = workflow + input[1] = validate_params + input[2] = 
"${projectDir}/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json" + """ + } + } + + then { + assertAll( + { assert workflow.failed }, + { assert workflow.stdout.any { it.contains('ERROR ~ Validation of pipeline parameters failed!') } } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config b/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config new file mode 100644 index 00000000..0907ac58 --- /dev/null +++ b/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config @@ -0,0 +1,8 @@ +plugins { + id "nf-schema@2.1.0" +} + +validation { + parametersSchema = "${projectDir}/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json" + monochromeLogs = true +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json b/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json new file mode 100644 index 00000000..331e0d2f --- /dev/null +++ b/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json @@ -0,0 +1,96 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://raw.githubusercontent.com/./master/nextflow_schema.json", + "title": ". pipeline parameters", + "description": "", + "type": "object", + "$defs": { + "input_output_options": { + "title": "Input/output options", + "type": "object", + "fa_icon": "fas fa-terminal", + "description": "Define where the pipeline should find input data and save output data.", + "required": ["outdir"], + "properties": { + "validate_params": { + "type": "boolean", + "description": "Validate parameters?", + "default": true, + "hidden": true + }, + "outdir": { + "type": "string", + "format": "directory-path", + "description": "The output directory where the results will be saved. 
You have to use absolute paths to storage on Cloud infrastructure.", + "fa_icon": "fas fa-folder-open" + }, + "test_data_base": { + "type": "string", + "default": "https://raw.githubusercontent.com/nf-core/test-datasets/modules", + "description": "Base for test data directory", + "hidden": true + }, + "test_data": { + "type": "string", + "description": "Fake test data param", + "hidden": true + } + } + }, + "generic_options": { + "title": "Generic options", + "type": "object", + "fa_icon": "fas fa-file-import", + "description": "Less common options for the pipeline, typically set in a config file.", + "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.", + "properties": { + "help": { + "type": "boolean", + "description": "Display help text.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "version": { + "type": "boolean", + "description": "Display version and exit.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "logo": { + "type": "boolean", + "default": true, + "description": "Display nf-core logo in console output.", + "fa_icon": "fas fa-image", + "hidden": true + }, + "singularity_pull_docker_container": { + "type": "boolean", + "description": "Pull Singularity container from Docker?", + "hidden": true + }, + "publish_dir_mode": { + "type": "string", + "default": "copy", + "description": "Method used to save pipeline results to output directory.", + "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. 
See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", + "fa_icon": "fas fa-copy", + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "hidden": true + }, + "monochrome_logs": { + "type": "boolean", + "description": "Use monochrome_logs", + "hidden": true + } + } + } + }, + "allOf": [ + { + "$ref": "#/$defs/input_output_options" + }, + { + "$ref": "#/$defs/generic_options" + } + ] +} diff --git a/tests/.nftignore b/tests/.nftignore new file mode 100644 index 00000000..54792ea8 --- /dev/null +++ b/tests/.nftignore @@ -0,0 +1,5 @@ +pipeline_info/*.{html,json,txt,yml} +references/arriba/blacklist_hg38_GRCh38_v2.4.0.tsv.gz +references/arriba/known_fusions_hg38_GRCh38_v2.4.0.tsv.gz +references/ensembl/Homo_sapiens.GRCh38.102.dna.primary_assembly.fa +references/hgnc/HGNC-DB-timestamp.txt diff --git a/tests/nextflow.config b/tests/nextflow.config new file mode 100644 index 00000000..c3f050dc --- /dev/null +++ b/tests/nextflow.config @@ -0,0 +1,8 @@ +// Limit and standardize resources for github actions and reproducibility +process { + resourceLimits = [ + cpus: 4, + memory: '15.GB', + time: '1.h' + ] +} diff --git a/tests/test_cosmic.nf.test b/tests/test_cosmic.nf.test new file mode 100644 index 00000000..9a56e76e --- /dev/null +++ b/tests/test_cosmic.nf.test @@ -0,0 +1,101 @@ +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + profile "test_cosmic" + tag "pipeline" + tag "pipeline_rnafusion" + tag "test_cosmic" + + test("test cosmic with fastp trim") { + + when { + params { + outdir = "$outputDir" + fastp_trim = true + } + } + + then { + // stable_name: All files + folders in ${params.outdir}/ with a stable name + // Ignore files with timestamps in their names + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) + // stable_path: All files in ${params.outdir}/ with stable content + def stable_path = 
getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') + assertAll( + { assert workflow.success}, + { assert snapshot( + // Number of successful tasks + workflow.trace.succeeded().size(), + // pipeline versions.yml file for multiqc from which Nextflow version is removed because we tests pipelines on multiple Nextflow versions + removeNextflowVersion("$outputDir/pipeline_info/nf_core_pipeline_software_mqc_versions.yml"), + // All stable path name, with a relative path + stable_name, + // All files with stable contents + stable_path + ).match() } + ) + } + } + + test("test cosmic no fastp trim") { + + when { + params { + outdir = "$outputDir" + } + } + + then { + // stable_name: All files + folders in ${params.outdir}/ with a stable name + // Ignore files with timestamps in their names + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) + // stable_path: All files in ${params.outdir}/ with stable content + def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') + assertAll( + { assert workflow.success}, + { assert snapshot( + // Number of successful tasks + workflow.trace.succeeded().size(), + // pipeline versions.yml file for multiqc from which Nextflow version is removed because we tests pipelines on multiple Nextflow versions + removeNextflowVersion("$outputDir/pipeline_info/nf_core_pipeline_software_mqc_versions.yml"), + // All stable path name, with a relative path + stable_name, + // All files with stable contents + stable_path + ).match() } + ) + } + } + + test("test cosmic no fastp trim build") { + + when { + params { + outdir = "$outputDir" + build_references = true + } + } + + then { + // stable_name: All files + folders in ${params.outdir}/ with a stable name + // Ignore files with timestamps in their names + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) 
+ // stable_path: All files in ${params.outdir}/ with stable content + def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') + assertAll( + { assert workflow.success}, + { assert snapshot( + // Number of successful tasks + workflow.trace.succeeded().size(), + // pipeline versions.yml file for multiqc from which Nextflow version is removed because we tests pipelines on multiple Nextflow versions + removeNextflowVersion("$outputDir/pipeline_info/nf_core_pipeline_software_mqc_versions.yml"), + // All stable path name, with a relative path + stable_name, + // All files with stable contents + stable_path + ).match() } + ) + } + } +} diff --git a/tests/test_cosmic.nf.test.snap b/tests/test_cosmic.nf.test.snap new file mode 100644 index 00000000..b8e6d4a5 --- /dev/null +++ b/tests/test_cosmic.nf.test.snap @@ -0,0 +1,58 @@ +{ + "test cosmic no fastp trim build": { + "content": [ + 0, + { + "Workflow": { + "nf-core/rnafusion": "v4.0.0dev" + } + }, + [ + "pipeline_info", + "pipeline_info/nf_core_pipeline_software_mqc_versions.yml" + ], + [ + + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-10T12:42:49.558429" + }, + "test cosmic no fastp trim": { + "content": [ + 0, + null, + [ + "pipeline_info" + ], + [ + + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-10T15:09:53.130295" + }, + "test cosmic with fastp trim": { + "content": [ + 0, + null, + [ + "pipeline_info" + ], + [ + + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-10T15:09:29.277399" + } +} \ No newline at end of file diff --git a/tests/test_stub.nf.test b/tests/test_stub.nf.test new file mode 100644 index 00000000..b0601a9f --- /dev/null +++ b/tests/test_stub.nf.test @@ -0,0 +1,73 @@ +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + profile "test" + options "-stub" // TODO remove once reference files are available + tag 
"pipeline" + tag "pipeline_rnafusion" + tag "test_stub" + + test("stub test with fastp trim") { + + when { + params { + outdir = "$outputDir" + fastp_trim = true + fusion_annot_lib = 'https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/CTAT_HumanFusionLib.mini.dat.gz' + } + } + + then { + // stable_name: All files + folders in ${params.outdir}/ with a stable name + // Ignore files with timestamps in their names + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) + // stable_path: All files in ${params.outdir}/ with stable content + // def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') //TODO uncomment once -stub is removed + assertAll( + { assert workflow.success}, + { assert snapshot( + // Number of successful tasks + workflow.trace.succeeded().size(), + // pipeline versions.yml file for multiqc from which Nextflow version is removed because we tests pipelines on multiple Nextflow versions + removeNextflowVersion("$outputDir/pipeline_info/nf_core_pipeline_software_mqc_versions.yml"), + // All stable path name, with a relative path + stable_name, + // All files with stable contents + // stable_path // TODO uncomment once -stub is removed + ).match() } + ) + } + } + + test("stub test no fastp trim") { + + when { + params { + outdir = "$outputDir" + fusion_annot_lib = 'https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/CTAT_HumanFusionLib.mini.dat.gz' + } + } + + then { + // stable_name: All files + folders in ${params.outdir}/ with a stable name + // Ignore files with timestamps in their names + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) + // stable_path: All files in ${params.outdir}/ with stable content + // def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') //TODO uncomment once -stub is removed + assertAll( + 
{ assert workflow.success}, + { assert snapshot( + // Number of successful tasks + workflow.trace.succeeded().size(), + // pipeline versions.yml file for multiqc from which Nextflow version is removed because we tests pipelines on multiple Nextflow versions + removeNextflowVersion("$outputDir/pipeline_info/nf_core_pipeline_software_mqc_versions.yml"), + // All stable path name, with a relative path + stable_name, + // All files with stable contents + // stable_path // TODO uncomment once -stub is removed + ).match() } + ) + } + } +} diff --git a/tests/test_stub.nf.test.snap b/tests/test_stub.nf.test.snap new file mode 100644 index 00000000..0ac620e7 --- /dev/null +++ b/tests/test_stub.nf.test.snap @@ -0,0 +1,737 @@ +{ + "stub test no fastp trim": { + "content": [ + 31, + { + "ARRIBA_ARRIBA": { + "arriba": "2.4.0" + }, + "ARRIBA_DOWNLOAD": { + "arriba_download": "2.4.0" + }, + "CTATSPLICING_STARTOCANCERINTRONS": { + "ctat-splicing": "0.0.2" + }, + "FASTQC": { + "fastqc": "0.12.1" + }, + "FUSIONCATCHER": { + "fusioncatcher": 1.35 + }, + "FUSIONCATCHER_BUILD": { + "fusioncatcher": "fusioncatcher.py 1.35" + }, + "FUSIONREPORT": { + "fusion_report": "2.1.5" + }, + "FUSIONREPORT_DOWNLOAD": { + "fusion_report": "2.1.5" + }, + "GATK4_BEDTOINTERVALLIST": { + "gatk4": "4.6.1.0" + }, + "GATK4_CREATESEQUENCEDICTIONARY": { + "gatk4": "4.6.1.0" + }, + "GATK4_MARKDUPLICATES": { + "gatk4": "4.5.0.0", + "samtools": "1.19.2" + }, + "GENCODE_DOWNLOAD": { + "wget": null + }, + "GET_RRNA_TRANSCRIPTS": { + "get_rrna_transcripts": "v1.0" + }, + "GFFREAD": { + "gffread": "0.12.7" + }, + "GTF_TO_REFFLAT": { + "gtfToGenePred": 377 + }, + "HGNC_DOWNLOAD": { + "wget": null + }, + "PICARD_COLLECTINSERTSIZEMETRICS": { + "picard": "3.3.0" + }, + "PICARD_COLLECTRNASEQMETRICS": { + "picard": "3.3.0" + }, + "SALMON_INDEX": { + "salmon": "1.10.3" + }, + "SALMON_QUANT": { + "salmon": "1.10.3" + }, + "SAMTOOLS_FAIDX": { + "samtools": 1.21 + }, + "SAMTOOLS_INDEX_FOR_STARFUSION": { + "samtools": 1.21 + 
}, + "STARFUSION": { + "STAR-Fusion": "1.14.0" + }, + "STARFUSION_BUILD": { + "STAR-Fusion": "1.14.0" + }, + "STAR_FOR_ARRIBA": { + "star": "2.7.11b", + "samtools": 1.2, + "gawk": "5.1.0" + }, + "STAR_FOR_STARFUSION": { + "star": "2.7.11b", + "samtools": 1.2, + "gawk": "5.1.0" + }, + "STAR_GENOMEGENERATE": { + "star": "2.7.11b", + "samtools": 1.2, + "gawk": "5.1.0" + }, + "STRINGTIE_MERGE": { + "stringtie": "2.2.1" + }, + "STRINGTIE_STRINGTIE": { + "stringtie": "2.2.3" + }, + "Workflow": { + "nf-core/rnafusion": "v4.0.0dev" + } + }, + [ + "arriba", + "arriba/test.arriba.fusions.discarded.tsv", + "arriba/test.arriba.fusions.tsv", + "ctatsplicing", + "ctatsplicing/arriba", + "ctatsplicing/arriba/test.cancer.introns", + "ctatsplicing/arriba/test.cancer.introns.prelim", + "ctatsplicing/arriba/test.cancer_intron_reads.sorted.bam", + "ctatsplicing/arriba/test.cancer_intron_reads.sorted.bam.bai", + "ctatsplicing/arriba/test.chckpts", + "ctatsplicing/arriba/test.ctat-splicing.igv.html", + "ctatsplicing/arriba/test.gene_reads.sorted.sifted.bam", + "ctatsplicing/arriba/test.gene_reads.sorted.sifted.bam.bai", + "ctatsplicing/arriba/test.igv.tracks", + "ctatsplicing/arriba/test.introns", + "ctatsplicing/arriba/test.introns.for_IGV.bed", + "ctatsplicing/starfusion", + "ctatsplicing/starfusion/test.cancer.introns", + "ctatsplicing/starfusion/test.cancer.introns.prelim", + "ctatsplicing/starfusion/test.cancer_intron_reads.sorted.bam", + "ctatsplicing/starfusion/test.cancer_intron_reads.sorted.bam.bai", + "ctatsplicing/starfusion/test.chckpts", + "ctatsplicing/starfusion/test.ctat-splicing.igv.html", + "ctatsplicing/starfusion/test.gene_reads.sorted.sifted.bam", + "ctatsplicing/starfusion/test.gene_reads.sorted.sifted.bam.bai", + "ctatsplicing/starfusion/test.igv.tracks", + "ctatsplicing/starfusion/test.introns", + "ctatsplicing/starfusion/test.introns.for_IGV.bed", + "fastqc", + "fastqc/test.html", + "fastqc/test.zip", + "fusioncatcher", + "fusioncatcher/human_v46", + 
"fusioncatcher/human_v46/ensembl_fully_overlapping_genes.txt", + "fusioncatcher/test.fusioncatcher.fusion-genes.txt", + "fusioncatcher/test.fusioncatcher.log", + "fusioncatcher/test.fusioncatcher.summary.txt", + "fusionreport", + "fusionreport/test", + "fusionreport/test/AAA_BBB.html", + "fusionreport/test/test.fusionreport.tsv", + "fusionreport/test/test.fusionreport_filtered.tsv", + "fusionreport/test/test.fusions.csv", + "fusionreport/test/test.fusions.json", + "fusionreport/test/test_fusionreport_index.html", + "gatk4", + "gatk4/Homo_sapiens.GRCh38.46_dna_primary_assembly.dict", + "get", + "get/rrna.bed", + "get/rrna.gtf", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_plots", + "multiqc/multiqc_report.html", + "picard", + "picard/test.bai", + "picard/test.bam", + "picard/test.bam.metrics", + "picard/test.cram", + "picard/test.cram.crai", + "picard/test.rna_metrics", + "picard/test_collectinsertsize.pdf", + "picard/test_collectinsertsize.txt", + "pipeline_info", + "pipeline_info/nf_core_pipeline_software_mqc_versions.yml", + "references", + "references/arriba", + "references/arriba/blacklist_hg38_GRCh38_v2.4.0.tsv.gz", + "references/arriba/cytobands_hg38_GRCh38_v2.4.0.tsv", + "references/arriba/known_fusions_hg38_GRCh38_v2.4.0.tsv.gz", + "references/arriba/protein_domains_hg38_GRCh38_v2.4.0.gff3", + "references/fusion_report_db", + "references/fusion_report_db/DB-timestamp.txt", + "references/fusion_report_db/cosmic.db", + "references/fusion_report_db/fusion_report.log", + "references/fusion_report_db/fusiongdb2.db", + "references/fusion_report_db/mitelman.db", + "references/gencode", + "references/gencode/Homo_sapiens.GRCh38.46.gtf", + "references/gencode/Homo_sapiens.GRCh38.46.gtf.interval_list", + "references/gencode/Homo_sapiens.GRCh38.46.gtf.refflat", + "references/gencode/Homo_sapiens.GRCh38.46_dna_primary_assembly.fa", + "references/gencode/Homo_sapiens.GRCh38.46_dna_primary_assembly.fa.fai", + "references/gffread", + 
"references/gffread/Homo_sapiens.GRCh38.46.gtf.fasta", + "references/hgnc", + "references/hgnc/HGNC-DB-timestamp.txt", + "references/hgnc/hgnc_complete_set.txt", + "references/salmon", + "references/salmon/salmon", + "references/salmon/salmon/complete_ref_lens.bin", + "references/salmon/salmon/ctable.bin", + "references/salmon/salmon/ctg_offsets.bin", + "references/salmon/salmon/duplicate_clusters.tsv", + "references/salmon/salmon/info.json", + "references/salmon/salmon/mphf.bin", + "references/salmon/salmon/pos.bin", + "references/salmon/salmon/pre_indexing.log", + "references/salmon/salmon/rank.bin", + "references/salmon/salmon/refAccumLengths.bin", + "references/salmon/salmon/ref_indexing.log", + "references/salmon/salmon/reflengths.bin", + "references/salmon/salmon/refseq.bin", + "references/salmon/salmon/seq.bin", + "references/salmon/salmon/versionInfo.json", + "references/star", + "references/star/Genome", + "references/star/Log.out", + "references/star/SA", + "references/star/SAindex", + "references/star/chrLength.txt", + "references/star/chrName.txt", + "references/star/chrNameLength.txt", + "references/star/chrStart.txt", + "references/star/exonGeTrInfo.tab", + "references/star/exonInfo.tab", + "references/star/geneInfo.tab", + "references/star/genomeParameters.txt", + "references/star/sjdbInfo.txt", + "references/star/sjdbList.fromGTF.out.tab", + "references/star/sjdbList.out.tab", + "references/star/transcriptInfo.tab", + "salmon", + "salmon/test", + "salmon/test_lib_format_counts.json", + "salmon/test_meta_info.json", + "star_for_arriba", + "star_for_arriba/test.Aligned.sortedByCoord.out.bam", + "star_for_arriba/test.Aligned.unsort.out.bam", + "star_for_arriba/test.Chimeric.out.junction", + "star_for_arriba/test.Log.final.out", + "star_for_arriba/test.Log.out", + "star_for_arriba/test.Log.progress.out", + "star_for_arriba/test.ReadsPerGene.out.tab", + "star_for_arriba/test.SJ.out.tab", + "star_for_arriba/test.Signal.UniqueMultiple.str1.out.bg", + 
"star_for_arriba/test.Signal.UniqueMultiple.str1.out.wig", + "star_for_arriba/test.out.sam", + "star_for_arriba/test.sortedByCoord.out.bam", + "star_for_arriba/test.tab", + "star_for_arriba/test.toTranscriptome.out.bam", + "star_for_arriba/test.unmapped_1.fastq.gz", + "star_for_arriba/test.unmapped_2.fastq.gz", + "star_for_arriba/testXd.out.bam", + "star_for_starfusion", + "star_for_starfusion/test.Aligned.sortedByCoord.out.bam", + "star_for_starfusion/test.Aligned.unsort.out.bam", + "star_for_starfusion/test.Chimeric.out.junction", + "star_for_starfusion/test.Log.final.out", + "star_for_starfusion/test.Log.out", + "star_for_starfusion/test.Log.progress.out", + "star_for_starfusion/test.ReadsPerGene.out.tab", + "star_for_starfusion/test.SJ.out.tab", + "star_for_starfusion/test.Signal.UniqueMultiple.str1.out.bg", + "star_for_starfusion/test.Signal.UniqueMultiple.str1.out.wig", + "star_for_starfusion/test.out.sam", + "star_for_starfusion/test.sortedByCoord.out.bam", + "star_for_starfusion/test.sortedByCoord.out.bam.bai", + "star_for_starfusion/test.tab", + "star_for_starfusion/test.toTranscriptome.out.bam", + "star_for_starfusion/test.unmapped_1.fastq.gz", + "star_for_starfusion/test.unmapped_2.fastq.gz", + "star_for_starfusion/testXd.out.bam", + "starfusion", + "starfusion/ctat_genome_lib_build_dir", + "starfusion/ctat_genome_lib_build_dir/AnnotFilterRule.pm", + "starfusion/ctat_genome_lib_build_dir/PFAM.domtblout.dat.gz", + "starfusion/ctat_genome_lib_build_dir/__chkpts", + "starfusion/ctat_genome_lib_build_dir/__chkpts/_fusion_annot_lib.idx.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/_prot_info_db.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/annotfiltrule_cp.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/blast_pairs.idx.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/cp_gene_blast_pairs.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/cp_pfam_dat.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/cp_ref_annot_cdna.ok", + 
"starfusion/ctat_genome_lib_build_dir/__chkpts/fusion_annot_lib.cp.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/index_pfam_hits.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/index_ref_annot_cdna.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/makeblastdb.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/mm2.splice_bed.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/mm2_genome_idx.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/ref_annot.cdsplus.dfam_masked.fa.cp.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/ref_annot.cdsplus.dfam_masked.fa.idx.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/ref_annot.gtf.gene_spans.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/ref_annot.gtf.mini.sortu.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/ref_annot.gtf.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/ref_genome.fa.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/ref_genome_fai.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/trans.blast.dat.cp.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/trans.blast.dat.index.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/validate_ctat_genome_lib.ok", + "starfusion/ctat_genome_lib_build_dir/blast_pairs.dat.gz", + "starfusion/ctat_genome_lib_build_dir/blast_pairs.idx", + "starfusion/ctat_genome_lib_build_dir/fusion_annot_lib.gz", + "starfusion/ctat_genome_lib_build_dir/fusion_annot_lib.idx", + "starfusion/ctat_genome_lib_build_dir/pfam_domains.dbm", + "starfusion/ctat_genome_lib_build_dir/ref_annot.cdna.fa", + "starfusion/ctat_genome_lib_build_dir/ref_annot.cdna.fa.idx", + "starfusion/ctat_genome_lib_build_dir/ref_annot.cds", + "starfusion/ctat_genome_lib_build_dir/ref_annot.cdsplus.fa", + "starfusion/ctat_genome_lib_build_dir/ref_annot.cdsplus.fa.idx", + "starfusion/ctat_genome_lib_build_dir/ref_annot.gtf", + "starfusion/ctat_genome_lib_build_dir/ref_annot.gtf.gene_spans", + "starfusion/ctat_genome_lib_build_dir/ref_annot.gtf.mini.sortu", + 
"starfusion/ctat_genome_lib_build_dir/ref_annot.gtf.mm2.splice.bed", + "starfusion/ctat_genome_lib_build_dir/ref_annot.pep", + "starfusion/ctat_genome_lib_build_dir/ref_annot.prot_info.dbm", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.fai", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.mm2", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.ndb", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.nhr", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.nin", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.njs", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.not", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.nsq", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.ntf", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.nto", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/Genome", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/Log.out", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/SA", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/SAindex", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/build.ok", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/chrLength.txt", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/chrName.txt", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/chrNameLength.txt", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/chrStart.txt", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/exonGeTrInfo.tab", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/exonInfo.tab", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/geneInfo.tab", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/genomeParameters.txt", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/sjdbInfo.txt", + 
"starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/sjdbList.fromGTF.out.tab", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/sjdbList.out.tab", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/transcriptInfo.tab", + "starfusion/ctat_genome_lib_build_dir/trans.blast.align_coords.align_coords.dat", + "starfusion/ctat_genome_lib_build_dir/trans.blast.align_coords.align_coords.dbm", + "starfusion/ctat_genome_lib_build_dir/trans.blast.dat.gz", + "starfusion/test.starfusion.abridged.coding_effect.tsv", + "starfusion/test.starfusion.abridged.tsv", + "starfusion/test.starfusion.fusion_predictions.tsv", + "stringtie", + "stringtie/[:]", + "stringtie/[:]/stringtie.merged.gtf", + "stringtie/test.ballgown", + "stringtie/test.coverage.gtf", + "stringtie/test.gene.abundance.txt", + "stringtie/test.transcripts.gtf" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-20T14:00:19.311265" + }, + "stub test with fastp trim": { + "content": [ + 33, + { + "ARRIBA_ARRIBA": { + "arriba": "2.4.0" + }, + "ARRIBA_DOWNLOAD": { + "arriba_download": "2.4.0" + }, + "CTATSPLICING_STARTOCANCERINTRONS": { + "ctat-splicing": "0.0.2" + }, + "FASTP": { + "fastp": "0.23.4" + }, + "FASTQC": { + "fastqc": "0.12.1" + }, + "FASTQC_FOR_FASTP": { + "fastqc": "0.12.1" + }, + "FUSIONCATCHER": { + "fusioncatcher": 1.35 + }, + "FUSIONCATCHER_BUILD": { + "fusioncatcher": "fusioncatcher.py 1.35" + }, + "FUSIONREPORT": { + "fusion_report": "2.1.5" + }, + "FUSIONREPORT_DOWNLOAD": { + "fusion_report": "2.1.5" + }, + "GATK4_BEDTOINTERVALLIST": { + "gatk4": "4.6.1.0" + }, + "GATK4_CREATESEQUENCEDICTIONARY": { + "gatk4": "4.6.1.0" + }, + "GATK4_MARKDUPLICATES": { + "gatk4": "4.5.0.0", + "samtools": "1.19.2" + }, + "GENCODE_DOWNLOAD": { + "wget": null + }, + "GET_RRNA_TRANSCRIPTS": { + "get_rrna_transcripts": "v1.0" + }, + "GFFREAD": { + "gffread": "0.12.7" + }, + "GTF_TO_REFFLAT": { + "gtfToGenePred": 377 + }, + "HGNC_DOWNLOAD": { + 
"wget": null + }, + "PICARD_COLLECTINSERTSIZEMETRICS": { + "picard": "3.3.0" + }, + "PICARD_COLLECTRNASEQMETRICS": { + "picard": "3.3.0" + }, + "SALMON_INDEX": { + "salmon": "1.10.3" + }, + "SALMON_QUANT": { + "salmon": "1.10.3" + }, + "SAMTOOLS_FAIDX": { + "samtools": 1.21 + }, + "SAMTOOLS_INDEX_FOR_STARFUSION": { + "samtools": 1.21 + }, + "STARFUSION": { + "STAR-Fusion": "1.14.0" + }, + "STARFUSION_BUILD": { + "STAR-Fusion": "1.14.0" + }, + "STAR_FOR_ARRIBA": { + "star": "2.7.11b", + "samtools": 1.2, + "gawk": "5.1.0" + }, + "STAR_FOR_STARFUSION": { + "star": "2.7.11b", + "samtools": 1.2, + "gawk": "5.1.0" + }, + "STAR_GENOMEGENERATE": { + "star": "2.7.11b", + "samtools": 1.2, + "gawk": "5.1.0" + }, + "STRINGTIE_MERGE": { + "stringtie": "2.2.1" + }, + "STRINGTIE_STRINGTIE": { + "stringtie": "2.2.3" + }, + "Workflow": { + "nf-core/rnafusion": "v4.0.0dev" + } + }, + [ + "arriba", + "arriba/test.arriba.fusions.discarded.tsv", + "arriba/test.arriba.fusions.tsv", + "ctatsplicing", + "ctatsplicing/arriba", + "ctatsplicing/arriba/test.cancer.introns", + "ctatsplicing/arriba/test.cancer.introns.prelim", + "ctatsplicing/arriba/test.cancer_intron_reads.sorted.bam", + "ctatsplicing/arriba/test.cancer_intron_reads.sorted.bam.bai", + "ctatsplicing/arriba/test.chckpts", + "ctatsplicing/arriba/test.ctat-splicing.igv.html", + "ctatsplicing/arriba/test.gene_reads.sorted.sifted.bam", + "ctatsplicing/arriba/test.gene_reads.sorted.sifted.bam.bai", + "ctatsplicing/arriba/test.igv.tracks", + "ctatsplicing/arriba/test.introns", + "ctatsplicing/arriba/test.introns.for_IGV.bed", + "ctatsplicing/starfusion", + "ctatsplicing/starfusion/test.cancer.introns", + "ctatsplicing/starfusion/test.cancer.introns.prelim", + "ctatsplicing/starfusion/test.cancer_intron_reads.sorted.bam", + "ctatsplicing/starfusion/test.cancer_intron_reads.sorted.bam.bai", + "ctatsplicing/starfusion/test.chckpts", + "ctatsplicing/starfusion/test.ctat-splicing.igv.html", + 
"ctatsplicing/starfusion/test.gene_reads.sorted.sifted.bam", + "ctatsplicing/starfusion/test.gene_reads.sorted.sifted.bam.bai", + "ctatsplicing/starfusion/test.igv.tracks", + "ctatsplicing/starfusion/test.introns", + "ctatsplicing/starfusion/test.introns.for_IGV.bed", + "fastp", + "fastp/test.fastp.html", + "fastp/test.fastp.json", + "fastp/test.fastp.log", + "fastp/test_1.fastp.fastq.gz", + "fastp/test_2.fastp.fastq.gz", + "fastqc", + "fastqc/test.html", + "fastqc/test.zip", + "fastqc_for_fastp", + "fastqc_for_fastp/test_trimmed.html", + "fastqc_for_fastp/test_trimmed.zip", + "fusioncatcher", + "fusioncatcher/human_v46", + "fusioncatcher/human_v46/ensembl_fully_overlapping_genes.txt", + "fusioncatcher/test.fusioncatcher.fusion-genes.txt", + "fusioncatcher/test.fusioncatcher.log", + "fusioncatcher/test.fusioncatcher.summary.txt", + "fusionreport", + "fusionreport/test", + "fusionreport/test/AAA_BBB.html", + "fusionreport/test/test.fusionreport.tsv", + "fusionreport/test/test.fusionreport_filtered.tsv", + "fusionreport/test/test.fusions.csv", + "fusionreport/test/test.fusions.json", + "fusionreport/test/test_fusionreport_index.html", + "gatk4", + "gatk4/Homo_sapiens.GRCh38.46_dna_primary_assembly.dict", + "get", + "get/rrna.bed", + "get/rrna.gtf", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_plots", + "multiqc/multiqc_report.html", + "picard", + "picard/test.bai", + "picard/test.bam", + "picard/test.bam.metrics", + "picard/test.cram", + "picard/test.cram.crai", + "picard/test.rna_metrics", + "picard/test_collectinsertsize.pdf", + "picard/test_collectinsertsize.txt", + "pipeline_info", + "pipeline_info/nf_core_pipeline_software_mqc_versions.yml", + "references", + "references/arriba", + "references/arriba/blacklist_hg38_GRCh38_v2.4.0.tsv.gz", + "references/arriba/cytobands_hg38_GRCh38_v2.4.0.tsv", + "references/arriba/known_fusions_hg38_GRCh38_v2.4.0.tsv.gz", + "references/arriba/protein_domains_hg38_GRCh38_v2.4.0.gff3", + "references/fusion_report_db", 
+ "references/fusion_report_db/DB-timestamp.txt", + "references/fusion_report_db/cosmic.db", + "references/fusion_report_db/fusion_report.log", + "references/fusion_report_db/fusiongdb2.db", + "references/fusion_report_db/mitelman.db", + "references/gencode", + "references/gencode/Homo_sapiens.GRCh38.46.gtf", + "references/gencode/Homo_sapiens.GRCh38.46.gtf.interval_list", + "references/gencode/Homo_sapiens.GRCh38.46.gtf.refflat", + "references/gencode/Homo_sapiens.GRCh38.46_dna_primary_assembly.fa", + "references/gencode/Homo_sapiens.GRCh38.46_dna_primary_assembly.fa.fai", + "references/gffread", + "references/gffread/Homo_sapiens.GRCh38.46.gtf.fasta", + "references/hgnc", + "references/hgnc/HGNC-DB-timestamp.txt", + "references/hgnc/hgnc_complete_set.txt", + "references/salmon", + "references/salmon/salmon", + "references/salmon/salmon/complete_ref_lens.bin", + "references/salmon/salmon/ctable.bin", + "references/salmon/salmon/ctg_offsets.bin", + "references/salmon/salmon/duplicate_clusters.tsv", + "references/salmon/salmon/info.json", + "references/salmon/salmon/mphf.bin", + "references/salmon/salmon/pos.bin", + "references/salmon/salmon/pre_indexing.log", + "references/salmon/salmon/rank.bin", + "references/salmon/salmon/refAccumLengths.bin", + "references/salmon/salmon/ref_indexing.log", + "references/salmon/salmon/reflengths.bin", + "references/salmon/salmon/refseq.bin", + "references/salmon/salmon/seq.bin", + "references/salmon/salmon/versionInfo.json", + "references/star", + "references/star/Genome", + "references/star/Log.out", + "references/star/SA", + "references/star/SAindex", + "references/star/chrLength.txt", + "references/star/chrName.txt", + "references/star/chrNameLength.txt", + "references/star/chrStart.txt", + "references/star/exonGeTrInfo.tab", + "references/star/exonInfo.tab", + "references/star/geneInfo.tab", + "references/star/genomeParameters.txt", + "references/star/sjdbInfo.txt", + "references/star/sjdbList.fromGTF.out.tab", + 
"references/star/sjdbList.out.tab", + "references/star/transcriptInfo.tab", + "salmon", + "salmon/test", + "salmon/test_lib_format_counts.json", + "salmon/test_meta_info.json", + "star_for_arriba", + "star_for_arriba/test.Aligned.sortedByCoord.out.bam", + "star_for_arriba/test.Aligned.unsort.out.bam", + "star_for_arriba/test.Chimeric.out.junction", + "star_for_arriba/test.Log.final.out", + "star_for_arriba/test.Log.out", + "star_for_arriba/test.Log.progress.out", + "star_for_arriba/test.ReadsPerGene.out.tab", + "star_for_arriba/test.SJ.out.tab", + "star_for_arriba/test.Signal.UniqueMultiple.str1.out.bg", + "star_for_arriba/test.Signal.UniqueMultiple.str1.out.wig", + "star_for_arriba/test.out.sam", + "star_for_arriba/test.sortedByCoord.out.bam", + "star_for_arriba/test.tab", + "star_for_arriba/test.toTranscriptome.out.bam", + "star_for_arriba/test.unmapped_1.fastq.gz", + "star_for_arriba/test.unmapped_2.fastq.gz", + "star_for_arriba/testXd.out.bam", + "star_for_starfusion", + "star_for_starfusion/test.Aligned.sortedByCoord.out.bam", + "star_for_starfusion/test.Aligned.unsort.out.bam", + "star_for_starfusion/test.Chimeric.out.junction", + "star_for_starfusion/test.Log.final.out", + "star_for_starfusion/test.Log.out", + "star_for_starfusion/test.Log.progress.out", + "star_for_starfusion/test.ReadsPerGene.out.tab", + "star_for_starfusion/test.SJ.out.tab", + "star_for_starfusion/test.Signal.UniqueMultiple.str1.out.bg", + "star_for_starfusion/test.Signal.UniqueMultiple.str1.out.wig", + "star_for_starfusion/test.out.sam", + "star_for_starfusion/test.sortedByCoord.out.bam", + "star_for_starfusion/test.sortedByCoord.out.bam.bai", + "star_for_starfusion/test.tab", + "star_for_starfusion/test.toTranscriptome.out.bam", + "star_for_starfusion/test.unmapped_1.fastq.gz", + "star_for_starfusion/test.unmapped_2.fastq.gz", + "star_for_starfusion/testXd.out.bam", + "starfusion", + "starfusion/ctat_genome_lib_build_dir", + "starfusion/ctat_genome_lib_build_dir/AnnotFilterRule.pm", + 
"starfusion/ctat_genome_lib_build_dir/PFAM.domtblout.dat.gz", + "starfusion/ctat_genome_lib_build_dir/__chkpts", + "starfusion/ctat_genome_lib_build_dir/__chkpts/_fusion_annot_lib.idx.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/_prot_info_db.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/annotfiltrule_cp.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/blast_pairs.idx.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/cp_gene_blast_pairs.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/cp_pfam_dat.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/cp_ref_annot_cdna.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/fusion_annot_lib.cp.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/index_pfam_hits.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/index_ref_annot_cdna.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/makeblastdb.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/mm2.splice_bed.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/mm2_genome_idx.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/ref_annot.cdsplus.dfam_masked.fa.cp.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/ref_annot.cdsplus.dfam_masked.fa.idx.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/ref_annot.gtf.gene_spans.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/ref_annot.gtf.mini.sortu.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/ref_annot.gtf.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/ref_genome.fa.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/ref_genome_fai.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/trans.blast.dat.cp.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/trans.blast.dat.index.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/validate_ctat_genome_lib.ok", + "starfusion/ctat_genome_lib_build_dir/blast_pairs.dat.gz", + "starfusion/ctat_genome_lib_build_dir/blast_pairs.idx", + "starfusion/ctat_genome_lib_build_dir/fusion_annot_lib.gz", + 
"starfusion/ctat_genome_lib_build_dir/fusion_annot_lib.idx", + "starfusion/ctat_genome_lib_build_dir/pfam_domains.dbm", + "starfusion/ctat_genome_lib_build_dir/ref_annot.cdna.fa", + "starfusion/ctat_genome_lib_build_dir/ref_annot.cdna.fa.idx", + "starfusion/ctat_genome_lib_build_dir/ref_annot.cds", + "starfusion/ctat_genome_lib_build_dir/ref_annot.cdsplus.fa", + "starfusion/ctat_genome_lib_build_dir/ref_annot.cdsplus.fa.idx", + "starfusion/ctat_genome_lib_build_dir/ref_annot.gtf", + "starfusion/ctat_genome_lib_build_dir/ref_annot.gtf.gene_spans", + "starfusion/ctat_genome_lib_build_dir/ref_annot.gtf.mini.sortu", + "starfusion/ctat_genome_lib_build_dir/ref_annot.gtf.mm2.splice.bed", + "starfusion/ctat_genome_lib_build_dir/ref_annot.pep", + "starfusion/ctat_genome_lib_build_dir/ref_annot.prot_info.dbm", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.fai", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.mm2", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.ndb", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.nhr", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.nin", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.njs", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.not", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.nsq", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.ntf", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.nto", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/Genome", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/Log.out", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/SA", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/SAindex", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/build.ok", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/chrLength.txt", + 
"starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/chrName.txt", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/chrNameLength.txt", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/chrStart.txt", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/exonGeTrInfo.tab", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/exonInfo.tab", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/geneInfo.tab", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/genomeParameters.txt", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/sjdbInfo.txt", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/sjdbList.fromGTF.out.tab", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/sjdbList.out.tab", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/transcriptInfo.tab", + "starfusion/ctat_genome_lib_build_dir/trans.blast.align_coords.align_coords.dat", + "starfusion/ctat_genome_lib_build_dir/trans.blast.align_coords.align_coords.dbm", + "starfusion/ctat_genome_lib_build_dir/trans.blast.dat.gz", + "starfusion/test.starfusion.abridged.coding_effect.tsv", + "starfusion/test.starfusion.abridged.tsv", + "starfusion/test.starfusion.fusion_predictions.tsv", + "stringtie", + "stringtie/[:]", + "stringtie/[:]/stringtie.merged.gtf", + "stringtie/test.ballgown", + "stringtie/test.coverage.gtf", + "stringtie/test.gene.abundance.txt", + "stringtie/test.transcripts.gtf" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-20T13:57:10.179877" + } +} diff --git a/tools/arriba/Dockerfile b/tools/arriba/Dockerfile deleted file mode 100644 index 2acb0f46..00000000 --- a/tools/arriba/Dockerfile +++ /dev/null @@ -1,8 +0,0 @@ -FROM nfcore/base - -LABEL authors="rickard.hammaren@scilifelab.se, phil.ewels@scilifelab.se, martin.proks@scilifelab.se" \ - description="Docker image containing all requirements for nfcore/rnafusion pipeline" 
- -COPY environment.yml / -RUN conda env create -f /environment.yml && conda clean -a -ENV PATH /opt/conda/envs/arriba/bin:$PATH diff --git a/tools/arriba/environment.yml b/tools/arriba/environment.yml deleted file mode 100644 index 2228d674..00000000 --- a/tools/arriba/environment.yml +++ /dev/null @@ -1,11 +0,0 @@ -name: arriba -channels: - - conda-forge - - bioconda -dependencies: - - arriba=1.1.0 - - bioconda::star=2.7.0f - - bioconda::samtools=1.9 - - conda-forge::r-circlize - - bioconda::bioconductor-genomicalignments - - bioconda::bioconductor-genomicranges \ No newline at end of file diff --git a/tools/build.sh b/tools/build.sh deleted file mode 100644 index 4c0f2577..00000000 --- a/tools/build.sh +++ /dev/null @@ -1,50 +0,0 @@ -#!/bin/bash - -PREFIX="nfcore/rnafusion" - -create_container() { - TOOL_PATH=$1 - TOOL=$2 - VERSION="$(cat $TOOL_PATH/environment.yml | grep "$TOOL=" | cut -d"=" -f2)" - CONTAINER_NAME="$PREFIX:${TOOL}_v$VERSION" - echo "Building [$CONTAINER_NAME]" - docker build $TOOL_PATH -t $CONTAINER_NAME - docker push $CONTAINER_NAME -} - -if [ $# -eq 0 ]; then - echo "No tool name specified!" - echo "Run build.sh -h for help" - exit 1 -fi - -if [ $1 == "-h" ]; then - echo "Utility for building docker containers from tools/" - echo "Usage: build.sh [options]" - echo - echo "Options:" - echo " all build all tools including main image" - echo " builds specific tool" - echo " Example: sh build.sh ericscript" - exit 0 -fi - -if [ $1 == "all" ]; then - for TOOL in */; do - create_container `pwd`/$TOOL ${TOOL%?} - done - # Build main cotainer - VERSION="$(cat ../nextflow.config | grep "container" | cut -d":" -f2 | cut -d "'" -f1)" - CONTAINER_NAME=$PREFIX:$VERSION - echo "Building [$CONTAINER_NAME]" - docker build ../. -t $CONTAINER_NAME -else - TOOL=$1 - TOOL_PATH="$(pwd)/$TOOL" - if [ ! 
-d $TOOL_PATH ]; then - echo "The tool doesn't exist" - exit 1 - else - create_container $TOOL_PATH $TOOL - fi -fi diff --git a/tools/ericscript/Dockerfile b/tools/ericscript/Dockerfile deleted file mode 100644 index c2bd562b..00000000 --- a/tools/ericscript/Dockerfile +++ /dev/null @@ -1,12 +0,0 @@ -FROM nfcore/base - -LABEL authors="rickard.hammaren@scilifelab.se, phil.ewels@scilifelab.se, martin.proks@scilifelab.se" \ - description="Docker image containing all requirements for nfcore/rnafusion pipeline" - -COPY environment.yml / -RUN conda env create -f /environment.yml && conda clean -a -ENV PATH /opt/conda/envs/ericscript/bin:$PATH - -RUN ln -s /lib/x86_64-linux-gnu/libreadline.so.7.0 /lib/x86_64-linux-gnu/libreadline.so.6 -RUN sed -i 's/system("R.*CheckDB.R");/#&/' /opt/conda/envs/ericscript/share/ericscript-0.5.5-3/ericscript.pl -RUN echo 1 > /opt/conda/envs/ericscript/share/ericscript-0.5.5-3/lib/data/_resources/.flag.dbexists \ No newline at end of file diff --git a/tools/ericscript/environment.yml b/tools/ericscript/environment.yml deleted file mode 100644 index b97a101a..00000000 --- a/tools/ericscript/environment.yml +++ /dev/null @@ -1,5 +0,0 @@ -name: ericscript -channels: - - bioconda -dependencies: - - ericscript=0.5.5 diff --git a/tools/fusion-inspector/Dockerfile b/tools/fusion-inspector/Dockerfile deleted file mode 100644 index bd6868c9..00000000 --- a/tools/fusion-inspector/Dockerfile +++ /dev/null @@ -1,10 +0,0 @@ -FROM nfcore/base - -LABEL authors="rickard.hammaren@scilifelab.se, phil.ewels@scilifelab.se, martin.proks@scilifelab.se" \ - description="Docker image containing all requirements for nfcore/rnafusion pipeline" - -COPY environment.yml / -RUN conda env create -f /environment.yml && conda clean -a -ENV PATH /opt/conda/envs/fusion-inspector/bin:$PATH - -RUN ln -s /lib/x86_64-linux-gnu/libcrypt.so.1 /lib/x86_64-linux-gnu/libcrypto.so.1.0.0 \ No newline at end of file diff --git a/tools/fusion-inspector/environment.yml 
b/tools/fusion-inspector/environment.yml deleted file mode 100644 index e822d623..00000000 --- a/tools/fusion-inspector/environment.yml +++ /dev/null @@ -1,6 +0,0 @@ -name: fusion-inspector -channels: - - bioconda -dependencies: - - bioconda::star=2.6.1b - - fusion-inspector=1.3.1 diff --git a/tools/fusioncatcher/Dockerfile b/tools/fusioncatcher/Dockerfile deleted file mode 100644 index 7cc59290..00000000 --- a/tools/fusioncatcher/Dockerfile +++ /dev/null @@ -1,8 +0,0 @@ -FROM nfcore/base - -LABEL authors="rickard.hammaren@scilifelab.se, phil.ewels@scilifelab.se, martin.proks@scilifelab.se" \ - description="Docker image containing all requirements for nfcore/rnafusion pipeline" - -COPY environment.yml / -RUN conda env create -f /environment.yml && conda clean -a -ENV PATH /opt/conda/envs/fusioncatcher/bin:$PATH diff --git a/tools/pizzly/Dockerfile b/tools/pizzly/Dockerfile deleted file mode 100644 index af6f7f8a..00000000 --- a/tools/pizzly/Dockerfile +++ /dev/null @@ -1,8 +0,0 @@ -FROM nfcore/base - -LABEL authors="rickard.hammaren@scilifelab.se, phil.ewels@scilifelab.se, martin.proks@scilifelab.se" \ - description="Docker image containing all requirements for nfcore/rnafusion pipeline" - -COPY environment.yml / -RUN conda env create -f /environment.yml && conda clean -a -ENV PATH /opt/conda/envs/pizzly/bin:$PATH diff --git a/tools/pizzly/environment.yml b/tools/pizzly/environment.yml deleted file mode 100644 index 47a88cd8..00000000 --- a/tools/pizzly/environment.yml +++ /dev/null @@ -1,6 +0,0 @@ -name: pizzly -channels: - - bioconda -dependencies: - - pizzly=0.37.3 - - kallisto=0.44.0 diff --git a/tools/squid/Dockerfile b/tools/squid/Dockerfile deleted file mode 100644 index 2b0ed4ff..00000000 --- a/tools/squid/Dockerfile +++ /dev/null @@ -1,15 +0,0 @@ -FROM nfcore/base - -LABEL authors="rickard.hammaren@scilifelab.se, phil.ewels@scilifelab.se, martin.proks@scilifelab.se" \ - description="Docker image containing all requirements for nfcore/rnafusion pipeline" - 
-COPY environment.yml / -RUN conda env create -f /environment.yml && conda clean -a -ENV PATH /opt/conda/envs/squid/bin:$PATH - -RUN wget https://raw.githubusercontent.com/Kingsford-Group/squid/master/utils/AnnotateSQUIDOutput.py -O /AnnotateSQUIDOutput.py \ - && chmod +x /AnnotateSQUIDOutput.py \ - && ln -s /AnnotateSQUIDOutput.py /usr/local/bin \ - && ln -s /opt/conda/envs/squid/bin/python3 /bin/python - -RUN ln -s /lib/x86_64-linux-gnu/libcrypt.so.1 /lib/x86_64-linux-gnu/libcrypto.so.1.0.0 diff --git a/tools/squid/environment.yml b/tools/squid/environment.yml deleted file mode 100644 index 9e3fdd2a..00000000 --- a/tools/squid/environment.yml +++ /dev/null @@ -1,10 +0,0 @@ -name: squid -channels: - - bioconda - - conda-forge -dependencies: - - squid=1.5 - - bioconda::star=2.7.0f - - bioconda::samtools=1.9 - - conda-forge::python=3.7.1 - - conda-forge::numpy=1.15.4 diff --git a/tools/star-fusion/Dockerfile b/tools/star-fusion/Dockerfile deleted file mode 100644 index 862efe0f..00000000 --- a/tools/star-fusion/Dockerfile +++ /dev/null @@ -1,9 +0,0 @@ -FROM nfcore/base - -LABEL authors="rickard.hammaren@scilifelab.se, phil.ewels@scilifelab.se, martin.proks@scilifelab.se" \ - description="Docker image containing all requirements for nfcore/rnafusion pipeline" - -COPY environment.yml / -RUN conda env create -f /environment.yml && conda clean -a - -ENV PATH /opt/conda/envs/star-fusion/bin:$PATH \ No newline at end of file diff --git a/tools/star-fusion/environment.yml b/tools/star-fusion/environment.yml deleted file mode 100644 index 49695d4d..00000000 --- a/tools/star-fusion/environment.yml +++ /dev/null @@ -1,9 +0,0 @@ -name: star-fusion -channels: - - bioconda - - conda-forge -dependencies: - - bioconda::star=2.7.0f - - bioconda::star-fusion=1.6.0 - - bioconda::trinity=2.6.6 - - conda-forge::perl-carp-assert \ No newline at end of file diff --git a/tower.yml b/tower.yml new file mode 100644 index 00000000..2edf5a7f --- /dev/null +++ b/tower.yml @@ -0,0 +1,31 @@ 
+reports: + multiqc_report.html: + display: "MultiQC HTML report" + "**/arriba/*.arriba.fusions.tsv": + display: "Arriba identified fusion TSV report" + "**/arriba_visualisation/*_combined_fusions_arriba_visualisation.pdf": + display: "PDF visualisation of the transcripts involved in predicted fusions" + "**/fastp/*fastp.html": + display: "Post fastp trimming HTML report" + "**/fusioncatcher/*.fusioncatcher.fusion-genes.txt": + display: "FusionCatcher identified fusion TXT report" + "**/fusioninspector/*.FusionInspector.fusions.abridged.tsv": + display: "FusionInspector TSV report" + "**/fusionreport/*/*_fusionreport_index.html": + display: "Fusion-report HTML report" + "**/vcf/*_fusion_data.vcf.gz": + display: "Collected statistics on each fusion fed to FusionInspector in VCF format" + "**/picard/*.MarkDuplicates.metrics.txt": + display: "GATK4: Metrics from MarkDuplicates" + "**/picard/*_rna_metrics.txt": + display: "Picard: Metrics from CollectRnaMetrics" + "**/picard/*insert*size*metrics.txt": + display: "GATK4: Metrics from InsertSizeMetrics" + "**/picard/*pdf": + display: "GATK4: InsertSizeMetrics histogram" + "**/star_for_starfusion/*ReadsPerGene.out.tab": + display: "Number of reads per gene" + "**/starfusion/*.starfusion.fusion_predictions.tsv": + display: "STAR-Fusion identified fusion TSV report" + "**/stringtie/*/*stringtie.merged.gtf": + display: "Merged GTFs from StringTie with annotations" diff --git a/utils/download-singularity-img.sh b/utils/download-singularity-img.sh deleted file mode 100644 index 3d1676b4..00000000 --- a/utils/download-singularity-img.sh +++ /dev/null @@ -1,47 +0,0 @@ -#!/bin/bash - -get_tool_version() { - echo $(cat ../nextflow.config | grep "$1_version" | cut -d"=" -f2 | tr -d \' | tr -d ' ') -} - -if [ $# -eq 0 ]; then - echo "No output path specified" - echo "Run download-singularity-img.sh -h for help" - exit 1 -fi - -if [ $1 == "-h" ]; then - echo "Utility for downloading singularity images from DockerHub" - echo - echo
"Usage: download-singularity-img.sh [output directory]" - echo "Example: sh download-singularity-img.sh /path" - echo - exit 0 -elif [ -d "$1" ]; then - ARRIBA=$(get_tool_version "arriba") - ERICSCRIPT=$(get_tool_version "ericscript") - FUSIONCATCHER=$(get_tool_version "fusioncatcher") - FUSION_INSPECTOR=$(get_tool_version "fusion_inspector") - PIZZLY=$(get_tool_version "pizzly") - RNAFUSION="$(cat ../nextflow.config | grep "container" | cut -d":" -f2 | cut -d "'" -f1)" - SQUID=$(get_tool_version "squid") - STAR_FUSION=$(get_tool_version "star_fusion") - - cwd=$1/rnafusion_containers_v$RNAFUSION - echo "Creating folder $cwd" - mkdir $cwd && cd $cwd - echo "Pulling images ..." - - singularity pull --name "rnafusion_arriba_v${ARRIBA}.img" docker://nfcore/rnafusion:arriba_v${ARRIBA} - singularity pull --name "rnafusion_ericscript_v${ERICSCRIPT}.img" docker://nfcore/rnafusion:ericscript_v${ERICSCRIPT} - singularity pull --name "rnafusion_fusioncatcher_v${FUSIONCATCHER}.img" docker://nfcore/rnafusion:fusioncatcher_v${FUSIONCATCHER} - singularity pull --name "rnafusion_fusion-inspector_v${FUSION_INSPECTOR}.img" docker://nfcore/rnafusion:fusion-inspector_v${FUSION_INSPECTOR} - singularity pull --name "rnafusion_pizzly_v${PIZZLY}.img" docker://nfcore/rnafusion:pizzly_v${PIZZLY} - singularity pull --name "rnafusion_v${RNAFUSION}.img" docker://nfcore/rnafusion:${RNAFUSION} - singularity pull --name "rnafusion_squid_v${SQUID}.img" docker://nfcore/rnafusion:squid_v${SQUID} - singularity pull --name "rnafusion_star-fusion_v${STAR_FUSION}.img" docker://nfcore/rnafusion:star-fusion_v${STAR_FUSION} - -else - echo "Path doesn't exists" - exit 1 -fi diff --git a/workflows/rnafusion.nf b/workflows/rnafusion.nf new file mode 100644 index 00000000..857827aa --- /dev/null +++ b/workflows/rnafusion.nf @@ -0,0 +1,245 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT MODULES / SUBWORKFLOWS / FUNCTIONS 
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +include { BUILD_REFERENCES } from '../subworkflows/local/build_references' +include { CAT_FASTQ } from '../modules/nf-core/cat/fastq/main' +include { TRIM_WORKFLOW } from '../subworkflows/local/trim_workflow/main' +include { ARRIBA_WORKFLOW } from '../subworkflows/local/arriba_workflow' +include { QC_WORKFLOW } from '../subworkflows/local/qc_workflow' +include { STARFUSION_WORKFLOW } from '../subworkflows/local/starfusion_workflow' +include { STRINGTIE_WORKFLOW } from '../subworkflows/local/stringtie_workflow/main' +include { FUSIONCATCHER_WORKFLOW } from '../subworkflows/local/fusioncatcher_workflow' +include { FUSIONINSPECTOR_WORKFLOW } from '../subworkflows/local/fusioninspector_workflow' +include { FUSIONREPORT_WORKFLOW } from '../subworkflows/local/fusionreport_workflow' +include { FASTQC } from '../modules/nf-core/fastqc/main' +include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { SALMON_QUANT } from '../modules/nf-core/salmon/quant/main' +include { paramsSummaryMap } from 'plugin/nf-schema' +include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_rnafusion_pipeline' +include { validateInputSamplesheet } from '../subworkflows/local/utils_nfcore_rnafusion_pipeline' + + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + RUN MAIN WORKFLOW +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow RNAFUSION { + + + take: + ch_samplesheet // channel: samplesheet read in from --input + + main: + + def ch_versions = Channel.empty() + def ch_multiqc_files = Channel.empty() + + // + // Create references if necessary + // + + BUILD_REFERENCES() + ch_versions = 
ch_versions.mix(BUILD_REFERENCES.out.versions) + + + // + // QC from FASTQ files + // + FASTQC ( + ch_samplesheet + ) + ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}) + ch_versions = ch_versions.mix(FASTQC.out.versions) + + + // + // Trimming + // + TRIM_WORKFLOW ( + ch_samplesheet, + Channel.value(params.adapter_fasta), + params.fastp_trim + ) + ch_reads = TRIM_WORKFLOW.out.ch_reads_all + ch_versions = ch_versions.mix(TRIM_WORKFLOW.out.versions) + + SALMON_QUANT( ch_reads, BUILD_REFERENCES.out.ch_salmon_index.map{ it -> it[1] }, BUILD_REFERENCES.out.ch_gtf.map{ it -> it[1] }, [], false, 'A') + ch_multiqc_files = ch_multiqc_files.mix(SALMON_QUANT.out.json_info.collect{it[1]}) + ch_versions = ch_versions.mix(SALMON_QUANT.out.versions) + + + // + // SUBWORKFLOW: Run STAR alignment and Arriba + // + + // TODO: add params.seq_platform and pass it as argument to arriba_workflow + // TODO: improve how params.arriba_fusions would avoid running arriba module. Maybe imputed from samplesheet? + // TODO: same as above, but with ch_arriba_fusion_fail. 
It's currently replaced by a dummy file + + ARRIBA_WORKFLOW ( + ch_reads, + BUILD_REFERENCES.out.ch_gtf, + BUILD_REFERENCES.out.ch_fasta, + BUILD_REFERENCES.out.ch_starindex_ref, + BUILD_REFERENCES.out.ch_arriba_ref_blacklist, + BUILD_REFERENCES.out.ch_arriba_ref_cytobands, + BUILD_REFERENCES.out.ch_arriba_ref_known_fusions, + BUILD_REFERENCES.out.ch_arriba_ref_protein_domains, + BUILD_REFERENCES.out.ch_starfusion_ref, + params.arriba, // boolean + params.all, // boolean + params.fusioninspector_only, // boolean + params.star_ignore_sjdbgtf, // boolean + params.ctatsplicing, // boolean + params.seq_center ?: '', // string + params.arriba_fusions, // path + params.cram // array + ) + ch_versions = ch_versions.mix(ARRIBA_WORKFLOW.out.versions) + + + //Run STAR fusion + STARFUSION_WORKFLOW ( + ch_reads, + BUILD_REFERENCES.out.ch_gtf, + BUILD_REFERENCES.out.ch_starindex_ref, + BUILD_REFERENCES.out.ch_fasta, + BUILD_REFERENCES.out.ch_starfusion_ref + ) + ch_versions = ch_versions.mix(STARFUSION_WORKFLOW.out.versions) + + + //Run fusioncatcher + FUSIONCATCHER_WORKFLOW ( + ch_reads + ) + ch_versions = ch_versions.mix(FUSIONCATCHER_WORKFLOW.out.versions) + + + //Run stringtie + STRINGTIE_WORKFLOW ( + STARFUSION_WORKFLOW.out.ch_bam_sorted, + BUILD_REFERENCES.out.ch_gtf + ) + ch_versions = ch_versions.mix(STRINGTIE_WORKFLOW.out.versions) + + + //Run fusion-report + FUSIONREPORT_WORKFLOW ( + ch_reads, + BUILD_REFERENCES.out.ch_fusionreport_ref, + ARRIBA_WORKFLOW.out.fusions, + STARFUSION_WORKFLOW.out.fusions, + FUSIONCATCHER_WORKFLOW.out.fusions + ) + ch_versions = ch_versions.mix(FUSIONREPORT_WORKFLOW.out.versions) + + //Run FusionInspector + FUSIONINSPECTOR_WORKFLOW ( + ch_reads, + FUSIONREPORT_WORKFLOW.out.fusion_list, + FUSIONREPORT_WORKFLOW.out.fusion_list_filtered, + FUSIONREPORT_WORKFLOW.out.report, + FUSIONREPORT_WORKFLOW.out.csv, + STARFUSION_WORKFLOW.out.ch_bam_sorted_indexed, + BUILD_REFERENCES.out.ch_gtf, + BUILD_REFERENCES.out.ch_arriba_ref_protein_domains, +
BUILD_REFERENCES.out.ch_arriba_ref_cytobands, + BUILD_REFERENCES.out.ch_hgnc_ref, + BUILD_REFERENCES.out.ch_hgnc_date + ) + ch_versions = ch_versions.mix(FUSIONINSPECTOR_WORKFLOW.out.versions) + + + //QC + QC_WORKFLOW ( + STARFUSION_WORKFLOW.out.ch_bam_sorted, + BUILD_REFERENCES.out.ch_refflat, + BUILD_REFERENCES.out.ch_fasta, + BUILD_REFERENCES.out.ch_fai, + BUILD_REFERENCES.out.ch_rrna_interval + ) + ch_versions = ch_versions.mix(QC_WORKFLOW.out.versions) + + // + // Collate and save software versions + // + softwareVersionsToYAML(ch_versions) + .collectFile( + storeDir: "${params.outdir}/pipeline_info", + name: 'nf_core_pipeline_software_mqc_versions.yml', + sort: true, + newLine: true + ).set { ch_collated_versions } + + + // + // MODULE: MultiQC + // + ch_multiqc_config = Channel.fromPath( + "$projectDir/assets/multiqc_config.yml", checkIfExists: true) + ch_multiqc_custom_config = params.multiqc_config ? + Channel.fromPath(params.multiqc_config, checkIfExists: true) : + Channel.empty() + ch_multiqc_logo = params.multiqc_logo ? + Channel.fromPath(params.multiqc_logo, checkIfExists: true) : + Channel.empty() + + summary_params = paramsSummaryMap( + workflow, parameters_schema: "nextflow_schema.json") + ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) + ch_multiqc_files = ch_multiqc_files.mix( + ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) + ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
+ file(params.multiqc_methods_description, checkIfExists: true) : + file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) + ch_methods_description = Channel.value( + methodsDescriptionText(ch_multiqc_custom_methods_description)) + + ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) + ch_multiqc_files = ch_multiqc_files.mix( + ch_methods_description.collectFile( + name: 'methods_description_mqc.yaml', + sort: true + ) + ) + ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(TRIM_WORKFLOW.out.ch_fastp_html.collect{it[1]}.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(TRIM_WORKFLOW.out.ch_fastp_json.collect{it[1]}.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(TRIM_WORKFLOW.out.ch_fastqc_trimmed.collect{it[1]}.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(STARFUSION_WORKFLOW.out.star_stats.collect{it[1]}.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(STARFUSION_WORKFLOW.out.star_gene_count.collect{it[1]}.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(QC_WORKFLOW.out.rnaseq_metrics.collect{it[1]}.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(QC_WORKFLOW.out.duplicate_metrics.collect{it[1]}.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(QC_WORKFLOW.out.insertsize_metrics.collect{it[1]}.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(FUSIONINSPECTOR_WORKFLOW.out.ch_arriba_visualisation.collect{it[1]}.ifEmpty([])) + + MULTIQC ( + ch_multiqc_files.collect(), + ch_multiqc_config.toList(), + ch_multiqc_custom_config.toList(), + ch_multiqc_logo.toList(), + [], + [] + ) + + + + emit: + multiqc_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html + versions = ch_versions // channel: [ path(versions.yml) ] + +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + THE END 
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/