diff --git a/.circleci/config.yml b/.circleci/config.yml index b5f780c..1e8d627 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -17,15 +17,15 @@ jobs: - checkout - restore_cache: keys: - - v1-nextflow-env-{{ .Branch }}-{{ checksum "test-environment.yml" }} + - v1-nextflow-env-{{ .Branch }}-{{ checksum "tests-environment.yml" }} - restore_cache: keys: - v1-job-env-{{ .Branch }}-{{ checksum "task-environment.yml" }} - run: name: "run integration tests" - command: "test/test1.sh test-results/ mamba nextflowEnv/" + command: "integration-tests/run.sh mamba tests-results/ nextflowEnv/" - save_cache: - key: v1-nextflow-env-{{ .Branch }}-{{ checksum "test-environment.yml" }} + key: v1-nextflow-env-{{ .Branch }}-{{ checksum "tests-environment.yml" }} paths: - nextflowEnv/ - save_cache: @@ -38,4 +38,4 @@ jobs: workflows: test-workflow: jobs: - - integration-test + - integration-tests diff --git a/.dockerignore b/.dockerignore index 5b8b7a5..89eaadd 100644 --- a/.dockerignore +++ b/.dockerignore @@ -9,5 +9,10 @@ work .circleci cache .git +.github .gitignore -nextflowEnv +*Env/ +integration-tests/ +.run/ +*.log +*Env/ \ No newline at end of file diff --git a/.gitignore b/.gitignore index 9e609fe..cc9e385 100644 --- a/.gitignore +++ b/.gitignore @@ -14,3 +14,5 @@ cache/singularity/* *~ *.sif .git +.run/ +*Env/ diff --git a/Dockerfile b/Dockerfile deleted file mode 100644 index 32178c5..0000000 --- a/Dockerfile +++ /dev/null @@ -1,53 +0,0 @@ -FROM continuumio/miniconda3:4.10.3 - -LABEL maintainer="Philip R. Kensche " - -# Capitalized versions for many tools. Minuscle version at least for apt. -ARG HTTP_PROXY="" -ARG http_proxy="$HTTP_PROXY" -ARG HTTPS_PROXY="" -ARG https_proxy="$HTTPS_PROXY" -ARG NO_PROXY="" -ARG no_proxy="$NO_PROXY" - -# Setup base conda container with bash as default shell. -SHELL ["/bin/bash", "-c"] -RUN conda init bash - -# Add nf-bam2fastq requirements. 
-LABEL org.opencontainers.image.source="https://github.com/dkfz-odcf/nf-bam2fastq" -COPY task-environment.yml ./ -RUN conda config --set proxy_servers.http "$HTTP_PROXY" && \ - conda config --set proxy_servers.https "$HTTPS_PROXY" && \ - conda env create -n nf-bam2fastq -f task-environment.yml && \ - source activate nf-bam2fastq && \ - conda clean --all -f -y - -# ps is needed by Nextflow for collecting runtime information from the container -RUN apt update && \ - apt-get install -y procps && \ - rm -rf /var/lib/apt/lists/* && \ - apt clean - -# For login Bash /etc/profile and ~/.profile is sourced. /etc/profile sources /etc/bash.bashrc. -# For non-login, interactive Bash /etc/bash.bashrc is sourced directly. -# For non-login, non-interactive Bash. We set BASH_ENV/ENV to /etc/bash.bashrc -# NOTE: ~/.bashrc could not be used, because when using it, ~/ is /root/. -# Therefore /etc/bash.bashrc is used to use conda for all user IDs. -# NOTE: Conda should be fully available in non-login, interactive shell. Conda itself creates -# /etc/profile.d/conda.sh. The code that `conda init bash` writes to ~/.bashrc is moved -# to /etc/bash.bashrc and reads the /etc/profile.d/conda.sh. -ENV BASH_ENV /etc/container.bashrc -ENV ENV /etc/container.bashrc - -RUN grep "managed by 'conda init'" -A 100 ~/.bashrc >> /etc/container.bashrc && \ - rm ~/.bashrc && \ - echo -e '\ -set +u\n\ -source activate nf-bam2fastq\n\ -set -u\n\' >> /etc/container.bashrc && \ - echo "source /etc/profile" > ~/.profile && \ - cp ~/.profile /.profile && \ - echo "source /etc/container.bashrc" >> /etc/bash.bashrc - -ENTRYPOINT ["bash", "-i", "-c"] diff --git a/README.md b/README.md index 825a3d2..74a65af 100644 --- a/README.md +++ b/README.md @@ -4,62 +4,66 @@ Convert BAM files back to FASTQ. -## Quickstart with Conda +## Quickstart with Docker -We do not recommend Conda for running the workflow. It may happen that packages are not available in any channels anymore and that the environment is broken. 
For reproducible research, please use containers. +Depending on the version of the workflow that you want to run, it might not be possible to rebuild the Conda environment. Therefore, to guarantee reproducibility we create [container images](https://github.com/orgs/DKFZ-ODCF/packages) of the task environment. -Provided you have a working [Conda](https://docs.conda.io/en/latest/) installation, you can run the workflow with +For instance, if you want to run the workflow locally with Docker you can do e.g. ```bash -mkdir test_out/ nextflow run main.nf \ - -profile local,conda \ + -profile local,docker \ -ansi-log \ - --input=/path/to/your.bam \ + --input=integration-tests/reference/test1_paired.bam,integration-tests/reference/test1_unpaired.bam \ --outputDir=test_out \ - --sortFastqs=false + --sortFastqs=true ``` -For each BAM file in the comma-separated `--input` parameter, one directory with FASTQs is created in the `outputDir`. With the `local` profile the processing jobs will be executed locally. The `conda` profile will let Nextflow create a Conda environment from the `task-environment.yml` file. By default, the conda environment will be created in the source directory of the workflow (see [nextflow.config](https://github.com/DKFZ-ODCF/nf-bam2fastq/blob/master/nextflow.config)). +## Quickstart with Singularity -## Quickstart with Docker +In your cluster, you may not have access to Docker. In this situation you can use [Singularity](https://singularity.lbl.gov/), if it is installed in your cluster. Note that unfortunately, Nextflow will fail to convert the Docker image into a Singularity image, unless Docker is available. If that happens, you can build the Singularity image yourself with the `singularity build` command shown in the warning below. -Dependent on the version of the workflow that you want to run it might not be possible to re-build the Conda environment. Therefore, to guarantee reproducibility we create [container images](https://github.com/orgs/DKFZ-ODCF/packages) of the task environment. 
+You can run the workflow with the "singularity" profile, e.g. on an LSF cluster (with `$repoDir` set to the path of your nf-bam2fastq clone): -For instance, if you want to run the workflow locally with Docker you can do e.g. +```bash +nextflow run $repoDir/main.nf \ + -profile lsf,singularity \ + --input=$repoDir/integration-tests/reference/test1_paired.bam,$repoDir/integration-tests/reference/test1_unpaired.bam \ + --outputDir=test_out \ + --sortFastqs=true +``` + +Nextflow will automatically pull the Docker image, convert it into a Singularity image, put it at `$repoDir/cache/singularity/ghcr.io-dkfz-odcf-nf-bam2fastq-$containerVersion.img`, and then run the workflow. + +> WARNING: Downloading the cached container is probably *not* concurrency-safe. If you run multiple workflows at the same time, all of them trying to cache the Singularity container, you will probably end up with a mess. In that case, build the Singularity image manually beforehand with the following command: +> ```bash +> containerVersion=1.3.0 +> repoDir=/path/to/nf-bam2fastq +> +> singularity build \ +> "$repoDir/cache/singularity/ghcr.io-dkfz-odcf-nf-bam2fastq-$containerVersion.img" \ +> "docker://ghcr.io/dkfz-odcf/nf-bam2fastq:$containerVersion" +> ``` + +## Quickstart with Conda + +> NOTE: Conda is a decent tool for building containers, although these containers tend to be rather big. However, we do *not* recommend you use Conda for reproducibly running workflows. The Conda solution proposed here really is mostly for development. We will not give support for this. + +We do not recommend Conda for running the workflow. It may happen that packages are not available in any channels anymore and that the environment is broken. For reproducible research, please use containers. 
+ +Provided you have a working [Conda](https://docs.conda.io/en/latest/) installation, you can run the workflow with ```bash +mkdir test_out/ nextflow run main.nf \ - -profile local,docker \ - -ansi-log \ - --input=test/test1_paired.bam,test/test1_unpaired.bam \ + -profile local,conda \ + --input=/path/to/your.bam \ --outputDir=test_out \ - --sortFastqs=true + --sortFastqs=false ``` -## Quickstart with Singularity +For each BAM file in the comma-separated `--input` parameter, one directory with FASTQs is created in the `outputDir`. With the `local` profile the processing jobs will be executed locally. The `conda` profile will let Nextflow create a Conda environment from the `task-environment.yml` file. By default, the conda environment will be created in the source directory of the workflow (see [nextflow.config](https://github.com/DKFZ-ODCF/nf-bam2fastq/blob/master/nextflow.config)). -In your cluster, you may not have access to Docker. In this situation you can use [Singularity](https://singularity.lbl.gov/), if it is installed in your cluster. Note that unfortunately, Nextflow will fail to convert the Docker image into a Singularity image, unless Docker is available. But you can get the Singularity image yourself: - -1. Create a Singularity image from the public Docker container - ```bash - version=1.0.0 - repoDir=/path/to/nf-bam2fastq - - singularity build \ - "$repoDir/cache/singularity/nf-bam2fastq_$version.sif" \ - "docker://ghcr.io/dkfz-odcf/nf-bam2fastq:$version" - ``` - Note that the location and name of the Singularity image is configured in the `nextflow.config`. -3. Now, you can run the workflow with the "singularity" profile, e.g. 
on an LSF cluster: - ```bash - nextflow run /path/to/nf-bam2fastq/main.nf \ - -profile lsf,singularity \ - -ansi-log \ - --input=test/test1_paired.bam,test/test1_unpaired.bam \ - --outputDir=test_out \ - --sortFastqs=true - ``` ## Remarks @@ -108,6 +112,150 @@ These files are all always produced, independent of whether your data is actuall Note that Nextflow creates the `work/` directory, the `.nextflow/` directory, and the `.nextflow.log*` files in the directory in which it is executed. +#### Example + +For instance, the output for the two test BAMs in the `integration-tests/reference/` directory would look as follows. Note that these files contain multiple read groups: + +```bash +$ samtools view -H integration-tests/reference/test1_paired.bam | grep -P '^@RG' +@RG ID:run4_gerald_D1VCPACXX_4 LB:tumor_gms PL:ILLUMINA SM:sample_tumor_gms +@RG ID:run5_gerald_D1VCPACXX_5 LB:tumor_gms PL:ILLUMINA SM:sample_tumor_gms +@RG ID:run1_gerald_D1VCPACXX_1 LB:tumor_gms PL:ILLUMINA SM:sample_tumor_gms +@RG ID:run3_gerald_D1VCPACXX_3 LB:tumor_gms PL:ILLUMINA SM:sample_tumor_gms +@RG ID:run2_gerald_D1VCPACXX_2 LB:tumor_gms PL:ILLUMINA SM:sample_tumor_gms +``` + +Consequently, there will be a lot of output files: + +```bash +test1_paired.bam +test1_paired.bam_fastqs/ +├── default_R1.fastq.gz +├── default_R2.fastq.gz +├── default_S.fastq.gz +├── default_U1.fastq.gz +├── default_U2.fastq.gz +├── run1_gerald_D1VCPACXX_1_R1.fastq.gz +├── run1_gerald_D1VCPACXX_1_R2.fastq.gz +├── run1_gerald_D1VCPACXX_1_S.fastq.gz +├── run1_gerald_D1VCPACXX_1_U1.fastq.gz +├── run1_gerald_D1VCPACXX_1_U2.fastq.gz +├── run2_gerald_D1VCPACXX_2_R1.fastq.gz +├── run2_gerald_D1VCPACXX_2_R2.fastq.gz +├── run2_gerald_D1VCPACXX_2_S.fastq.gz +├── run2_gerald_D1VCPACXX_2_U1.fastq.gz +├── run2_gerald_D1VCPACXX_2_U2.fastq.gz +├── run3_gerald_D1VCPACXX_3_R1.fastq.gz +├── run3_gerald_D1VCPACXX_3_R2.fastq.gz +├── run3_gerald_D1VCPACXX_3_S.fastq.gz +├── run3_gerald_D1VCPACXX_3_U1.fastq.gz +├── run3_gerald_D1VCPACXX_3_U2.fastq.gz +├── 
run4_gerald_D1VCPACXX_4_R1.fastq.gz +├── run4_gerald_D1VCPACXX_4_R2.fastq.gz +├── run4_gerald_D1VCPACXX_4_S.fastq.gz +├── run4_gerald_D1VCPACXX_4_U1.fastq.gz +├── run4_gerald_D1VCPACXX_4_U2.fastq.gz +├── run5_gerald_D1VCPACXX_5_R1.fastq.gz +├── run5_gerald_D1VCPACXX_5_R2.fastq.gz +├── run5_gerald_D1VCPACXX_5_S.fastq.gz +├── run5_gerald_D1VCPACXX_5_U1.fastq.gz +└── run5_gerald_D1VCPACXX_5_U2.fastq.gz +test1_paired.bam_sorted_fastqs/ +├── default_R1.sorted.fastq.gz +├── default_R2.sorted.fastq.gz +├── default_S.sorted.fastq.gz +├── default_U1.sorted.fastq.gz +├── default_U2.sorted.fastq.gz +├── run1_gerald_D1VCPACXX_1_R1.sorted.fastq.gz +├── run1_gerald_D1VCPACXX_1_R2.sorted.fastq.gz +├── run1_gerald_D1VCPACXX_1_S.sorted.fastq.gz +├── run1_gerald_D1VCPACXX_1_U1.sorted.fastq.gz +├── run1_gerald_D1VCPACXX_1_U2.sorted.fastq.gz +├── run2_gerald_D1VCPACXX_2_R1.sorted.fastq.gz +├── run2_gerald_D1VCPACXX_2_R2.sorted.fastq.gz +├── run2_gerald_D1VCPACXX_2_S.sorted.fastq.gz +├── run2_gerald_D1VCPACXX_2_U1.sorted.fastq.gz +├── run2_gerald_D1VCPACXX_2_U2.sorted.fastq.gz +├── run3_gerald_D1VCPACXX_3_R1.sorted.fastq.gz +├── run3_gerald_D1VCPACXX_3_R2.sorted.fastq.gz +├── run3_gerald_D1VCPACXX_3_S.sorted.fastq.gz +├── run3_gerald_D1VCPACXX_3_U1.sorted.fastq.gz +├── run3_gerald_D1VCPACXX_3_U2.sorted.fastq.gz +├── run4_gerald_D1VCPACXX_4_R1.sorted.fastq.gz +├── run4_gerald_D1VCPACXX_4_R2.sorted.fastq.gz +├── run4_gerald_D1VCPACXX_4_S.sorted.fastq.gz +├── run4_gerald_D1VCPACXX_4_U1.sorted.fastq.gz +├── run4_gerald_D1VCPACXX_4_U2.sorted.fastq.gz +├── run5_gerald_D1VCPACXX_5_R1.sorted.fastq.gz +├── run5_gerald_D1VCPACXX_5_R2.sorted.fastq.gz +├── run5_gerald_D1VCPACXX_5_S.sorted.fastq.gz +├── run5_gerald_D1VCPACXX_5_U1.sorted.fastq.gz +└── run5_gerald_D1VCPACXX_5_U2.sorted.fastq.gz +test1_unpaired.bam +test1_unpaired.bam_fastqs/ +├── default_R1.fastq.gz +├── default_R2.fastq.gz +├── default_S.fastq.gz +├── default_U1.fastq.gz +├── default_U2.fastq.gz +├── 
run1_gerald_D1VCPACXX_1_R1.fastq.gz +├── run1_gerald_D1VCPACXX_1_R2.fastq.gz +├── run1_gerald_D1VCPACXX_1_S.fastq.gz +├── run1_gerald_D1VCPACXX_1_U1.fastq.gz +├── run1_gerald_D1VCPACXX_1_U2.fastq.gz +├── run2_gerald_D1VCPACXX_2_R1.fastq.gz +├── run2_gerald_D1VCPACXX_2_R2.fastq.gz +├── run2_gerald_D1VCPACXX_2_S.fastq.gz +├── run2_gerald_D1VCPACXX_2_U1.fastq.gz +├── run2_gerald_D1VCPACXX_2_U2.fastq.gz +├── run3_gerald_D1VCPACXX_3_R1.fastq.gz +├── run3_gerald_D1VCPACXX_3_R2.fastq.gz +├── run3_gerald_D1VCPACXX_3_S.fastq.gz +├── run3_gerald_D1VCPACXX_3_U1.fastq.gz +├── run3_gerald_D1VCPACXX_3_U2.fastq.gz +├── run4_gerald_D1VCPACXX_4_R1.fastq.gz +├── run4_gerald_D1VCPACXX_4_R2.fastq.gz +├── run4_gerald_D1VCPACXX_4_S.fastq.gz +├── run4_gerald_D1VCPACXX_4_U1.fastq.gz +├── run4_gerald_D1VCPACXX_4_U2.fastq.gz +├── run5_gerald_D1VCPACXX_5_R1.fastq.gz +├── run5_gerald_D1VCPACXX_5_R2.fastq.gz +├── run5_gerald_D1VCPACXX_5_S.fastq.gz +├── run5_gerald_D1VCPACXX_5_U1.fastq.gz +└── run5_gerald_D1VCPACXX_5_U2.fastq.gz +test1_unpaired.bam_sorted_fastqs/ +├── default_R1.sorted.fastq.gz +├── default_R2.sorted.fastq.gz +├── default_S.sorted.fastq.gz +├── default_U1.sorted.fastq.gz +├── default_U2.sorted.fastq.gz +├── run1_gerald_D1VCPACXX_1_R1.sorted.fastq.gz +├── run1_gerald_D1VCPACXX_1_R2.sorted.fastq.gz +├── run1_gerald_D1VCPACXX_1_S.sorted.fastq.gz +├── run1_gerald_D1VCPACXX_1_U1.sorted.fastq.gz +├── run1_gerald_D1VCPACXX_1_U2.sorted.fastq.gz +├── run2_gerald_D1VCPACXX_2_R1.sorted.fastq.gz +├── run2_gerald_D1VCPACXX_2_R2.sorted.fastq.gz +├── run2_gerald_D1VCPACXX_2_S.sorted.fastq.gz +├── run2_gerald_D1VCPACXX_2_U1.sorted.fastq.gz +├── run2_gerald_D1VCPACXX_2_U2.sorted.fastq.gz +├── run3_gerald_D1VCPACXX_3_R1.sorted.fastq.gz +├── run3_gerald_D1VCPACXX_3_R2.sorted.fastq.gz +├── run3_gerald_D1VCPACXX_3_S.sorted.fastq.gz +├── run3_gerald_D1VCPACXX_3_U1.sorted.fastq.gz +├── run3_gerald_D1VCPACXX_3_U2.sorted.fastq.gz +├── run4_gerald_D1VCPACXX_4_R1.sorted.fastq.gz +├── 
run4_gerald_D1VCPACXX_4_R2.sorted.fastq.gz +├── run4_gerald_D1VCPACXX_4_S.sorted.fastq.gz +├── run4_gerald_D1VCPACXX_4_U1.sorted.fastq.gz +├── run4_gerald_D1VCPACXX_4_U2.sorted.fastq.gz +├── run5_gerald_D1VCPACXX_5_R1.sorted.fastq.gz +├── run5_gerald_D1VCPACXX_5_R2.sorted.fastq.gz +├── run5_gerald_D1VCPACXX_5_S.sorted.fastq.gz +├── run5_gerald_D1VCPACXX_5_U1.sorted.fastq.gz +└── run5_gerald_D1VCPACXX_5_U2.sorted.fastq.gz +``` + ## Environment and Execution [Nextflow](https://www.nextflow.io/docs/latest/config.html#config-profiles)'s `-profile` parameter allows setting technical options for executing the workflow. You have already seen some of the profiles and that these can be combined. We conceptually separated the predefined profiles into two types -- those concerning the "environment" and those for selecting the "executor". @@ -132,11 +280,11 @@ By default, the Conda environments of the jobs as well as the Singularity contai cd $workflowRepoDir # Refer to the nextflow.config for the name of the Singularity image. singularity build \ - cache/singularity/nf-bam2fastq_1.0.0.sif \ - docker://ghcr.io/dkfz-odcf/nf-bam2fastq:1.0.0 + cache/singularity/ghcr.io-dkfz-odcf-nf-bam2fastq-$containerVersion.img \ + container-specs/Singularity.def # Test your container -test/test1.sh test-results/ singularity nextflowEnv/ +integration-tests/run.sh singularity test-results/ nextflowEnv/ ``` This is suited for either a user-specific installation or for a centralized installation for which the environments should be shared for all users. Please refer to the `nextflow.config` or the `NXF_*_CACHEDIR` environment variables to change this default (see [here](https://www.nextflow.io/docs/latest/config.html#environment-variables). 
@@ -148,10 +296,10 @@ Make sure your users have read and execute permissions on the directories and re The integration tests can be run with ```bash -test/test1.sh test-results/ $profile +integration-tests/run.sh $profile test-results/ ``` -This will create a test Conda environment in `test-results/nextflowEnv` and then run the tests. For the tests themselves you can use a local Conda environment or a Docker container, dependent on whether you set `$profile` to "conda" or "docker", respectively. These integration tests are also run in Travis CI. +This will create a test Conda environment in `./nextflowEnv` and then run the tests. For the tests themselves you can use a local Conda environment or a Docker container, depending on whether you set `$profile` to "conda" or "docker", respectively. These integration tests are also run in CircleCI. ### Continuous Delivery @@ -165,31 +313,43 @@ This is an outline of the procedure to release the container to [Github Containe 1. Set the version that you want to release as variable. For the later commands you can set the Bash variable ```bash - versionTag=1.2.0 + containerVersion=1.3.0 ``` 2. Build the container. - ```bash + ```bash docker \ build \ - -t ghcr.io/dkfz-odcf/nf-bam2fastq:$versionTag \ + -t ghcr.io/dkfz-odcf/nf-bam2fastq:$containerVersion \ --build-arg HTTP_PROXY=$HTTP_PROXY \ --build-arg HTTPS_PROXY=$HTTPS_PROXY \ + -f container-specs/Dockerfile \ ./ ``` -3. Edit the version-tag for the docker container in the "docker"-profile in the `nextflow.config` to match `$versionTag`. +3. Edit the version-tag for the docker container in the "docker"-profile in the `nextflow.config` to match `$containerVersion`. 4. Run the integration test with the new container ```bash - test/test1.sh docker-test docker + integration-tests/run.sh docker docker-test-results/ ``` 5. If the test succeeds, push the container to Github container registry. 
Set the CR_PAT variable to your personal access token (PAT): ```bash echo $CR_PAT | docker login ghcr.io -u vinjana --password-stdin - docker image push ghcr.io/dkfz-odcf/nf-bam2fastq:$versionTag + docker image push ghcr.io/dkfz-odcf/nf-bam2fastq:$containerVersion ``` ## Release Notes -* 1.2.0 +* 1.3.0 (March, 2024) + * Minor: Let Nextflow automatically create the cached Singularity image. + > NOTE: The cached image name was changed to Nextflow's default name. + * Patch: Switched to the simpler Dockerfile that is also used in the [nf-seq-qc](https://gitlab.com/one-touch-pipeline/workflows/nf-seq-qc) and [nf-seq-convert](https://gitlab.com/one-touch-pipeline/workflows/nf-seq-convert) workflows. + * Patch: Bumped Dockerfile base image to miniconda3:4.12.0. + * Patch: Bumped minimum Nextflow to 23.10.1. Version 22 uses `singularity exec`, while 23 uses `singularity run`, which impacts process isolation. + * Patch: Added a `Singularity.def`, in case the automatic conversion by Nextflow does not work. + * Patch: Mention Conda only for development in `README.md`. Otherwise, it should not be used. + * Patch: Test-script now implements a simple backwards-compatibility test by comparing against old result files. + * Patch: Renamed `test/test1.sh` to `integration-tests/run.sh`. Changed the order of parameters: the profile now comes first, followed by the results directory. + +* 1.2.0 (May, 2023) * Minor: Updated to miniconda3:4.10.3 base container, because the previous version (4.9.2) didn't build anymore. * Minor: Use `-env none` for "lsf" cluster profile. Local environment should not be copied. This probably caused problems with the old "dkfzModules" environment profile. * Patch: Require Nextflow >= 22.07.1, which fixes an LSF memory request bug. Added options for per-job memory requests to "lsf" profile in `nextflow.config`. 
diff --git a/container-specs/Dockerfile b/container-specs/Dockerfile new file mode 100644 index 0000000..1a4cffc --- /dev/null +++ b/container-specs/Dockerfile @@ -0,0 +1,36 @@ +# SPDX-FileCopyrightText: 2024 The OTP authors +# +# SPDX-License-Identifier: MIT + +FROM continuumio/miniconda3:4.12.0 + +LABEL maintainer="Philip R. Kensche " + +# Capitalized versions for many tools. Minuscule version at least for apt. +ARG HTTP_PROXY="" +ARG http_proxy="$HTTP_PROXY" +ARG HTTPS_PROXY="" +ARG https_proxy="$HTTPS_PROXY" +ARG NO_PROXY="" +ARG no_proxy="$NO_PROXY" + +# Setup base conda container with bash as default shell. +SHELL ["/bin/bash", "-c"] +RUN conda init bash + +# Now, the stuff that needs to be rebuilt for every workflow/container. +# The envName can be reused in later RUN/LABEL layers, and in a Singularity.def. +ENV envName="nf-bam2fastq" +LABEL org.opencontainers.image.source="https://github.com/dkfz-odcf/$envName" + +COPY task-environment.yml ./ + +RUN conda config --set proxy_servers.http "$HTTP_PROXY" && \ + conda config --set proxy_servers.https "$HTTPS_PROXY" && \ + conda env create -n "$envName" -f task-environment.yml && \ + source activate "$envName" && \ + conda clean --all -f -y + +# Use -p path instead of -n name to activate the environment. Otherwise, Conda will fail with +# NoWritableEnvsDirError. Exec-form ENTRYPOINT does not expand $envName; keep the path in sync with it. +ENTRYPOINT ["conda", "run", "--no-capture-output", "-p", "/opt/conda/envs/nf-bam2fastq"] diff --git a/container-specs/Singularity.def b/container-specs/Singularity.def new file mode 100644 index 0000000..e00fe29 --- /dev/null +++ b/container-specs/Singularity.def @@ -0,0 +1,15 @@ +# SPDX-FileCopyrightText: 2023 The OTP authors +# +# SPDX-License-Identifier: MIT + +# This file is needed for the conversion of the Docker image into a Singularity image that +# can be executed with `singularity exec`, like Nextflow does it. 
`singularity exec` does *not* +# invoke the runscript, but the environment initialization in the Dockerfile is done via the +# ENTRYPOINT and that is converted into the Singularity runscript. +# +# Note that the docker-daemon bootstrap allows to do this with locally tagged images. +Bootstrap: docker-daemon +From: ghcr.io/dkfz-odcf/nf-bam2fastq:1.3.0 + +%environment + source activate "$envName" diff --git a/test/test1_paired.bam b/integration-tests/reference/test1_paired.bam similarity index 100% rename from test/test1_paired.bam rename to integration-tests/reference/test1_paired.bam diff --git a/integration-tests/reference/test1_paired.bam_fastqs/default_R1.fastq.gz b/integration-tests/reference/test1_paired.bam_fastqs/default_R1.fastq.gz new file mode 100644 index 0000000..229151a Binary files /dev/null and b/integration-tests/reference/test1_paired.bam_fastqs/default_R1.fastq.gz differ diff --git a/integration-tests/reference/test1_paired.bam_fastqs/default_R2.fastq.gz b/integration-tests/reference/test1_paired.bam_fastqs/default_R2.fastq.gz new file mode 100644 index 0000000..229151a Binary files /dev/null and b/integration-tests/reference/test1_paired.bam_fastqs/default_R2.fastq.gz differ diff --git a/integration-tests/reference/test1_paired.bam_fastqs/default_S.fastq.gz b/integration-tests/reference/test1_paired.bam_fastqs/default_S.fastq.gz new file mode 100644 index 0000000..229151a Binary files /dev/null and b/integration-tests/reference/test1_paired.bam_fastqs/default_S.fastq.gz differ diff --git a/integration-tests/reference/test1_paired.bam_fastqs/default_U1.fastq.gz b/integration-tests/reference/test1_paired.bam_fastqs/default_U1.fastq.gz new file mode 100644 index 0000000..229151a Binary files /dev/null and b/integration-tests/reference/test1_paired.bam_fastqs/default_U1.fastq.gz differ diff --git a/integration-tests/reference/test1_paired.bam_fastqs/default_U2.fastq.gz b/integration-tests/reference/test1_paired.bam_fastqs/default_U2.fastq.gz new file 
mode 100644 index 0000000..229151a Binary files /dev/null and b/integration-tests/reference/test1_paired.bam_fastqs/default_U2.fastq.gz differ diff --git a/integration-tests/reference/test1_paired.bam_fastqs/run1_gerald_D1VCPACXX_1_R1.fastq.gz b/integration-tests/reference/test1_paired.bam_fastqs/run1_gerald_D1VCPACXX_1_R1.fastq.gz new file mode 100644 index 0000000..582c512 Binary files /dev/null and b/integration-tests/reference/test1_paired.bam_fastqs/run1_gerald_D1VCPACXX_1_R1.fastq.gz differ diff --git a/integration-tests/reference/test1_paired.bam_fastqs/run1_gerald_D1VCPACXX_1_R2.fastq.gz b/integration-tests/reference/test1_paired.bam_fastqs/run1_gerald_D1VCPACXX_1_R2.fastq.gz new file mode 100644 index 0000000..03b0182 Binary files /dev/null and b/integration-tests/reference/test1_paired.bam_fastqs/run1_gerald_D1VCPACXX_1_R2.fastq.gz differ diff --git a/integration-tests/reference/test1_paired.bam_fastqs/run1_gerald_D1VCPACXX_1_S.fastq.gz b/integration-tests/reference/test1_paired.bam_fastqs/run1_gerald_D1VCPACXX_1_S.fastq.gz new file mode 100644 index 0000000..229151a Binary files /dev/null and b/integration-tests/reference/test1_paired.bam_fastqs/run1_gerald_D1VCPACXX_1_S.fastq.gz differ diff --git a/integration-tests/reference/test1_paired.bam_fastqs/run1_gerald_D1VCPACXX_1_U1.fastq.gz b/integration-tests/reference/test1_paired.bam_fastqs/run1_gerald_D1VCPACXX_1_U1.fastq.gz new file mode 100644 index 0000000..0025566 Binary files /dev/null and b/integration-tests/reference/test1_paired.bam_fastqs/run1_gerald_D1VCPACXX_1_U1.fastq.gz differ diff --git a/integration-tests/reference/test1_paired.bam_fastqs/run1_gerald_D1VCPACXX_1_U2.fastq.gz b/integration-tests/reference/test1_paired.bam_fastqs/run1_gerald_D1VCPACXX_1_U2.fastq.gz new file mode 100644 index 0000000..f2061b4 Binary files /dev/null and b/integration-tests/reference/test1_paired.bam_fastqs/run1_gerald_D1VCPACXX_1_U2.fastq.gz differ diff --git 
a/integration-tests/reference/test1_paired.bam_fastqs/run2_gerald_D1VCPACXX_2_R1.fastq.gz b/integration-tests/reference/test1_paired.bam_fastqs/run2_gerald_D1VCPACXX_2_R1.fastq.gz new file mode 100644 index 0000000..186d3d8 Binary files /dev/null and b/integration-tests/reference/test1_paired.bam_fastqs/run2_gerald_D1VCPACXX_2_R1.fastq.gz differ diff --git a/integration-tests/reference/test1_paired.bam_fastqs/run2_gerald_D1VCPACXX_2_R2.fastq.gz b/integration-tests/reference/test1_paired.bam_fastqs/run2_gerald_D1VCPACXX_2_R2.fastq.gz new file mode 100644 index 0000000..5fbc211 Binary files /dev/null and b/integration-tests/reference/test1_paired.bam_fastqs/run2_gerald_D1VCPACXX_2_R2.fastq.gz differ diff --git a/integration-tests/reference/test1_paired.bam_fastqs/run2_gerald_D1VCPACXX_2_S.fastq.gz b/integration-tests/reference/test1_paired.bam_fastqs/run2_gerald_D1VCPACXX_2_S.fastq.gz new file mode 100644 index 0000000..229151a Binary files /dev/null and b/integration-tests/reference/test1_paired.bam_fastqs/run2_gerald_D1VCPACXX_2_S.fastq.gz differ diff --git a/integration-tests/reference/test1_paired.bam_fastqs/run2_gerald_D1VCPACXX_2_U1.fastq.gz b/integration-tests/reference/test1_paired.bam_fastqs/run2_gerald_D1VCPACXX_2_U1.fastq.gz new file mode 100644 index 0000000..44a9caf Binary files /dev/null and b/integration-tests/reference/test1_paired.bam_fastqs/run2_gerald_D1VCPACXX_2_U1.fastq.gz differ diff --git a/integration-tests/reference/test1_paired.bam_fastqs/run2_gerald_D1VCPACXX_2_U2.fastq.gz b/integration-tests/reference/test1_paired.bam_fastqs/run2_gerald_D1VCPACXX_2_U2.fastq.gz new file mode 100644 index 0000000..6302187 Binary files /dev/null and b/integration-tests/reference/test1_paired.bam_fastqs/run2_gerald_D1VCPACXX_2_U2.fastq.gz differ diff --git a/integration-tests/reference/test1_paired.bam_fastqs/run3_gerald_D1VCPACXX_3_R1.fastq.gz b/integration-tests/reference/test1_paired.bam_fastqs/run3_gerald_D1VCPACXX_3_R1.fastq.gz new file mode 100644 index 
0000000..6d9f31a Binary files /dev/null and b/integration-tests/reference/test1_paired.bam_fastqs/run3_gerald_D1VCPACXX_3_R1.fastq.gz differ diff --git a/integration-tests/reference/test1_paired.bam_fastqs/run3_gerald_D1VCPACXX_3_R2.fastq.gz b/integration-tests/reference/test1_paired.bam_fastqs/run3_gerald_D1VCPACXX_3_R2.fastq.gz new file mode 100644 index 0000000..3a91160 Binary files /dev/null and b/integration-tests/reference/test1_paired.bam_fastqs/run3_gerald_D1VCPACXX_3_R2.fastq.gz differ diff --git a/integration-tests/reference/test1_paired.bam_fastqs/run3_gerald_D1VCPACXX_3_S.fastq.gz b/integration-tests/reference/test1_paired.bam_fastqs/run3_gerald_D1VCPACXX_3_S.fastq.gz new file mode 100644 index 0000000..229151a Binary files /dev/null and b/integration-tests/reference/test1_paired.bam_fastqs/run3_gerald_D1VCPACXX_3_S.fastq.gz differ diff --git a/integration-tests/reference/test1_paired.bam_fastqs/run3_gerald_D1VCPACXX_3_U1.fastq.gz b/integration-tests/reference/test1_paired.bam_fastqs/run3_gerald_D1VCPACXX_3_U1.fastq.gz new file mode 100644 index 0000000..d9c3c0f Binary files /dev/null and b/integration-tests/reference/test1_paired.bam_fastqs/run3_gerald_D1VCPACXX_3_U1.fastq.gz differ diff --git a/integration-tests/reference/test1_paired.bam_fastqs/run3_gerald_D1VCPACXX_3_U2.fastq.gz b/integration-tests/reference/test1_paired.bam_fastqs/run3_gerald_D1VCPACXX_3_U2.fastq.gz new file mode 100644 index 0000000..5022e21 Binary files /dev/null and b/integration-tests/reference/test1_paired.bam_fastqs/run3_gerald_D1VCPACXX_3_U2.fastq.gz differ diff --git a/integration-tests/reference/test1_paired.bam_fastqs/run4_gerald_D1VCPACXX_4_R1.fastq.gz b/integration-tests/reference/test1_paired.bam_fastqs/run4_gerald_D1VCPACXX_4_R1.fastq.gz new file mode 100644 index 0000000..3d1e6de Binary files /dev/null and b/integration-tests/reference/test1_paired.bam_fastqs/run4_gerald_D1VCPACXX_4_R1.fastq.gz differ diff --git 
a/integration-tests/reference/test1_paired.bam_fastqs/run4_gerald_D1VCPACXX_4_R2.fastq.gz b/integration-tests/reference/test1_paired.bam_fastqs/run4_gerald_D1VCPACXX_4_R2.fastq.gz new file mode 100644 index 0000000..15c5c71 Binary files /dev/null and b/integration-tests/reference/test1_paired.bam_fastqs/run4_gerald_D1VCPACXX_4_R2.fastq.gz differ diff --git a/integration-tests/reference/test1_paired.bam_fastqs/run4_gerald_D1VCPACXX_4_S.fastq.gz b/integration-tests/reference/test1_paired.bam_fastqs/run4_gerald_D1VCPACXX_4_S.fastq.gz new file mode 100644 index 0000000..229151a Binary files /dev/null and b/integration-tests/reference/test1_paired.bam_fastqs/run4_gerald_D1VCPACXX_4_S.fastq.gz differ diff --git a/integration-tests/reference/test1_paired.bam_fastqs/run4_gerald_D1VCPACXX_4_U1.fastq.gz b/integration-tests/reference/test1_paired.bam_fastqs/run4_gerald_D1VCPACXX_4_U1.fastq.gz new file mode 100644 index 0000000..55c2d4e Binary files /dev/null and b/integration-tests/reference/test1_paired.bam_fastqs/run4_gerald_D1VCPACXX_4_U1.fastq.gz differ diff --git a/integration-tests/reference/test1_paired.bam_fastqs/run4_gerald_D1VCPACXX_4_U2.fastq.gz b/integration-tests/reference/test1_paired.bam_fastqs/run4_gerald_D1VCPACXX_4_U2.fastq.gz new file mode 100644 index 0000000..579f3eb Binary files /dev/null and b/integration-tests/reference/test1_paired.bam_fastqs/run4_gerald_D1VCPACXX_4_U2.fastq.gz differ diff --git a/integration-tests/reference/test1_paired.bam_fastqs/run5_gerald_D1VCPACXX_5_R1.fastq.gz b/integration-tests/reference/test1_paired.bam_fastqs/run5_gerald_D1VCPACXX_5_R1.fastq.gz new file mode 100644 index 0000000..01103e1 Binary files /dev/null and b/integration-tests/reference/test1_paired.bam_fastqs/run5_gerald_D1VCPACXX_5_R1.fastq.gz differ diff --git a/integration-tests/reference/test1_paired.bam_fastqs/run5_gerald_D1VCPACXX_5_R2.fastq.gz b/integration-tests/reference/test1_paired.bam_fastqs/run5_gerald_D1VCPACXX_5_R2.fastq.gz new file mode 100644 index 
0000000..54c45ae Binary files /dev/null and b/integration-tests/reference/test1_paired.bam_fastqs/run5_gerald_D1VCPACXX_5_R2.fastq.gz differ diff --git a/integration-tests/reference/test1_paired.bam_fastqs/run5_gerald_D1VCPACXX_5_S.fastq.gz b/integration-tests/reference/test1_paired.bam_fastqs/run5_gerald_D1VCPACXX_5_S.fastq.gz new file mode 100644 index 0000000..229151a Binary files /dev/null and b/integration-tests/reference/test1_paired.bam_fastqs/run5_gerald_D1VCPACXX_5_S.fastq.gz differ diff --git a/integration-tests/reference/test1_paired.bam_fastqs/run5_gerald_D1VCPACXX_5_U1.fastq.gz b/integration-tests/reference/test1_paired.bam_fastqs/run5_gerald_D1VCPACXX_5_U1.fastq.gz new file mode 100644 index 0000000..2fdb57a Binary files /dev/null and b/integration-tests/reference/test1_paired.bam_fastqs/run5_gerald_D1VCPACXX_5_U1.fastq.gz differ diff --git a/integration-tests/reference/test1_paired.bam_fastqs/run5_gerald_D1VCPACXX_5_U2.fastq.gz b/integration-tests/reference/test1_paired.bam_fastqs/run5_gerald_D1VCPACXX_5_U2.fastq.gz new file mode 100644 index 0000000..7166da2 Binary files /dev/null and b/integration-tests/reference/test1_paired.bam_fastqs/run5_gerald_D1VCPACXX_5_U2.fastq.gz differ diff --git a/integration-tests/reference/test1_paired.bam_sorted_fastqs/default_R1.sorted.fastq.gz b/integration-tests/reference/test1_paired.bam_sorted_fastqs/default_R1.sorted.fastq.gz new file mode 100644 index 0000000..22ceeef Binary files /dev/null and b/integration-tests/reference/test1_paired.bam_sorted_fastqs/default_R1.sorted.fastq.gz differ diff --git a/integration-tests/reference/test1_paired.bam_sorted_fastqs/default_R2.sorted.fastq.gz b/integration-tests/reference/test1_paired.bam_sorted_fastqs/default_R2.sorted.fastq.gz new file mode 100644 index 0000000..22ceeef Binary files /dev/null and b/integration-tests/reference/test1_paired.bam_sorted_fastqs/default_R2.sorted.fastq.gz differ diff --git 
a/integration-tests/reference/test1_paired.bam_sorted_fastqs/default_S.sorted.fastq.gz b/integration-tests/reference/test1_paired.bam_sorted_fastqs/default_S.sorted.fastq.gz new file mode 100644 index 0000000..99561c6 Binary files /dev/null and b/integration-tests/reference/test1_paired.bam_sorted_fastqs/default_S.sorted.fastq.gz differ diff --git a/integration-tests/reference/test1_paired.bam_sorted_fastqs/default_U1.sorted.fastq.gz b/integration-tests/reference/test1_paired.bam_sorted_fastqs/default_U1.sorted.fastq.gz new file mode 100644 index 0000000..b648b3d Binary files /dev/null and b/integration-tests/reference/test1_paired.bam_sorted_fastqs/default_U1.sorted.fastq.gz differ diff --git a/integration-tests/reference/test1_paired.bam_sorted_fastqs/default_U2.sorted.fastq.gz b/integration-tests/reference/test1_paired.bam_sorted_fastqs/default_U2.sorted.fastq.gz new file mode 100644 index 0000000..d4925a9 Binary files /dev/null and b/integration-tests/reference/test1_paired.bam_sorted_fastqs/default_U2.sorted.fastq.gz differ diff --git a/integration-tests/reference/test1_paired.bam_sorted_fastqs/run1_gerald_D1VCPACXX_1_R1.sorted.fastq.gz b/integration-tests/reference/test1_paired.bam_sorted_fastqs/run1_gerald_D1VCPACXX_1_R1.sorted.fastq.gz new file mode 100644 index 0000000..4ff1674 Binary files /dev/null and b/integration-tests/reference/test1_paired.bam_sorted_fastqs/run1_gerald_D1VCPACXX_1_R1.sorted.fastq.gz differ diff --git a/integration-tests/reference/test1_paired.bam_sorted_fastqs/run1_gerald_D1VCPACXX_1_R2.sorted.fastq.gz b/integration-tests/reference/test1_paired.bam_sorted_fastqs/run1_gerald_D1VCPACXX_1_R2.sorted.fastq.gz new file mode 100644 index 0000000..139a904 Binary files /dev/null and b/integration-tests/reference/test1_paired.bam_sorted_fastqs/run1_gerald_D1VCPACXX_1_R2.sorted.fastq.gz differ diff --git a/integration-tests/reference/test1_paired.bam_sorted_fastqs/run1_gerald_D1VCPACXX_1_S.sorted.fastq.gz 
b/integration-tests/reference/test1_paired.bam_sorted_fastqs/run1_gerald_D1VCPACXX_1_S.sorted.fastq.gz new file mode 100644 index 0000000..d4925a9 Binary files /dev/null and b/integration-tests/reference/test1_paired.bam_sorted_fastqs/run1_gerald_D1VCPACXX_1_S.sorted.fastq.gz differ diff --git a/integration-tests/reference/test1_paired.bam_sorted_fastqs/run1_gerald_D1VCPACXX_1_U1.sorted.fastq.gz b/integration-tests/reference/test1_paired.bam_sorted_fastqs/run1_gerald_D1VCPACXX_1_U1.sorted.fastq.gz new file mode 100644 index 0000000..d25e766 Binary files /dev/null and b/integration-tests/reference/test1_paired.bam_sorted_fastqs/run1_gerald_D1VCPACXX_1_U1.sorted.fastq.gz differ diff --git a/integration-tests/reference/test1_paired.bam_sorted_fastqs/run1_gerald_D1VCPACXX_1_U2.sorted.fastq.gz b/integration-tests/reference/test1_paired.bam_sorted_fastqs/run1_gerald_D1VCPACXX_1_U2.sorted.fastq.gz new file mode 100644 index 0000000..3bb2dd6 Binary files /dev/null and b/integration-tests/reference/test1_paired.bam_sorted_fastqs/run1_gerald_D1VCPACXX_1_U2.sorted.fastq.gz differ diff --git a/integration-tests/reference/test1_paired.bam_sorted_fastqs/run2_gerald_D1VCPACXX_2_R1.sorted.fastq.gz b/integration-tests/reference/test1_paired.bam_sorted_fastqs/run2_gerald_D1VCPACXX_2_R1.sorted.fastq.gz new file mode 100644 index 0000000..24cc72c Binary files /dev/null and b/integration-tests/reference/test1_paired.bam_sorted_fastqs/run2_gerald_D1VCPACXX_2_R1.sorted.fastq.gz differ diff --git a/integration-tests/reference/test1_paired.bam_sorted_fastqs/run2_gerald_D1VCPACXX_2_R2.sorted.fastq.gz b/integration-tests/reference/test1_paired.bam_sorted_fastqs/run2_gerald_D1VCPACXX_2_R2.sorted.fastq.gz new file mode 100644 index 0000000..af4ad42 Binary files /dev/null and b/integration-tests/reference/test1_paired.bam_sorted_fastqs/run2_gerald_D1VCPACXX_2_R2.sorted.fastq.gz differ diff --git 
a/integration-tests/reference/test1_paired.bam_sorted_fastqs/run2_gerald_D1VCPACXX_2_S.sorted.fastq.gz b/integration-tests/reference/test1_paired.bam_sorted_fastqs/run2_gerald_D1VCPACXX_2_S.sorted.fastq.gz new file mode 100644 index 0000000..701d0f9 Binary files /dev/null and b/integration-tests/reference/test1_paired.bam_sorted_fastqs/run2_gerald_D1VCPACXX_2_S.sorted.fastq.gz differ diff --git a/integration-tests/reference/test1_paired.bam_sorted_fastqs/run2_gerald_D1VCPACXX_2_U1.sorted.fastq.gz b/integration-tests/reference/test1_paired.bam_sorted_fastqs/run2_gerald_D1VCPACXX_2_U1.sorted.fastq.gz new file mode 100644 index 0000000..6cd7db2 Binary files /dev/null and b/integration-tests/reference/test1_paired.bam_sorted_fastqs/run2_gerald_D1VCPACXX_2_U1.sorted.fastq.gz differ diff --git a/integration-tests/reference/test1_paired.bam_sorted_fastqs/run2_gerald_D1VCPACXX_2_U2.sorted.fastq.gz b/integration-tests/reference/test1_paired.bam_sorted_fastqs/run2_gerald_D1VCPACXX_2_U2.sorted.fastq.gz new file mode 100644 index 0000000..739f582 Binary files /dev/null and b/integration-tests/reference/test1_paired.bam_sorted_fastqs/run2_gerald_D1VCPACXX_2_U2.sorted.fastq.gz differ diff --git a/integration-tests/reference/test1_paired.bam_sorted_fastqs/run3_gerald_D1VCPACXX_3_R1.sorted.fastq.gz b/integration-tests/reference/test1_paired.bam_sorted_fastqs/run3_gerald_D1VCPACXX_3_R1.sorted.fastq.gz new file mode 100644 index 0000000..150480d Binary files /dev/null and b/integration-tests/reference/test1_paired.bam_sorted_fastqs/run3_gerald_D1VCPACXX_3_R1.sorted.fastq.gz differ diff --git a/integration-tests/reference/test1_paired.bam_sorted_fastqs/run3_gerald_D1VCPACXX_3_R2.sorted.fastq.gz b/integration-tests/reference/test1_paired.bam_sorted_fastqs/run3_gerald_D1VCPACXX_3_R2.sorted.fastq.gz new file mode 100644 index 0000000..2b653f1 Binary files /dev/null and b/integration-tests/reference/test1_paired.bam_sorted_fastqs/run3_gerald_D1VCPACXX_3_R2.sorted.fastq.gz differ diff 
--git a/integration-tests/reference/test1_paired.bam_sorted_fastqs/run3_gerald_D1VCPACXX_3_S.sorted.fastq.gz b/integration-tests/reference/test1_paired.bam_sorted_fastqs/run3_gerald_D1VCPACXX_3_S.sorted.fastq.gz new file mode 100644 index 0000000..701d0f9 Binary files /dev/null and b/integration-tests/reference/test1_paired.bam_sorted_fastqs/run3_gerald_D1VCPACXX_3_S.sorted.fastq.gz differ diff --git a/integration-tests/reference/test1_paired.bam_sorted_fastqs/run3_gerald_D1VCPACXX_3_U1.sorted.fastq.gz b/integration-tests/reference/test1_paired.bam_sorted_fastqs/run3_gerald_D1VCPACXX_3_U1.sorted.fastq.gz new file mode 100644 index 0000000..52e7814 Binary files /dev/null and b/integration-tests/reference/test1_paired.bam_sorted_fastqs/run3_gerald_D1VCPACXX_3_U1.sorted.fastq.gz differ diff --git a/integration-tests/reference/test1_paired.bam_sorted_fastqs/run3_gerald_D1VCPACXX_3_U2.sorted.fastq.gz b/integration-tests/reference/test1_paired.bam_sorted_fastqs/run3_gerald_D1VCPACXX_3_U2.sorted.fastq.gz new file mode 100644 index 0000000..6b10cdd Binary files /dev/null and b/integration-tests/reference/test1_paired.bam_sorted_fastqs/run3_gerald_D1VCPACXX_3_U2.sorted.fastq.gz differ diff --git a/integration-tests/reference/test1_paired.bam_sorted_fastqs/run4_gerald_D1VCPACXX_4_R1.sorted.fastq.gz b/integration-tests/reference/test1_paired.bam_sorted_fastqs/run4_gerald_D1VCPACXX_4_R1.sorted.fastq.gz new file mode 100644 index 0000000..0fea15c Binary files /dev/null and b/integration-tests/reference/test1_paired.bam_sorted_fastqs/run4_gerald_D1VCPACXX_4_R1.sorted.fastq.gz differ diff --git a/integration-tests/reference/test1_paired.bam_sorted_fastqs/run4_gerald_D1VCPACXX_4_R2.sorted.fastq.gz b/integration-tests/reference/test1_paired.bam_sorted_fastqs/run4_gerald_D1VCPACXX_4_R2.sorted.fastq.gz new file mode 100644 index 0000000..4d803ed Binary files /dev/null and b/integration-tests/reference/test1_paired.bam_sorted_fastqs/run4_gerald_D1VCPACXX_4_R2.sorted.fastq.gz differ 
diff --git a/integration-tests/reference/test1_paired.bam_sorted_fastqs/run4_gerald_D1VCPACXX_4_S.sorted.fastq.gz b/integration-tests/reference/test1_paired.bam_sorted_fastqs/run4_gerald_D1VCPACXX_4_S.sorted.fastq.gz new file mode 100644 index 0000000..87f7fb4 Binary files /dev/null and b/integration-tests/reference/test1_paired.bam_sorted_fastqs/run4_gerald_D1VCPACXX_4_S.sorted.fastq.gz differ diff --git a/integration-tests/reference/test1_paired.bam_sorted_fastqs/run4_gerald_D1VCPACXX_4_U1.sorted.fastq.gz b/integration-tests/reference/test1_paired.bam_sorted_fastqs/run4_gerald_D1VCPACXX_4_U1.sorted.fastq.gz new file mode 100644 index 0000000..199b8f6 Binary files /dev/null and b/integration-tests/reference/test1_paired.bam_sorted_fastqs/run4_gerald_D1VCPACXX_4_U1.sorted.fastq.gz differ diff --git a/integration-tests/reference/test1_paired.bam_sorted_fastqs/run4_gerald_D1VCPACXX_4_U2.sorted.fastq.gz b/integration-tests/reference/test1_paired.bam_sorted_fastqs/run4_gerald_D1VCPACXX_4_U2.sorted.fastq.gz new file mode 100644 index 0000000..3b950e5 Binary files /dev/null and b/integration-tests/reference/test1_paired.bam_sorted_fastqs/run4_gerald_D1VCPACXX_4_U2.sorted.fastq.gz differ diff --git a/integration-tests/reference/test1_paired.bam_sorted_fastqs/run5_gerald_D1VCPACXX_5_R1.sorted.fastq.gz b/integration-tests/reference/test1_paired.bam_sorted_fastqs/run5_gerald_D1VCPACXX_5_R1.sorted.fastq.gz new file mode 100644 index 0000000..bcb5003 Binary files /dev/null and b/integration-tests/reference/test1_paired.bam_sorted_fastqs/run5_gerald_D1VCPACXX_5_R1.sorted.fastq.gz differ diff --git a/integration-tests/reference/test1_paired.bam_sorted_fastqs/run5_gerald_D1VCPACXX_5_R2.sorted.fastq.gz b/integration-tests/reference/test1_paired.bam_sorted_fastqs/run5_gerald_D1VCPACXX_5_R2.sorted.fastq.gz new file mode 100644 index 0000000..2d542f4 Binary files /dev/null and b/integration-tests/reference/test1_paired.bam_sorted_fastqs/run5_gerald_D1VCPACXX_5_R2.sorted.fastq.gz 
differ diff --git a/integration-tests/reference/test1_paired.bam_sorted_fastqs/run5_gerald_D1VCPACXX_5_S.sorted.fastq.gz b/integration-tests/reference/test1_paired.bam_sorted_fastqs/run5_gerald_D1VCPACXX_5_S.sorted.fastq.gz new file mode 100644 index 0000000..87f7fb4 Binary files /dev/null and b/integration-tests/reference/test1_paired.bam_sorted_fastqs/run5_gerald_D1VCPACXX_5_S.sorted.fastq.gz differ diff --git a/integration-tests/reference/test1_paired.bam_sorted_fastqs/run5_gerald_D1VCPACXX_5_U1.sorted.fastq.gz b/integration-tests/reference/test1_paired.bam_sorted_fastqs/run5_gerald_D1VCPACXX_5_U1.sorted.fastq.gz new file mode 100644 index 0000000..531c6ce Binary files /dev/null and b/integration-tests/reference/test1_paired.bam_sorted_fastqs/run5_gerald_D1VCPACXX_5_U1.sorted.fastq.gz differ diff --git a/integration-tests/reference/test1_paired.bam_sorted_fastqs/run5_gerald_D1VCPACXX_5_U2.sorted.fastq.gz b/integration-tests/reference/test1_paired.bam_sorted_fastqs/run5_gerald_D1VCPACXX_5_U2.sorted.fastq.gz new file mode 100644 index 0000000..8abd7d5 Binary files /dev/null and b/integration-tests/reference/test1_paired.bam_sorted_fastqs/run5_gerald_D1VCPACXX_5_U2.sorted.fastq.gz differ diff --git a/test/test1_unpaired.bam b/integration-tests/reference/test1_unpaired.bam similarity index 100% rename from test/test1_unpaired.bam rename to integration-tests/reference/test1_unpaired.bam diff --git a/integration-tests/reference/test1_unpaired.bam_fastqs/default_R1.fastq.gz b/integration-tests/reference/test1_unpaired.bam_fastqs/default_R1.fastq.gz new file mode 100644 index 0000000..229151a Binary files /dev/null and b/integration-tests/reference/test1_unpaired.bam_fastqs/default_R1.fastq.gz differ diff --git a/integration-tests/reference/test1_unpaired.bam_fastqs/default_R2.fastq.gz b/integration-tests/reference/test1_unpaired.bam_fastqs/default_R2.fastq.gz new file mode 100644 index 0000000..229151a Binary files /dev/null and 
b/integration-tests/reference/test1_unpaired.bam_fastqs/default_R2.fastq.gz differ diff --git a/integration-tests/reference/test1_unpaired.bam_fastqs/default_S.fastq.gz b/integration-tests/reference/test1_unpaired.bam_fastqs/default_S.fastq.gz new file mode 100644 index 0000000..229151a Binary files /dev/null and b/integration-tests/reference/test1_unpaired.bam_fastqs/default_S.fastq.gz differ diff --git a/integration-tests/reference/test1_unpaired.bam_fastqs/default_U1.fastq.gz b/integration-tests/reference/test1_unpaired.bam_fastqs/default_U1.fastq.gz new file mode 100644 index 0000000..229151a Binary files /dev/null and b/integration-tests/reference/test1_unpaired.bam_fastqs/default_U1.fastq.gz differ diff --git a/integration-tests/reference/test1_unpaired.bam_fastqs/default_U2.fastq.gz b/integration-tests/reference/test1_unpaired.bam_fastqs/default_U2.fastq.gz new file mode 100644 index 0000000..229151a Binary files /dev/null and b/integration-tests/reference/test1_unpaired.bam_fastqs/default_U2.fastq.gz differ diff --git a/integration-tests/reference/test1_unpaired.bam_fastqs/run1_gerald_D1VCPACXX_1_R1.fastq.gz b/integration-tests/reference/test1_unpaired.bam_fastqs/run1_gerald_D1VCPACXX_1_R1.fastq.gz new file mode 100644 index 0000000..229151a Binary files /dev/null and b/integration-tests/reference/test1_unpaired.bam_fastqs/run1_gerald_D1VCPACXX_1_R1.fastq.gz differ diff --git a/integration-tests/reference/test1_unpaired.bam_fastqs/run1_gerald_D1VCPACXX_1_R2.fastq.gz b/integration-tests/reference/test1_unpaired.bam_fastqs/run1_gerald_D1VCPACXX_1_R2.fastq.gz new file mode 100644 index 0000000..229151a Binary files /dev/null and b/integration-tests/reference/test1_unpaired.bam_fastqs/run1_gerald_D1VCPACXX_1_R2.fastq.gz differ diff --git a/integration-tests/reference/test1_unpaired.bam_fastqs/run1_gerald_D1VCPACXX_1_S.fastq.gz b/integration-tests/reference/test1_unpaired.bam_fastqs/run1_gerald_D1VCPACXX_1_S.fastq.gz new file mode 100644 index 0000000..d20a137 
Binary files /dev/null and b/integration-tests/reference/test1_unpaired.bam_fastqs/run1_gerald_D1VCPACXX_1_S.fastq.gz differ diff --git a/integration-tests/reference/test1_unpaired.bam_fastqs/run1_gerald_D1VCPACXX_1_U1.fastq.gz b/integration-tests/reference/test1_unpaired.bam_fastqs/run1_gerald_D1VCPACXX_1_U1.fastq.gz new file mode 100644 index 0000000..229151a Binary files /dev/null and b/integration-tests/reference/test1_unpaired.bam_fastqs/run1_gerald_D1VCPACXX_1_U1.fastq.gz differ diff --git a/integration-tests/reference/test1_unpaired.bam_fastqs/run1_gerald_D1VCPACXX_1_U2.fastq.gz b/integration-tests/reference/test1_unpaired.bam_fastqs/run1_gerald_D1VCPACXX_1_U2.fastq.gz new file mode 100644 index 0000000..229151a Binary files /dev/null and b/integration-tests/reference/test1_unpaired.bam_fastqs/run1_gerald_D1VCPACXX_1_U2.fastq.gz differ diff --git a/integration-tests/reference/test1_unpaired.bam_fastqs/run2_gerald_D1VCPACXX_2_R1.fastq.gz b/integration-tests/reference/test1_unpaired.bam_fastqs/run2_gerald_D1VCPACXX_2_R1.fastq.gz new file mode 100644 index 0000000..229151a Binary files /dev/null and b/integration-tests/reference/test1_unpaired.bam_fastqs/run2_gerald_D1VCPACXX_2_R1.fastq.gz differ diff --git a/integration-tests/reference/test1_unpaired.bam_fastqs/run2_gerald_D1VCPACXX_2_R2.fastq.gz b/integration-tests/reference/test1_unpaired.bam_fastqs/run2_gerald_D1VCPACXX_2_R2.fastq.gz new file mode 100644 index 0000000..229151a Binary files /dev/null and b/integration-tests/reference/test1_unpaired.bam_fastqs/run2_gerald_D1VCPACXX_2_R2.fastq.gz differ diff --git a/integration-tests/reference/test1_unpaired.bam_fastqs/run2_gerald_D1VCPACXX_2_S.fastq.gz b/integration-tests/reference/test1_unpaired.bam_fastqs/run2_gerald_D1VCPACXX_2_S.fastq.gz new file mode 100644 index 0000000..a6f0c82 Binary files /dev/null and b/integration-tests/reference/test1_unpaired.bam_fastqs/run2_gerald_D1VCPACXX_2_S.fastq.gz differ diff --git 
a/integration-tests/reference/test1_unpaired.bam_fastqs/run2_gerald_D1VCPACXX_2_U1.fastq.gz b/integration-tests/reference/test1_unpaired.bam_fastqs/run2_gerald_D1VCPACXX_2_U1.fastq.gz new file mode 100644 index 0000000..229151a Binary files /dev/null and b/integration-tests/reference/test1_unpaired.bam_fastqs/run2_gerald_D1VCPACXX_2_U1.fastq.gz differ diff --git a/integration-tests/reference/test1_unpaired.bam_fastqs/run2_gerald_D1VCPACXX_2_U2.fastq.gz b/integration-tests/reference/test1_unpaired.bam_fastqs/run2_gerald_D1VCPACXX_2_U2.fastq.gz new file mode 100644 index 0000000..229151a Binary files /dev/null and b/integration-tests/reference/test1_unpaired.bam_fastqs/run2_gerald_D1VCPACXX_2_U2.fastq.gz differ diff --git a/integration-tests/reference/test1_unpaired.bam_fastqs/run3_gerald_D1VCPACXX_3_R1.fastq.gz b/integration-tests/reference/test1_unpaired.bam_fastqs/run3_gerald_D1VCPACXX_3_R1.fastq.gz new file mode 100644 index 0000000..229151a Binary files /dev/null and b/integration-tests/reference/test1_unpaired.bam_fastqs/run3_gerald_D1VCPACXX_3_R1.fastq.gz differ diff --git a/integration-tests/reference/test1_unpaired.bam_fastqs/run3_gerald_D1VCPACXX_3_R2.fastq.gz b/integration-tests/reference/test1_unpaired.bam_fastqs/run3_gerald_D1VCPACXX_3_R2.fastq.gz new file mode 100644 index 0000000..229151a Binary files /dev/null and b/integration-tests/reference/test1_unpaired.bam_fastqs/run3_gerald_D1VCPACXX_3_R2.fastq.gz differ diff --git a/integration-tests/reference/test1_unpaired.bam_fastqs/run3_gerald_D1VCPACXX_3_S.fastq.gz b/integration-tests/reference/test1_unpaired.bam_fastqs/run3_gerald_D1VCPACXX_3_S.fastq.gz new file mode 100644 index 0000000..e916926 Binary files /dev/null and b/integration-tests/reference/test1_unpaired.bam_fastqs/run3_gerald_D1VCPACXX_3_S.fastq.gz differ diff --git a/integration-tests/reference/test1_unpaired.bam_fastqs/run3_gerald_D1VCPACXX_3_U1.fastq.gz 
b/integration-tests/reference/test1_unpaired.bam_fastqs/run3_gerald_D1VCPACXX_3_U1.fastq.gz new file mode 100644 index 0000000..229151a Binary files /dev/null and b/integration-tests/reference/test1_unpaired.bam_fastqs/run3_gerald_D1VCPACXX_3_U1.fastq.gz differ diff --git a/integration-tests/reference/test1_unpaired.bam_fastqs/run3_gerald_D1VCPACXX_3_U2.fastq.gz b/integration-tests/reference/test1_unpaired.bam_fastqs/run3_gerald_D1VCPACXX_3_U2.fastq.gz new file mode 100644 index 0000000..229151a Binary files /dev/null and b/integration-tests/reference/test1_unpaired.bam_fastqs/run3_gerald_D1VCPACXX_3_U2.fastq.gz differ diff --git a/integration-tests/reference/test1_unpaired.bam_fastqs/run4_gerald_D1VCPACXX_4_R1.fastq.gz b/integration-tests/reference/test1_unpaired.bam_fastqs/run4_gerald_D1VCPACXX_4_R1.fastq.gz new file mode 100644 index 0000000..229151a Binary files /dev/null and b/integration-tests/reference/test1_unpaired.bam_fastqs/run4_gerald_D1VCPACXX_4_R1.fastq.gz differ diff --git a/integration-tests/reference/test1_unpaired.bam_fastqs/run4_gerald_D1VCPACXX_4_R2.fastq.gz b/integration-tests/reference/test1_unpaired.bam_fastqs/run4_gerald_D1VCPACXX_4_R2.fastq.gz new file mode 100644 index 0000000..229151a Binary files /dev/null and b/integration-tests/reference/test1_unpaired.bam_fastqs/run4_gerald_D1VCPACXX_4_R2.fastq.gz differ diff --git a/integration-tests/reference/test1_unpaired.bam_fastqs/run4_gerald_D1VCPACXX_4_S.fastq.gz b/integration-tests/reference/test1_unpaired.bam_fastqs/run4_gerald_D1VCPACXX_4_S.fastq.gz new file mode 100644 index 0000000..6cbbb80 Binary files /dev/null and b/integration-tests/reference/test1_unpaired.bam_fastqs/run4_gerald_D1VCPACXX_4_S.fastq.gz differ diff --git a/integration-tests/reference/test1_unpaired.bam_fastqs/run4_gerald_D1VCPACXX_4_U1.fastq.gz b/integration-tests/reference/test1_unpaired.bam_fastqs/run4_gerald_D1VCPACXX_4_U1.fastq.gz new file mode 100644 index 0000000..229151a Binary files /dev/null and 
b/integration-tests/reference/test1_unpaired.bam_fastqs/run4_gerald_D1VCPACXX_4_U1.fastq.gz differ diff --git a/integration-tests/reference/test1_unpaired.bam_fastqs/run4_gerald_D1VCPACXX_4_U2.fastq.gz b/integration-tests/reference/test1_unpaired.bam_fastqs/run4_gerald_D1VCPACXX_4_U2.fastq.gz new file mode 100644 index 0000000..229151a Binary files /dev/null and b/integration-tests/reference/test1_unpaired.bam_fastqs/run4_gerald_D1VCPACXX_4_U2.fastq.gz differ diff --git a/integration-tests/reference/test1_unpaired.bam_fastqs/run5_gerald_D1VCPACXX_5_R1.fastq.gz b/integration-tests/reference/test1_unpaired.bam_fastqs/run5_gerald_D1VCPACXX_5_R1.fastq.gz new file mode 100644 index 0000000..229151a Binary files /dev/null and b/integration-tests/reference/test1_unpaired.bam_fastqs/run5_gerald_D1VCPACXX_5_R1.fastq.gz differ diff --git a/integration-tests/reference/test1_unpaired.bam_fastqs/run5_gerald_D1VCPACXX_5_R2.fastq.gz b/integration-tests/reference/test1_unpaired.bam_fastqs/run5_gerald_D1VCPACXX_5_R2.fastq.gz new file mode 100644 index 0000000..229151a Binary files /dev/null and b/integration-tests/reference/test1_unpaired.bam_fastqs/run5_gerald_D1VCPACXX_5_R2.fastq.gz differ diff --git a/integration-tests/reference/test1_unpaired.bam_fastqs/run5_gerald_D1VCPACXX_5_S.fastq.gz b/integration-tests/reference/test1_unpaired.bam_fastqs/run5_gerald_D1VCPACXX_5_S.fastq.gz new file mode 100644 index 0000000..7b9ab24 Binary files /dev/null and b/integration-tests/reference/test1_unpaired.bam_fastqs/run5_gerald_D1VCPACXX_5_S.fastq.gz differ diff --git a/integration-tests/reference/test1_unpaired.bam_fastqs/run5_gerald_D1VCPACXX_5_U1.fastq.gz b/integration-tests/reference/test1_unpaired.bam_fastqs/run5_gerald_D1VCPACXX_5_U1.fastq.gz new file mode 100644 index 0000000..229151a Binary files /dev/null and b/integration-tests/reference/test1_unpaired.bam_fastqs/run5_gerald_D1VCPACXX_5_U1.fastq.gz differ diff --git 
a/integration-tests/reference/test1_unpaired.bam_fastqs/run5_gerald_D1VCPACXX_5_U2.fastq.gz b/integration-tests/reference/test1_unpaired.bam_fastqs/run5_gerald_D1VCPACXX_5_U2.fastq.gz new file mode 100644 index 0000000..229151a Binary files /dev/null and b/integration-tests/reference/test1_unpaired.bam_fastqs/run5_gerald_D1VCPACXX_5_U2.fastq.gz differ diff --git a/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/default_R1.sorted.fastq.gz b/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/default_R1.sorted.fastq.gz new file mode 100644 index 0000000..88b8d99 Binary files /dev/null and b/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/default_R1.sorted.fastq.gz differ diff --git a/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/default_R2.sorted.fastq.gz b/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/default_R2.sorted.fastq.gz new file mode 100644 index 0000000..88b8d99 Binary files /dev/null and b/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/default_R2.sorted.fastq.gz differ diff --git a/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/default_S.sorted.fastq.gz b/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/default_S.sorted.fastq.gz new file mode 100644 index 0000000..b648b3d Binary files /dev/null and b/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/default_S.sorted.fastq.gz differ diff --git a/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/default_U1.sorted.fastq.gz b/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/default_U1.sorted.fastq.gz new file mode 100644 index 0000000..88b8d99 Binary files /dev/null and b/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/default_U1.sorted.fastq.gz differ diff --git a/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/default_U2.sorted.fastq.gz b/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/default_U2.sorted.fastq.gz new file mode 100644 
index 0000000..88b8d99 Binary files /dev/null and b/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/default_U2.sorted.fastq.gz differ diff --git a/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run1_gerald_D1VCPACXX_1_R1.sorted.fastq.gz b/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run1_gerald_D1VCPACXX_1_R1.sorted.fastq.gz new file mode 100644 index 0000000..88b8d99 Binary files /dev/null and b/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run1_gerald_D1VCPACXX_1_R1.sorted.fastq.gz differ diff --git a/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run1_gerald_D1VCPACXX_1_R2.sorted.fastq.gz b/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run1_gerald_D1VCPACXX_1_R2.sorted.fastq.gz new file mode 100644 index 0000000..88b8d99 Binary files /dev/null and b/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run1_gerald_D1VCPACXX_1_R2.sorted.fastq.gz differ diff --git a/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run1_gerald_D1VCPACXX_1_S.sorted.fastq.gz b/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run1_gerald_D1VCPACXX_1_S.sorted.fastq.gz new file mode 100644 index 0000000..96a8fb1 Binary files /dev/null and b/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run1_gerald_D1VCPACXX_1_S.sorted.fastq.gz differ diff --git a/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run1_gerald_D1VCPACXX_1_U1.sorted.fastq.gz b/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run1_gerald_D1VCPACXX_1_U1.sorted.fastq.gz new file mode 100644 index 0000000..22ceeef Binary files /dev/null and b/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run1_gerald_D1VCPACXX_1_U1.sorted.fastq.gz differ diff --git a/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run1_gerald_D1VCPACXX_1_U2.sorted.fastq.gz b/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run1_gerald_D1VCPACXX_1_U2.sorted.fastq.gz new 
file mode 100644 index 0000000..b648b3d Binary files /dev/null and b/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run1_gerald_D1VCPACXX_1_U2.sorted.fastq.gz differ diff --git a/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run2_gerald_D1VCPACXX_2_R1.sorted.fastq.gz b/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run2_gerald_D1VCPACXX_2_R1.sorted.fastq.gz new file mode 100644 index 0000000..22ceeef Binary files /dev/null and b/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run2_gerald_D1VCPACXX_2_R1.sorted.fastq.gz differ diff --git a/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run2_gerald_D1VCPACXX_2_R2.sorted.fastq.gz b/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run2_gerald_D1VCPACXX_2_R2.sorted.fastq.gz new file mode 100644 index 0000000..22ceeef Binary files /dev/null and b/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run2_gerald_D1VCPACXX_2_R2.sorted.fastq.gz differ diff --git a/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run2_gerald_D1VCPACXX_2_S.sorted.fastq.gz b/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run2_gerald_D1VCPACXX_2_S.sorted.fastq.gz new file mode 100644 index 0000000..caedd98 Binary files /dev/null and b/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run2_gerald_D1VCPACXX_2_S.sorted.fastq.gz differ diff --git a/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run2_gerald_D1VCPACXX_2_U1.sorted.fastq.gz b/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run2_gerald_D1VCPACXX_2_U1.sorted.fastq.gz new file mode 100644 index 0000000..88b8d99 Binary files /dev/null and b/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run2_gerald_D1VCPACXX_2_U1.sorted.fastq.gz differ diff --git a/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run2_gerald_D1VCPACXX_2_U2.sorted.fastq.gz 
b/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run2_gerald_D1VCPACXX_2_U2.sorted.fastq.gz new file mode 100644 index 0000000..88b8d99 Binary files /dev/null and b/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run2_gerald_D1VCPACXX_2_U2.sorted.fastq.gz differ diff --git a/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run3_gerald_D1VCPACXX_3_R1.sorted.fastq.gz b/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run3_gerald_D1VCPACXX_3_R1.sorted.fastq.gz new file mode 100644 index 0000000..22ceeef Binary files /dev/null and b/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run3_gerald_D1VCPACXX_3_R1.sorted.fastq.gz differ diff --git a/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run3_gerald_D1VCPACXX_3_R2.sorted.fastq.gz b/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run3_gerald_D1VCPACXX_3_R2.sorted.fastq.gz new file mode 100644 index 0000000..22ceeef Binary files /dev/null and b/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run3_gerald_D1VCPACXX_3_R2.sorted.fastq.gz differ diff --git a/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run3_gerald_D1VCPACXX_3_S.sorted.fastq.gz b/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run3_gerald_D1VCPACXX_3_S.sorted.fastq.gz new file mode 100644 index 0000000..e822997 Binary files /dev/null and b/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run3_gerald_D1VCPACXX_3_S.sorted.fastq.gz differ diff --git a/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run3_gerald_D1VCPACXX_3_U1.sorted.fastq.gz b/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run3_gerald_D1VCPACXX_3_U1.sorted.fastq.gz new file mode 100644 index 0000000..88b8d99 Binary files /dev/null and b/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run3_gerald_D1VCPACXX_3_U1.sorted.fastq.gz differ diff --git 
a/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run3_gerald_D1VCPACXX_3_U2.sorted.fastq.gz b/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run3_gerald_D1VCPACXX_3_U2.sorted.fastq.gz new file mode 100644 index 0000000..b648b3d Binary files /dev/null and b/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run3_gerald_D1VCPACXX_3_U2.sorted.fastq.gz differ diff --git a/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run4_gerald_D1VCPACXX_4_R1.sorted.fastq.gz b/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run4_gerald_D1VCPACXX_4_R1.sorted.fastq.gz new file mode 100644 index 0000000..88b8d99 Binary files /dev/null and b/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run4_gerald_D1VCPACXX_4_R1.sorted.fastq.gz differ diff --git a/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run4_gerald_D1VCPACXX_4_R2.sorted.fastq.gz b/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run4_gerald_D1VCPACXX_4_R2.sorted.fastq.gz new file mode 100644 index 0000000..88b8d99 Binary files /dev/null and b/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run4_gerald_D1VCPACXX_4_R2.sorted.fastq.gz differ diff --git a/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run4_gerald_D1VCPACXX_4_S.sorted.fastq.gz b/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run4_gerald_D1VCPACXX_4_S.sorted.fastq.gz new file mode 100644 index 0000000..106c964 Binary files /dev/null and b/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run4_gerald_D1VCPACXX_4_S.sorted.fastq.gz differ diff --git a/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run4_gerald_D1VCPACXX_4_U1.sorted.fastq.gz b/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run4_gerald_D1VCPACXX_4_U1.sorted.fastq.gz new file mode 100644 index 0000000..4fb08c2 Binary files /dev/null and 
b/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run4_gerald_D1VCPACXX_4_U1.sorted.fastq.gz differ diff --git a/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run4_gerald_D1VCPACXX_4_U2.sorted.fastq.gz b/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run4_gerald_D1VCPACXX_4_U2.sorted.fastq.gz new file mode 100644 index 0000000..22ceeef Binary files /dev/null and b/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run4_gerald_D1VCPACXX_4_U2.sorted.fastq.gz differ diff --git a/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run5_gerald_D1VCPACXX_5_R1.sorted.fastq.gz b/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run5_gerald_D1VCPACXX_5_R1.sorted.fastq.gz new file mode 100644 index 0000000..88b8d99 Binary files /dev/null and b/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run5_gerald_D1VCPACXX_5_R1.sorted.fastq.gz differ diff --git a/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run5_gerald_D1VCPACXX_5_R2.sorted.fastq.gz b/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run5_gerald_D1VCPACXX_5_R2.sorted.fastq.gz new file mode 100644 index 0000000..88b8d99 Binary files /dev/null and b/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run5_gerald_D1VCPACXX_5_R2.sorted.fastq.gz differ diff --git a/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run5_gerald_D1VCPACXX_5_S.sorted.fastq.gz b/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run5_gerald_D1VCPACXX_5_S.sorted.fastq.gz new file mode 100644 index 0000000..a504057 Binary files /dev/null and b/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run5_gerald_D1VCPACXX_5_S.sorted.fastq.gz differ diff --git a/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run5_gerald_D1VCPACXX_5_U1.sorted.fastq.gz b/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run5_gerald_D1VCPACXX_5_U1.sorted.fastq.gz new file mode 100644 index 
0000000..22ceeef Binary files /dev/null and b/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run5_gerald_D1VCPACXX_5_U1.sorted.fastq.gz differ diff --git a/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run5_gerald_D1VCPACXX_5_U2.sorted.fastq.gz b/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run5_gerald_D1VCPACXX_5_U2.sorted.fastq.gz new file mode 100644 index 0000000..b648b3d Binary files /dev/null and b/integration-tests/reference/test1_unpaired.bam_sorted_fastqs/run5_gerald_D1VCPACXX_5_U2.sorted.fastq.gz differ diff --git a/test/test1.sh b/integration-tests/run.sh similarity index 69% rename from test/test1.sh rename to integration-tests/run.sh index 1b560b3..4582a61 100755 --- a/test/test1.sh +++ b/integration-tests/run.sh @@ -1,6 +1,6 @@ #!/bin/bash # -# Copyright (c) 2022 DKFZ. +# Copyright (c) 2024 DKFZ. # # Distributed under the MIT License (license terms are at https://github.com/DKFZ-ODCF/nf-bam2fastq/blob/master/LICENSE.txt). # @@ -9,17 +9,19 @@ set -x set -ue set -o pipefail -outDir="${1:?No outDir set}" -environmentProfile="${2:-conda}" -nextflowEnvironment="${3:-$outDir/nextflowEnv}" +testsDir="$(readlink -f "$(dirname "${BASH_SOURCE[0]}")")" -if [[ "$environmentProfile" == "mamba" ]]; then +environmentProfile="${1:-singularity}" +outDir="${2:?No outDir set}" +nextflowEnvironment="${3:-$testsDir/nextflowEnv}" + +if command -v mamba; then condaBinary=mamba else condaBinary=conda fi -workflowDir="$(readlink -f "$(dirname "${BASH_SOURCE[0]}")/..")" +workflowDir="$testsDir/.." readsInBam() { local bamFile="${1:?No BAM file given}" @@ -71,6 +73,7 @@ set -ue # Keep memory footprint small export NXF_OPTS="-Xmx128m" +export NXF_SINGULARITY_RUN_COMMAND=run # Run the tests. 
nextflow run "$workflowDir/main.nf" \ @@ -78,18 +81,18 @@ nextflow run "$workflowDir/main.nf" \ -ansi-log \ -resume \ -work-dir "$outDir/work" \ - --input="$workflowDir/test/test1_paired.bam,$workflowDir/test/test1_unpaired.bam" \ + --input="$workflowDir/integration-tests/reference/test1_paired.bam,$workflowDir/integration-tests/reference/test1_unpaired.bam" \ --outputDir="$outDir" \ --sortFastqs=false \ --compressorThreads=0 \ --sortThreads=1 \ --sortMemory="100 MB" assertEqual \ - "$(readsInBam "$workflowDir/test/test1_paired.bam")" \ + "$(readsInBam "$workflowDir/integration-tests/reference/test1_paired.bam")" \ "$(readsInOutputDir "$outDir/test1_paired.bam_fastqs")" \ "Read number in unsorted output FASTQs on paired-end input bam" assertEqual \ - "$(readsInBam "$workflowDir/test/test1_unpaired.bam")" \ + "$(readsInBam "$workflowDir/integration-tests/reference/test1_unpaired.bam")" \ "$(readsInOutputDir "$outDir/test1_unpaired.bam_fastqs")" \ "Read number in unsorted output FASTQs on single-end input bam" @@ -98,19 +101,27 @@ nextflow run "$workflowDir/main.nf" \ -ansi-log \ -resume \ -work-dir "$outDir/work" \ - --input="$workflowDir/test/test1_paired.bam,$workflowDir/test/test1_unpaired.bam" \ + --input="$workflowDir/integration-tests/reference/test1_paired.bam,$workflowDir/integration-tests/reference/test1_unpaired.bam" \ --outputDir="$outDir" \ --sortFastqs=true \ --compressorThreads=0 \ --sortThreads=1 \ --sortMemory="100 MB" assertEqual \ - "$(readsInBam "$workflowDir/test/test1_paired.bam")" \ + "$(readsInBam "$workflowDir/integration-tests/reference/test1_paired.bam")" \ "$(readsInOutputDir "$outDir/test1_paired.bam_sorted_fastqs")" \ "Read number in sorted output FASTQs on paired-end input bam" assertEqual \ - "$(readsInBam "$workflowDir/test/test1_unpaired.bam")" \ + "$(readsInBam "$workflowDir/integration-tests/reference/test1_unpaired.bam")" \ "$(readsInOutputDir "$outDir/test1_unpaired.bam_sorted_fastqs")" \ "Read number in sorted output FASTQs on 
single-end input bam" +for ref in reference/test*/*; do + out="$outDir/$(echo "$ref" | sed "s/reference//")" + assertEqual \ + "$(zcat "$ref" | md5sum | cut -d' ' -f1)" \ + "$(zcat "$out" | md5sum | cut -d' ' -f1)" \ + "MD5 of $ref and $out" +done + testFinished diff --git a/main.nf b/main.nf index f45d92d..11a9f6f 100644 --- a/main.nf +++ b/main.nf @@ -162,10 +162,6 @@ String toSortMemoryString(MemoryUnit mem) { } /** The actual workflow */ -bamFiles_ch = Channel. - fromPath(params.input.split(',') as List, - checkIfExists: true) - Boolean compressBamToFastqOutput = params.sortFastqs ? params.compressIntermediateFastqs : true @@ -180,10 +176,10 @@ process bamToFastq { publishDir params.outputDir, enabled: !params.sortFastqs, mode: publishMode.toString() input: - file bamFile from bamFiles_ch + file bamFile output: - tuple file(bamFile), file("**/*.${fastqSuffix(compressBamToFastqOutput)}") into readsFiles_ch + tuple file(bamFile), file("**/*.${fastqSuffix(compressBamToFastqOutput)}") shell: """ @@ -199,29 +195,6 @@ process bamToFastq { } -// Create two channels of matched paired-end and unmatched or single-end reads, each of tuples of (bam, fastq). -readsFiles_ch.into { readsFilesA_ch; readsFilesB_ch } -pairedFastqs_ch = readsFilesA_ch.flatMap { - def (bam, fastqs) = it - fastqs.grep { it.getFileName() =~ /.+_R[12]\.fastq(?:\.[^.]*)?$/ }. - groupBy { fastq -> fastq.getFileName().toString().replaceFirst("_R[12].fastq(?:.gz)?\$", "") }. - collect { key, files -> - assert files.size() == 2 - files.sort() - [bam, files[0], files[1]] - } -} - - -// Unpaired FASTQs are unmatched or orphaned paired-reads (1 or 2) and singletons, i.e. unpaired reads. -unpairedFastqs_ch = readsFilesB_ch.flatMap { - def (bam, fastqs) = it - fastqs. - grep { it.getFileName() =~ /.+_(U[12]|S)\.fastq(?:\.[^.]*)?$/ }. - collect { [bam, it] } -} - - process nameSortUnpairedFastqs { cpus { params.sortThreads + (params.compressIntermediateFastqs ? 
params.compressorThreads : 0 ) } memory { (sortMemory + 100.MB) * params.sortThreads * 1.2 } @@ -235,15 +208,14 @@ process nameSortUnpairedFastqs { params.sortFastqs input: - tuple file(bam), file(fastq) from unpairedFastqs_ch + tuple file(bam), file(fastq) output: - tuple file(bam), file(sortedFastqFile) into sortedUnpairedFastqs_ch + tuple file(bam), file(sortedFastqFile) script: - bamFileName = bam.getFileName().toString() - outDir = "${bamFileName}_sorted_fastqs" - sortedFastqFile = sortedFastqFile(outDir, fastq, true) + outDir = "${bam.getFileName().toString()}_sorted_fastqs" as String + sortedFastqFile = sortedFastqFile(outDir, fastq.toRealPath(), true) """ mkdir -p "$outDir" compressedInputFastqs="$compressBamToFastqOutput" \ @@ -272,16 +244,15 @@ process nameSortPairedFastqs { params.sortFastqs input: - tuple file(bam), file(fastq1), file(fastq2) from pairedFastqs_ch + tuple file(bam), file(fastq1), file(fastq2) output: - tuple file(bam), file(sortedFastqFile1), file(sortedFastqFile2) into sortedPairedFastqs_ch + tuple file(bam), file(sortedFastqFile1), file(sortedFastqFile2) script: - bamFileName = bam.getFileName().toString() - outDir = "${bamFileName}_sorted_fastqs" - sortedFastqFile1 = sortedFastqFile(outDir, fastq1, true) - sortedFastqFile2 = sortedFastqFile(outDir, fastq2, true) + outDir = "${bam.getFileName().toString()}_sorted_fastqs" as String + sortedFastqFile1 = sortedFastqFile(outDir, fastq1.toRealPath(), true) + sortedFastqFile2 = sortedFastqFile(outDir, fastq2.toRealPath(), true) """ mkdir -p "$outDir" compressedInputFastqs="$compressBamToFastqOutput" \ @@ -298,6 +269,39 @@ process nameSortPairedFastqs { } +workflow { + + bamFiles_ch = Channel.fromPath(params.input.split(',') as List, checkIfExists: true) + readsFiles_ch = bamToFastq(bamFiles_ch) + + pairedFastqs_ch = readsFiles_ch.flatMap { + def (bam, fastqs) = it + fastqs.grep { + it.getFileName() =~ /.+_R[12]\.fastq(?:\.[^.]*)?$/ + }. 
+ groupBy { fastq -> + fastq.getFileName().toString().replaceFirst("_R[12].fastq(?:.gz)?\$", "") + }. + collect { key, files -> + assert files.size() == 2 + files.sort() + [bam, files[0], files[1]] + } + } + nameSortPairedFastqs(pairedFastqs_ch) + + // Unpaired FASTQs are unmatched or orphaned paired-reads (1 or 2) and singletons, i.e. unpaired reads. + unpairedFastqs_ch = readsFiles_ch.flatMap { + def (bam, fastqs) = it + fastqs. + grep { it.getFileName() =~ /.+_(U[12]|S)\.fastq(?:\.[^.]*)?$/ }. + collect { [bam, it] } + } + nameSortUnpairedFastqs(unpairedFastqs_ch) + + +} + workflow.onComplete { println "Workflow run $workflow.runName completed at $workflow.complete with status " + diff --git a/nextflow.config b/nextflow.config index 0ca558d..3e0cb80 100644 --- a/nextflow.config +++ b/nextflow.config @@ -14,70 +14,63 @@ manifest { mainScript = 'main.nf' version = '1.2.0' author = 'Philip Reiner Kensche' - nextflowVersion = '>= 22.07.1' + nextflowVersion = '>= 23.10.1' } // The workflow may refer to an older container version, e.g. if the container was not updated. -ext.containerVersion = '1.0.0' +ext.containerVersion = '1.3.0' +ext.containerUrl = "ghcr.io/dkfz-odcf/nf-bam2fastq:${ext.containerVersion}" +ext.taskEnvYaml = "${projectDir}/task-environment.yml" profiles { + // Note that here we use neither `process { ... }` nor `executor { ... }` blocks, but dotted + // assignments (e.g. `process.cpus = 1`), as blocks would override the settings from other + // profiles. The current settings allow combining profiles, e.g. test,lsf,singularity, to run + // the workflow with the test resources on LSF using Singularity containers. 
+ test { - process { - cpus = 1 - memory = 1.GB - } + process.cpus = 1 + process.memory = 1.GB } local { - process { - executor = 'local' - } + process.executor = 'local' } conda { conda.enabled = true conda.cacheDir = "${projectDir}/cache/conda" - process { - conda = "${projectDir}/task-environment.yml" - } } mamba { conda.enabled = true - useMamba = true + conda.useMamba = true conda.cacheDir = "${projectDir}/cache/conda" - process { - conda = "${projectDir}/task-environment.yml" - } } docker { docker.enabled = true docker.runOptions='-u $(id -u):$(id -g)' - process { - container = "ghcr.io/dkfz-odcf/nf-bam2fastq:${ext.containerVersion}" - } + process.container = ext.containerUrl } singularity { - process.container = "nf-bam2fastq_${ext.containerVersion}.sif" + // Automatically pull the Docker image, and put it into the cache directory singularity.enabled = true singularity.cacheDir = "${projectDir}/cache/singularity" // The singularity containers are stored in the workflow-directory singularity.autoMounts = true + process.container = "docker-daemon://${ext.containerUrl}" } lsf { - process { - executor = 'lsf' - clusterOptions = '-env none' - } - executor { - // scratch = '$SCRATCHDIR/$LSB_JOBID' - perTaskReserve = false - perJobMemLimit = true - } + process.executor = 'lsf' + process.clusterOptions = '-env none' + + // executor.scratch = '$SCRATCHDIR/$LSB_JOBID' + executor.perTaskReserve = false + executor.perJobMemLimit = true } } diff --git a/nf-bam2fastq.iml b/nf-bam2fastq.iml index ad12601..90cacad 100644 --- a/nf-bam2fastq.iml +++ b/nf-bam2fastq.iml @@ -10,14 +10,15 @@ - + + \ No newline at end of file diff --git a/task-environment.yml b/task-environment.yml index 73f8f63..9628cae 100755 --- a/task-environment.yml +++ b/task-environment.yml @@ -4,6 +4,7 @@ channels: - bioconda - defaults dependencies: + - procps-ng - _libgcc_mutex=0.1=conda_forge - _openmp_mutex=4.5=1_gnu - bash=5.0.018=h0a1914f_0 diff --git a/test-environment.yml b/test-environment.yml 
index ae3e642..2e7570c 100755 --- a/test-environment.yml +++ b/test-environment.yml @@ -1,4 +1,4 @@ -name: test-environment +name: tests-environment channels: - conda-forge - bioconda @@ -7,6 +7,6 @@ dependencies: - mamba - bash=5.0.018 - samtools=1.11 - - nextflow=22.10.1 + - nextflow=23.10.1 - gradle=7.4.2 - openjdk=11.0.15