# .github/workflows/main.yml

# Workflow name as displayed in the GitHub Actions UI.
name: Tests

# Run on pushes to master and on pull requests against any base branch.
on:
  push:
    branches:
      - master
  pull_request:
    branches:
      # "**" also matches branch names that contain a slash (for example
      # "release/1.0"). A single "*" does not match "/", so pull requests
      # against such base branches would not trigger this workflow.
      - "**"

jobs:
  # Runs for every ref except master and invokes the
  # khan/pull-request-workflow-cancel action for the workflow file main.yml.
  # NOTE(review): judging by the action's name it cancels superseded runs of
  # the same pull request - confirm against the action's documentation.
  Cancel-previous-jobs:
    if: github.ref != 'refs/heads/master'
    runs-on: ubuntu-latest
    steps:
      - uses: khan/pull-request-workflow-cancel@1.0.0
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          workflows: main.yml

  # Runs super-linter on the files changed relative to the default branch.
  # The VALIDATE_* flags below switch on the snakefmt, black and markdown
  # checks; CHANGELOG.md is excluded via FILTER_REGEX_EXCLUDE.
  Formatting:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
        with:
          # super-linter needs the full git history to determine the list of
          # changed files when VALIDATE_ALL_CODEBASE is false; the default
          # shallow clone (depth 1) does not contain the default branch.
          fetch-depth: 0
      - name: Formatting
        uses: github/super-linter@v4.10.1
        env:
          VALIDATE_ALL_CODEBASE: false
          DEFAULT_BRANCH: master
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          FILTER_REGEX_EXCLUDE: CHANGELOG.md
          VALIDATE_SNAKEMAKE_SNAKEFMT: true
          VALIDATE_PYTHON_BLACK: true
          VALIDATE_MARKDOWN: true

  # Calls the Snakemake GitHub action with "--lint" on workflow/Snakefile.
  Linting:
    env:
      GISAID_API_TOKEN: ${{ secrets.GISAID_API_TOKEN }}
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - uses: snakemake/snakemake-github-action@v1.24.0
        name: Lint workflow
        with:
          snakefile: workflow/Snakefile
          directory: "."
          args: '--lint'
          # Installs peppy into the "snakemake" environment.
          # NOTE(review): presumably the action runs this before invoking
          # snakemake - confirm against the action's documentation.
          stagein: mamba install -n snakemake -c conda-forge peppy

  # pre-commit action currently fails:
  # https://github.com/IKIM-Essen/uncovar/actions/runs/4304753941/jobs/7506225198#step:4:115
  # revisit when new pre-commit release >3.0.0 is out
  # Pre-Commit:
  #   runs-on: ubuntu-latest
  #   if: github.ref != 'refs/heads/master'
  #   steps:
  #     - uses: actions/checkout@v2
  #     - uses: actions/setup-python@v4
  #     - uses: pre-commit/action@v3.0.0

  # Runs after Formatting and Linting. One job is started per combination of
  # rule ("all" / "all -npr"), technology and sequencing method.
  Technology-Tests:
    needs:
      - Formatting
      - Linting
      #- Pre-Commit
    runs-on: ubuntu-latest
    env:
      GISAID_API_TOKEN: ${{ secrets.GISAID_API_TOKEN }}
    strategy:
      matrix:
        rule:
          - "all"
          - "all -npr"
        # disable ont actions
        # technology: [all, illumina, ont, ion]
        technology:
          - "all"
          - "illumina"
          - "ion"
        seq_method:
          - "shotgun"
          - "amplicon"
    steps:
      - uses: actions/checkout@v2

      # Removes preinstalled toolchains from the runner to gain disk space.
      # android - will release about 10 GB if you don't need Android
      # dotnet  - will release about 20 GB if you don't need .NET
      - name: Free up some disk space
        run: |
          sudo rm -rf /usr/local/lib/android
          sudo rm -rf /usr/share/dotnet
      # Downloads the Illumina and Ion Torrent test reads and writes a sample
      # sheet listing both samples. is_amplicon_data is 0 for the "shotgun"
      # matrix entry and 1 otherwise.
      # NOTE(review): no step with id "test-data" is visible in this job, so
      # the cache-hit part of the condition presumably always holds - confirm.
      # NOTE(review): B117.1 and B117.2 are both fetched from the reads.1 URL;
      # verify that this is intended.
      - name: Prepare test data for all technologies
        if: steps.test-data.outputs.cache-hit != true && (startsWith(matrix.rule, 'all') && matrix.technology == 'all' || matrix.rule == 'compare_assemblers')
        run: |
          if [[ "${{ matrix.seq_method }}" = "shotgun" ]]; then
            export AMPLICON=0
          else
            export AMPLICON=1
          fi
          mkdir -p .tests/data
          curl -L https://github.com/thomasbtf/small-kraken-db/raw/master/B.1.1.7.reads.1.fastq.gz > .tests/data/B117.1.fastq.gz
          curl -L https://github.com/thomasbtf/small-kraken-db/raw/master/B.1.1.7.reads.1.fastq.gz > .tests/data/B117.2.fastq.gz
          curl -L https://github.com/thomasbtf/small-kraken-db/raw/master/ERR5745913.fastq.gz > .tests/data/ion_reads.fastq.gz
          {
            echo sample_name,fq1,fq2,date,is_amplicon_data,technology
            echo illumina-test,data/B117.1.fastq.gz,data/B117.2.fastq.gz,2022-01-01,$AMPLICON,illumina
            echo ion-test,data/ion_reads.fastq.gz,,2022-01-01,$AMPLICON,ion
          } > .tests/config/pep/samples.csv
      # Downloads the Illumina test reads and writes a sample sheet with the
      # single Illumina sample. is_amplicon_data is 0 for the "shotgun" matrix
      # entry and 1 otherwise.
      # NOTE(review): B117.1 and B117.2 are both fetched from the reads.1 URL;
      # verify that this is intended.
      - name: Prepare test data for Illumina
        if: steps.test-data.outputs.cache-hit != true && (startsWith(matrix.rule, 'all') && matrix.technology == 'illumina' || matrix.rule == 'compare_assemblers')
        run: |
          if [[ "${{ matrix.seq_method }}" = "shotgun" ]]; then
            export AMPLICON=0
          else
            export AMPLICON=1
          fi
          mkdir -p .tests/data
          curl -L https://github.com/thomasbtf/small-kraken-db/raw/master/B.1.1.7.reads.1.fastq.gz > .tests/data/B117.1.fastq.gz
          curl -L https://github.com/thomasbtf/small-kraken-db/raw/master/B.1.1.7.reads.1.fastq.gz > .tests/data/B117.2.fastq.gz
          {
            echo sample_name,fq1,fq2,date,is_amplicon_data,technology
            echo illumina-test,data/B117.1.fastq.gz,data/B117.2.fastq.gz,2022-01-01,$AMPLICON,illumina
          } > .tests/config/pep/samples.csv
      # - name: Prepare test data for Oxford Nanopore
      #   if: steps.test-data.outputs.cache-hit != true && (startsWith(matrix.rule, 'all') && matrix.technology == 'ont' || matrix.rule == 'compare_assemblers')
      #   run: |
      #     if [[ "${{ matrix.seq_method }}"  = "shotgun" ]] ; then export AMPLICON=0; else export AMPLICON=1; fi
      #     mkdir -p .tests/data
      #     curl -L https://github.com/thomasbtf/small-kraken-db/raw/master/ont_reads.fastq.gz > .tests/data/ont_reads.fastq.gz
      #     echo sample_name,fq1,date,is_amplicon_data,technology > .tests/config/pep/samples.csv
      #     echo ont-test,data/ont_reads.fastq.gz,2022-01-01,$AMPLICON,ont >> .tests/config/pep/samples.csv
      # Downloads the Ion Torrent test reads and writes a sample sheet with the
      # single Ion Torrent sample (no fq2 column). is_amplicon_data is 0 for
      # the "shotgun" matrix entry and 1 otherwise.
      - name: Prepare test data for Ion Torrent
        if: steps.test-data.outputs.cache-hit != true && (startsWith(matrix.rule, 'all') && matrix.technology == 'ion' || matrix.rule == 'compare_assemblers')
        run: |
          if [[ "${{ matrix.seq_method }}" = "shotgun" ]]; then
            export AMPLICON=0
          else
            export AMPLICON=1
          fi
          mkdir -p .tests/data
          curl -L https://github.com/thomasbtf/small-kraken-db/raw/master/ERR5745913.fastq.gz > .tests/data/ion_reads.fastq.gz
          {
            echo sample_name,fq1,date,is_amplicon_data,technology
            echo ion-test,data/ion_reads.fastq.gz,2022-01-01,$AMPLICON,ion
          } > .tests/config/pep/samples.csv
      # Fetches the NCBI record BA000005.3 as FASTA and stores it,
      # gzip-compressed, under the name human-genome.fna.gz.
      - name: Use smaller reference files for testing
        if: steps.test-resources.outputs.cache-hit != true
        run: |
          # mkdir -p .tests/resources/minikraken-8GB
          # curl -SL https://github.com/thomasbtf/small-kraken-db/raw/master/human_k2db.tar.gz | tar zxvf - -C .tests/resources/minikraken-8GB --strip 1
          genomes_dir=.tests/resources/genomes
          mkdir -p "$genomes_dir"
          curl -SL "https://www.ncbi.nlm.nih.gov/sviewer/viewer.fcgi?id=BA000005.3&db=nuccore&report=fasta" | gzip -c > "$genomes_dir/human-genome.fna.gz"
      # Writes a two-entry strain genome list and downloads the NCBI records
      # MZ314997.1 and MZ314998.1 as B.1.1.7.fasta and B.1.351.fasta; sed
      # drops the last line of each response.
      - name: Simulate GISAID download
        run: |
          tables_dir=.tests/results/benchmarking/tables
          genomes_dir=.tests/resources/genomes
          mkdir -p "$tables_dir" "$genomes_dir"
          printf '%s\n' resources/genomes/B.1.1.7.fasta resources/genomes/B.1.351.fasta > "$tables_dir/strain-genomes.txt"
          curl "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nuccore&id=MZ314997.1&rettype=fasta" | sed '$ d' > "$genomes_dir/B.1.1.7.fasta"
          curl "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nuccore&id=MZ314998.1&rettype=fasta" | sed '$ d' > "$genomes_dir/B.1.351.fasta"
      # Runs snakemake inside .tests for the rule selected by the matrix.
      - name: Test rule ${{ matrix.rule }} on ${{ matrix.technology }} ${{ matrix.seq_method }} data
        uses: snakemake/snakemake-github-action@v1.24.0
        with:
          snakefile: workflow/Snakefile
          directory: .tests
          # Folded scalar: the lines below are joined with single spaces.
          args: >-
            -pr --use-conda --show-failed-logs --cores 2
            --resources ncbi_api_requests=1
            --conda-cleanup-pkgs cache --conda-frontend mamba
            ${{ matrix.rule }}

      # Creates report.zip; skipped for the "all -npr" matrix entry.
      - name: Test report
        if: ${{ !startsWith(matrix.rule, 'all -npr') }}
        uses: snakemake/snakemake-github-action@v1.24.0
        with:
          snakefile: workflow/Snakefile
          directory: .tests
          args: ${{ matrix.rule }} --report report.zip

      # Uploads the patient report archive as an artifact. Only runs for the
      # technology "all" and skips the "all -npr" matrix entry.
      # NOTE(review): the "Test report" step writes report.zip, while this
      # step uploads results/patient-reports/2022-01-01.zip - presumably that
      # file is produced by the workflow run itself; confirm.
      - name: Upload report
        uses: actions/upload-artifact@v3
        if: matrix.technology == 'all' && matrix.rule != 'all -npr'
        with:
          name: report-rule-${{ matrix.rule }}-${{ matrix.technology }}-${{ matrix.seq_method }}
          path: .tests/results/patient-reports/2022-01-01.zip

      # Uploads the log directory as an artifact. Only runs for the technology
      # "all" and skips the "all -npr" matrix entry.
      - name: Upload logs
        uses: actions/upload-artifact@v3
        if: matrix.technology == 'all' && matrix.rule != 'all -npr'
        with:
          # The name includes seq_method (like the report artifact) so that
          # the shotgun and amplicon jobs do not upload into the same
          # artifact and overwrite each other's log files.
          name: log-rule-${{ matrix.rule }}-technology-${{ matrix.technology }}-${{ matrix.seq_method }}
          path: .tests/logs/

      # Sets mode 755 recursively on the conda directory created by snakemake.
      - name: Change permissions for caching
        run: |
          sudo chmod -R 755 .tests/.snakemake/conda

      # Shows the remaining disk space of the runner in the job log.
      - name: Print disk space
        run: |
          sudo df -h

  # Runs after Formatting and Linting. One job is started per benchmark rule
  # listed in the matrix.
  Benchmarks-Tests:
    needs:
      - Formatting
      - Linting
      #- Pre-Commit
    runs-on: ubuntu-latest
    env:
      GISAID_API_TOKEN: ${{ secrets.GISAID_API_TOKEN }}
    strategy:
      matrix:
        rule:
          - benchmark_strain_calling
          - benchmark_assembly
          - benchmark_mixtures
          - benchmark_non_sars_cov_2
          - benchmark_reads
          - compare_assemblers
          # - generate_test_cases
    steps:
      - uses: actions/checkout@v2

      # - name: Cache conda dependencies
      #   uses: actions/cache@v2
      #   with:
      #     path: |
      #       .tests/.snakemake/conda
      #     key: benchmarks-${{ runner.os }}-${{ matrix.rule }}-${{ matrix.technology }}-${{ matrix.seq_method }}-${{ hashFiles('*.tests/.snakemake/conda/*.yaml') }}

      # TODO caches are currently completely misleading, as they lead to certain files becoming present on disk which might
      # then hide failures that would otherwise be seen.

      # - name: Get date
      #   id: get-date
      #   run: |
      #     echo "::set-output name=date::$(/bin/date -u "+%Y%m%d")"
      #   shell: bash

      # - name: Cache resources
      #   id: test-resources
      #   uses: actions/cache@v2
      #   with:
      #     path: |
      #       .tests/resources/minikraken-8GB
      #       .tests/resources/genomes/human-genome.fna.gz
      #     key: ${{ runner.os }}-test-resources-${{ steps.get-date.outputs.date }}-${{ hashFiles('**.tests/resources**taxo.k2d') }}
      #     restore-keys: |
      #       ${{ runner.os }}-test-resources-${{ steps.get-date.outputs.date }}-
      #       ${{ runner.os }}-test-resources-

      # - name: Cache results
      #   if: startsWith(matrix.rule, 'all')
      #   id: test-results
      #   uses: actions/cache@v2
      #   with:
      #     path: |
      #       .tests/results
      #     key: ${{ runner.os }}-results-${{ steps.get-date.outputs.date }}-${{ hashFiles('**results/2021-02-01/qc/multiqc.html') }}
      #     restore-keys: |
      #       ${{ runner.os }}-results-${{ steps.get-date.outputs.date }}-
      #       ${{ runner.os }}-results-

      # - name: Cache data
      #   if: startsWith(matrix.rule, 'all')
      #   id: test-data
      #   uses: actions/cache@v2
      #   with:
      #     path: |
      #       .tests/data
      #     key: ${{ runner.os }}-test-data-${{ steps.get-date.outputs.date }}-${{ hashFiles('**.tests/data/*.fastq.gz') }}
      #     restore-keys: |
      #       ${{ runner.os }}-test-data-${{ steps.get-date.outputs.date }}-
      #       ${{ runner.os }}-test-data-

      # - name: Cache benchmark data
      #   if: startsWith(matrix.rule, 'all') != true
      #   id: benchmark-data
      #   uses: actions/cache@v2
      #   with:
      #     path: |
      #       .tests/resources/benchmarking
      #     key: ${{ runner.os }}-benchmark-data-${{ steps.get-date.outputs.date }}-${{ hashFiles('**.tests/resources/benchmarking/**/reads.1.fastq.gz') }}
      #     restore-keys: |
      #       ${{ runner.os }}-benchmark-data-${{ steps.get-date.outputs.date }}-
      #       ${{ runner.os }}-benchmark-data-

      # - name: Cache test dependencies
      #   if: startsWith(matrix.rule, 'all')
      #   id: test-dependencies
      #   uses: actions/cache@v2
      #   with:
      #     path: |
      #       .tests/.snakemake/conda
      #     key: ${{ runner.os }}-sars-cov-test-dependencies-${{ steps.get-date.outputs.date }}-${{ hashFiles('*.tests/.snakemake/conda/*.yaml') }}
      #     restore-keys: |
      #       ${{ runner.os }}-sars-cov-test-dependencies-${{ steps.get-date.outputs.date }}-
      #       ${{ runner.os }}-sars-cov-test-dependencies-

      # - name: Cache benchmark dependencies
      #   if: startsWith(matrix.rule, 'all') != true
      #   id: benchmark-dependencies
      #   uses: actions/cache@v2
      #   with:
      #     path: |
      #       .tests/.snakemake/conda
      #     key: ${{ runner.os }}-sars-cov-benchmark-dependencies-${{ steps.get-date.outputs.date }}-${{ hashFiles('*.tests/.snakemake/conda/*.yaml') }}
      #     restore-keys: |
      #       ${{ runner.os }}-sars-cov-benchmark-dependencies-${{ steps.get-date.outputs.date }}-
      #       ${{ runner.os }}-sars-cov-benchmark-dependencies-

      # Downloads Illumina and ONT test reads and writes a sample sheet with an
      # additional test_case column.
      # Only runs for the rule "generate_test_cases"; that entry is commented
      # out in this job's matrix, so the step is currently always skipped.
      # NOTE(review): B117.1 and B117.2 are both fetched from the reads.1 URL;
      # verify that this is intended.
      - name: Prepare test data
        if: matrix.rule == 'generate_test_cases'
        run: |
          mkdir -p .tests/data
          curl -L https://github.com/thomasbtf/small-kraken-db/raw/master/B.1.1.7.reads.1.fastq.gz > .tests/data/B117.1.fastq.gz
          curl -L https://github.com/thomasbtf/small-kraken-db/raw/master/B.1.1.7.reads.1.fastq.gz > .tests/data/B117.2.fastq.gz
          curl -L https://github.com/thomasbtf/small-kraken-db/raw/master/ont_reads.fastq.gz > .tests/data/ont_reads.fastq.gz
          echo sample_name,fq1,fq2,date,is_amplicon_data,technology,test_case > .tests/config/pep/samples.csv
          echo illumina-test,data/B117.1.fastq.gz,data/B117.2.fastq.gz,2022-01-01,1,illumina,case >> .tests/config/pep/samples.csv
          echo ont-test,data/ont_reads.fastq.gz,,2022-01-01,1,ont,case >> .tests/config/pep/samples.csv
      # Downloads the Illumina test reads and writes a sample sheet with one
      # Illumina sample whose is_amplicon_data is 0. Runs for every rule
      # except "generate_test_cases".
      # NOTE(review): B117.1 and B117.2 are both fetched from the reads.1 URL;
      # verify that this is intended.
      - name: Prepare test data
        if: matrix.rule != 'generate_test_cases'
        run: |
          mkdir -p .tests/data
          curl -L https://github.com/thomasbtf/small-kraken-db/raw/master/B.1.1.7.reads.1.fastq.gz > .tests/data/B117.1.fastq.gz
          curl -L https://github.com/thomasbtf/small-kraken-db/raw/master/B.1.1.7.reads.1.fastq.gz > .tests/data/B117.2.fastq.gz
          {
            echo sample_name,fq1,fq2,date,is_amplicon_data,technology
            echo illumina-test,data/B117.1.fastq.gz,data/B117.2.fastq.gz,2022-01-01,0,illumina
          } > .tests/config/pep/samples.csv
      # Fetches the NCBI record BA000005.3 as FASTA and stores it,
      # gzip-compressed, under the name human-genome.fna.gz.
      - name: Use smaller reference files for testing
        if: steps.test-resources.outputs.cache-hit != true
        run: |
          # mkdir -p .tests/resources/minikraken-8GB
          # curl -SL https://github.com/thomasbtf/small-kraken-db/raw/master/human_k2db.tar.gz | tar zxvf - -C .tests/resources/minikraken-8GB --strip 1
          genomes_dir=.tests/resources/genomes
          mkdir -p "$genomes_dir"
          curl -SL "https://www.ncbi.nlm.nih.gov/sviewer/viewer.fcgi?id=BA000005.3&db=nuccore&report=fasta" | gzip -c > "$genomes_dir/human-genome.fna.gz"
      # Writes a two-entry strain genome list and downloads the NCBI records
      # MZ314997.1 and MZ314998.1 as B.1.1.7.fasta and B.1.351.fasta; sed
      # drops the last line of each response.
      - name: Simulate GISAID download
        run: |
          tables_dir=.tests/results/benchmarking/tables
          genomes_dir=.tests/resources/genomes
          mkdir -p "$tables_dir" "$genomes_dir"
          printf '%s\n' resources/genomes/B.1.1.7.fasta resources/genomes/B.1.351.fasta > "$tables_dir/strain-genomes.txt"
          curl "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nuccore&id=MZ314997.1&rettype=fasta" | sed '$ d' > "$genomes_dir/B.1.1.7.fasta"
          curl "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nuccore&id=MZ314998.1&rettype=fasta" | sed '$ d' > "$genomes_dir/B.1.351.fasta"
      # Runs snakemake inside .tests for the benchmark rule of this matrix job.
      - name: Test rule ${{ matrix.rule }}
        uses: snakemake/snakemake-github-action@v1.24.0
        with:
          snakefile: workflow/Snakefile
          directory: .tests
          # Folded scalar: the lines below are joined with single spaces.
          args: >-
            -pr --use-conda --show-failed-logs --cores 2
            --resources ncbi_api_requests=1
            --conda-cleanup-pkgs cache --conda-frontend mamba
            ${{ matrix.rule }}

      # Creates report.zip. None of the rules in this job's matrix starts with
      # "all -npr", so the condition holds for every matrix entry.
      - name: Test report
        if: ${{ !startsWith(matrix.rule, 'all -npr') }}
        uses: snakemake/snakemake-github-action@v1.24.0
        with:
          snakefile: workflow/Snakefile
          directory: .tests
          args: ${{ matrix.rule }} --report report.zip

      # - name: Upload report
      #   uses: actions/upload-artifact@v3
      #   with:
      #     name: report-rule-${{ matrix.rule }}
      #     path: .tests/results/patient-reports/2022-01-01.zip

      # Uploads .tests/logs/ as one artifact per matrix rule.
      - uses: actions/upload-artifact@v3
        name: Upload logs
        with:
          path: .tests/logs/
          name: log-rule-${{ matrix.rule }}

      # - name: Unit test
      #     args: "--generate-unit-tests"
      # - name: Test workflow (singularity)
      #     args: "--use-conda --use-singularity --show-failed-logs --cores 2 --resources ncbi_api_requests=1 --conda-cleanup-pkgs cache --conda-frontend mamba"
      # - name: Test input changes
      #     args: "--use-conda --show-failed-logs --cores 2 --resources ncbi_api_requests=1 --conda-cleanup-pkgs cache --conda-frontend mamba -R `snakemake --list-input-changes`"
      # - name: Test code changes
      #     args: "--use-conda --show-failed-logs --cores 2 --resources ncbi_api_requests=1 --conda-cleanup-pkgs cache --conda-frontend mamba -R `snakemake --list-code-changes`"
      # - name: Test params changes
      #     args: "--use-conda --show-failed-logs --cores 2 --resources ncbi_api_requests=1 --conda-cleanup-pkgs cache --conda-frontend mamba -R `snakemake --list-params-changes`"

      # Prints the strain calling table and fails the job if any line after
      # the first one contains "mismatch".
      - name: Check strain calling benchmark
        if: matrix.rule == 'benchmark_strain_calling'
        run: |
          results=.tests/results/benchmarking/strain-calling.csv
          cat "$results"
          if tail -n+2 "$results" | grep mismatch > /dev/null; then
            echo "Strain calling failed in some cases (see above)."
            exit 1
          fi
          echo "Strain calling was successful in all cases."
      # Prints the pseudoassembly table and fails the job if its last line
      # compares as smaller than 0.95.
      # NOTE(review): "<" inside [[ ]] is a string comparison, not a numeric
      # one, and only the last line of the file is inspected - confirm that
      # this matches the layout of pseudoassembly.csv.
      - name: Check pseudoassembly benchmark
        if: matrix.rule == 'benchmark_assembly'
        run: |
          results=.tests/results/benchmarking/assembly/pseudoassembly.csv
          cat "$results"
          if [[ $(tail -1 "$results") < 0.95 ]]; then
            echo "Pseudoassembly benchmarking failed. There is at least one assembly where the contigs do not cover 95% of the original sequence (see above)."
            exit 1
          fi
          echo "Pseudoassembly was successful."
      # Prints the assembly table and fails the job if its last line compares
      # as smaller than 0.8.
      # NOTE(review): "<" inside [[ ]] is a string comparison, not a numeric
      # one, and only the last line of the file is inspected - confirm that
      # this matches the layout of assembly.csv.
      - name: Check assembly benchmark
        if: matrix.rule == 'benchmark_assembly'
        run: |
          results=.tests/results/benchmarking/assembly/assembly.csv
          cat "$results"
          if [[ $(tail -1 "$results") < 0.8 ]]; then
            echo "Assembly benchmarking failed. There is at least one assembly where the contigs do not cover 80% of the original sequence (see above)."
            exit 1
          fi
          echo "Assembly was successful."
      # Dumps every non-cov2 kallisto strain call table into the job log.
      - name: Print non-sars-cov-2 kallisto calls
        if: matrix.rule == 'benchmark_non_sars_cov_2'
        run: cat .tests/results/benchmarking/tables/strain-calls/non-cov2-*.strains.kallisto.tsv
      # Prints the non-sars-cov-2 benchmark table and fails the job if any
      # line contains "is sars-cov-2".
      - name: Test non-sars-cov-2 coronaviruses
        if: matrix.rule == 'benchmark_non_sars_cov_2'
        run: |
          cat .tests/results/benchmarking/non-sars-cov-2.csv
          # grep reads the file directly; no extra cat process or subshell
          # is needed to evaluate the condition.
          if grep 'is sars-cov-2' .tests/results/benchmarking/non-sars-cov-2.csv > /dev/null
          then
              echo "Workflow failed! A non-sars-cov-2 genome was identified as sars-cov-2 (see above)."
              exit 1
          else
              echo "Workflow successfully identified samples as non-sars-cov-2 in all cases."
          fi
      # Sets mode 755 recursively on the conda directory created by snakemake.
      - name: Change permissions for caching
        run: |
          sudo chmod -R 755 .tests/.snakemake/conda

      # Shows the remaining disk space of the runner in the job log.
      - name: Print disk space
        run: |
          sudo df -h