From 4eccbd00c81e8d7a64eae4ca0f637ac621be99ba Mon Sep 17 00:00:00 2001 From: Manuel Holtgrewe Date: Thu, 31 Aug 2023 17:25:13 +0200 Subject: [PATCH] chore: initializing repository --- .ci/config/nginx/nginx.conf | 35 ++ .ci/secrets/db-password | 1 + .ci/volumes/reev-static/data/.gitkeep | 0 .editorconfig | 37 ++ .env.ci | 71 ++++ .gitattributes | 1 + .github/ISSUE_TEMPLATE/bug_report.md | 24 ++ .github/ISSUE_TEMPLATE/feature_request.md | 20 + .github/workflows/ci.yml | 45 +++ .github/workflows/conventional-prs.yml | 18 + .github/workflows/release-please.yml | 17 + .gitignore | 19 + CHANGELOG.md | 1 + LICENSE | 21 ++ README.md | 342 ++++++++++++++++++ docker-compose.override.yml-dev | 49 +++ docker-compose.override.yml-traefik-cert | 36 ++ docker-compose.override.yml-traefik-le | 49 +++ docker-compose.yml | 180 +++++++++ env.tpl | 65 ++++ utils/nginx/nginx.conf | 35 ++ utils/terraform/.gitignore | 2 + utils/terraform/main.tf | 22 ++ utils/terraform/provider.tf | 1 + utils/traefik-cert/config/certificates.toml | 6 + utils/traefik-cert/config/tls/.gitignore | 2 + .../config/tls/PLACE_TLS_FILES_HERE | 8 + 27 files changed, 1107 insertions(+) create mode 100644 .ci/config/nginx/nginx.conf create mode 100644 .ci/secrets/db-password create mode 100644 .ci/volumes/reev-static/data/.gitkeep create mode 100644 .editorconfig create mode 100644 .env.ci create mode 100644 .gitattributes create mode 100644 .github/ISSUE_TEMPLATE/bug_report.md create mode 100644 .github/ISSUE_TEMPLATE/feature_request.md create mode 100644 .github/workflows/ci.yml create mode 100644 .github/workflows/conventional-prs.yml create mode 100644 .github/workflows/release-please.yml create mode 100644 .gitignore create mode 100644 CHANGELOG.md create mode 100644 LICENSE create mode 100644 README.md create mode 100644 docker-compose.override.yml-dev create mode 100644 docker-compose.override.yml-traefik-cert create mode 100644 docker-compose.override.yml-traefik-le create mode 100644 docker-compose.yml 
create mode 100644 env.tpl create mode 100644 utils/nginx/nginx.conf create mode 100644 utils/terraform/.gitignore create mode 100644 utils/terraform/main.tf create mode 100644 utils/terraform/provider.tf create mode 100644 utils/traefik-cert/config/certificates.toml create mode 100644 utils/traefik-cert/config/tls/.gitignore create mode 100644 utils/traefik-cert/config/tls/PLACE_TLS_FILES_HERE diff --git a/.ci/config/nginx/nginx.conf b/.ci/config/nginx/nginx.conf new file mode 100644 index 0000000..5c13ff0 --- /dev/null +++ b/.ci/config/nginx/nginx.conf @@ -0,0 +1,35 @@ +user nginx; +worker_processes auto; + +error_log /var/log/nginx/error.log notice; +pid /var/run/nginx.pid; + +events { + worker_connections 1024; +} + +http { + include /etc/nginx/mime.types; + default_type application/octet-stream; + + log_format main '$remote_addr - $remote_user [$time_local] "$request" ' + '$status $body_bytes_sent "$http_referer" ' + '"$http_user_agent" "$http_x_forwarded_for"'; + + access_log /var/log/nginx/access.log main; + + sendfile on; + keepalive_timeout 65; + + gzip on; + + # The full static data directory is mounted into the container but we only + # serve the "nginx" sub directory (with indices in the default + # configuration). 
+ server { + location / { + root /data/nginx; + autoindex on; + } + } +} diff --git a/.ci/secrets/db-password b/.ci/secrets/db-password new file mode 100644 index 0000000..f3097ab --- /dev/null +++ b/.ci/secrets/db-password @@ -0,0 +1 @@ +password diff --git a/.ci/volumes/reev-static/data/.gitkeep b/.ci/volumes/reev-static/data/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..bfaf925 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,37 @@ +# http://editorconfig.org + +root = true + +[*] +charset = utf-8 +end_of_line = lf +insert_final_newline = true +trim_trailing_whitespace = true + +[*.{py,rst,ini}] +indent_style = space +indent_size = 4 + +[*.conf] +indent_style = space +indent_size = 2 + +[*.py] +line_length=120 +known_first_party=reev +multi_line_output=3 +default_section=THIRDPARTY + +[*.{html,css,scss,json,yml}] +indent_style = space +indent_size = 2 + +[*.md] +trim_trailing_whitespace = false + +[Makefile] +indent_style = tab + +[nginx.conf] +indent_style = space +indent_size = 2 diff --git a/.env.ci b/.env.ci new file mode 100644 index 0000000..d17b3fa --- /dev/null +++ b/.env.ci @@ -0,0 +1,71 @@ +# Docker Compose environment file used in CI. + +# -- Docker Images ----------------------------------------------------------- + +# Name of the registry server and org to use for our images. +# image_base=ghcr.io/bihealth + +# Name of the mehari image to use. +# image_mehari_name=mehari + +# Version of the mehari image to use. +# image_mehari_version=latest + +# Name of the viguno image to use. +# image_viguno_name=viguno + +# Version of the viguno image to use. +# image_viguno_version=latest + +# Name of the annonars image to use. +# image_annonars_name=annonars + +# Version of the annonars image to use. +# image_annonars_version=latest + +# Name of the traefik image to use. +# image_traefik_name=traefik + +# Version of the traefik image to use. 
+# image_traefik_version=2.10 + +# Name of the postgres image to use. +# image_postgres_name=postgres + +# Version of the postgres image to use. +# image_postgres_version=12 + +# Name of the redis image to use. +# image_redis_name=redis + +# Version of the redis image to use. +# image_redis_version=6 + +# Name of the Minio image to use. +# image_minio_name=quay.io/minio/minio + +# Version of the Minio image to use. +# image_minio_version=latest + +# Name of the "mc" (Minio client) image to use. +# image_mc_name=minio/mc + +# Version of the "mc" (Minio client) image to use. +# image_mc_version=latest + +# -- General Container Configuration ----------------------------------------- + +# Base directory for configuration. +# config_basedir: ./.dev/config +## In CI: set to directory with appropriate config. +config_basedir=./.ci/config + +# Base directory for volumes. +# volumes_basedir: ./volumes +## In CI: set to a directory with minimal data for spinning up the containers. +volumes_basedir=./.ci/volumes + +# Base directory for secrets. +# secrets_basedir: ./secrets +## In CI: set to a directory where the `ci.yml` writes data to. +secrets_basedir=./.ci/secrets diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..4b8421e --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +.ci/volumes/** filter=lfs diff=lfs merge=lfs -text diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 0000000..7451498 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,24 @@ +--- +name: Bug report +about: Create a report to help us improve +title: '' +labels: '' +assignees: '' + +--- + +**Describe the bug** +A clear and concise description of what the bug is. + +**To Reproduce** +Steps to reproduce the behavior: +1. Go to '...' +2. Click on '....' +3. Scroll down to '....' +4. See error + +**Expected behavior** +A clear and concise description of what you expected to happen. 
+ +**Additional context** +Add any other context about the problem here. diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 0000000..bbcbbe7 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,20 @@ +--- +name: Feature request +about: Suggest an idea for this project +title: '' +labels: '' +assignees: '' + +--- + +**Is your feature request related to a problem? Please describe.** +A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] + +**Describe the solution you'd like** +A clear and concise description of what you want to happen. + +**Describe alternatives you've considered** +A clear and concise description of any alternative solutions or features you've considered. + +**Additional context** +Add any other context or screenshots about the feature request here. diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..365fdcc --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,45 @@ +name: CI + +on: + push: + branches: + - main + pull_request: + branches: + - main + +jobs: + Lint: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v3 + + - name: Run YAML linting + uses: ibiqlik/action-yamllint@v3 + with: + file_or_dir: | + *.yml + + Up-Down: + runs-on: ubuntu-latest + needs: Lint + steps: + - name: Checkout repository + uses: actions/checkout@v3 + + - name: Prepare empty volumes + run: | + mkdir -p .ci/volumes/postgres/data + mkdir -p .ci/volumes/redis/data + mkdir -p .ci/volumes/minio/data + mkdir -p .ci/volumes/reev-static/data + + - name: Bring up and shut down services + uses: isbang/compose-action@v1.4.1 + with: + compose-file: docker-compose.yml + compose-flags: | + --env-file .env.ci + env: + DB_PASSWORD: "password" diff --git a/.github/workflows/conventional-prs.yml b/.github/workflows/conventional-prs.yml new file mode 100644 index
0000000..17c8812 --- /dev/null +++ b/.github/workflows/conventional-prs.yml @@ -0,0 +1,18 @@ +name: PR +on: + pull_request_target: + types: + - opened + - reopened + - edited + - synchronize + +jobs: + title-format: + runs-on: ubuntu-latest + steps: + - uses: amannn/action-semantic-pull-request@v3.4.0 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + validateSingleCommit: true diff --git a/.github/workflows/release-please.yml b/.github/workflows/release-please.yml new file mode 100644 index 0000000..f24166b --- /dev/null +++ b/.github/workflows/release-please.yml @@ -0,0 +1,17 @@ +on: + push: + branches: + - main + +name: release-please + +jobs: + release-please: + if: github.repository_owner == 'bihealth' + runs-on: ubuntu-latest + steps: + - name: Run release-please + uses: GoogleCloudPlatform/release-please-action@v2 + id: release + with: + release-type: simple diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..60ac8ca --- /dev/null +++ b/.gitignore @@ -0,0 +1,19 @@ +## Demo +/.dev + +## CI +/.ci + +## Docker Compose +# Configuration file. +.env +.secrets/ + +# The Docker Compose "override" YAML file. +docker-compose.override.yml + +## Text Editors +/.vscode +*~ +.*.sw? 
+.idea/ diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..825c32f --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1 @@ +# Changelog diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..973ac54 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2021-2023 Berlin Institute of Health + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..d9a4ad0 --- /dev/null +++ b/README.md @@ -0,0 +1,342 @@ +# REEV Docker Compose Configuration + +This repository contains the [Docker Compose](https://docs.docker.com/compose/) configuration for [REEV](https://github.com/bihealth/reev). + +## Development Setup + +This section describes the steps needed for a development setup. + +### Prerequisites + +You will need to fetch some of this from our S3 server. +We recommend the `s5cmd` tool as it is easy to install, use, and fast.
+You can download it from [github.com/peak/s5cmd/releases](https://github.com/peak/s5cmd/releases). +For example: + +```bash session +wget -O /tmp/s5cmd_2.1.0_Linux-64bit.tar.gz \ + https://github.com/peak/s5cmd/releases/download/v2.1.0/s5cmd_2.1.0_Linux-64bit.tar.gz +tar -C /tmp -xf /tmp/s5cmd_2.1.0_Linux-64bit.tar.gz +sudo cp /tmp/s5cmd /usr/local/bin/ +``` + +You will need to install Docker Compose. +Note that the "modern" way is to do this by using the docker compose plugin. +Instructions can be found [here on the Docker.com website](https://docs.docker.com/compose/install/linux/#install-using-the-repository). + +### Checkout and Configure + +First, clone the repository: + +```bash session +git clone git@github.com:bihealth/reev-docker-compose.git +``` + +From here on, the commands should be executed from within this repository (`cd reev-docker-compose`). + +We will use the directory `.dev` within the checkout for storing data and secrets. +In a production deployment, these directories should live outside of the checkout, of course. + +Now, we create the directories for data storage. + +```bash session +mkdir -p .dev/volumes/reev-static/data +``` + +Next, we setup some "secrets" for the passwords. + +```bash session +mkdir -p .dev/secrets +echo db-password >.dev/secrets/db-password +``` + +We now copy the `env.tpl` file to the default location for the environment `.env`. + +```bash session +cp env.tpl .env +``` + +Next, create a `docker-compose.override.yml` with the contents of the file `docker-compose.override.yml-dev`. +This will disable everything that we assume is running on your host when you are developing. +This includes the REEV backend, redis, celery workers, postgres. + +```bash session +cp docker-compose.override.yml-dev docker-compose.override.yml +``` + +### Download Dev Data + +Now you need to obtain the data to serve by the mehari, viguno, and annonars container. 
+For this, we have prepared strongly reduced data sets (overall less than 2GB rather than hundreds of GB of data). +Obtain the annonars data: + +```bash session +mkdir -p .dev/volumes/reev-static/data/download +SRC_DST=" +full/annonars/gnomad-mtdna-grch37-3.1+0.12.8/*:annonars/gnomad-mtdna-grch37-3.1+0.12.8 +full/annonars/gnomad-mtdna-grch38-3.1+0.12.8/*:annonars/gnomad-mtdna-grch38-3.1+0.12.8 +full/annonars/helixmtdb-grch37-20200327+0.12.8/*:annonars/helixmtdb-grch37-20200327+0.12.8 +full/annonars/helixmtdb-grch38-20200327+0.12.8/*:annonars/helixmtdb-grch38-20200327+0.12.8 +full/annonars/genes-3.1+2.1.1+4.4+20230606+10.1+20230828+0.16.0/*:annonars/genes-3.1+2.1.1+4.4+20230606+10.1+20230828+0.16.0 +full/mehari/genes-txs-grch37-0.2.2/*:mehari/genes-txs-grch37-0.2.2 +full/mehari/genes-txs-grch38-0.2.2/*:mehari/genes-txs-grch38-0.2.2 +full/mehari/genes-xlink-20230624/*:mehari/genes-xlink-20230624 +full/tracks/*:tracks +full/worker/*:worker +reduced-dev/annonars/*:annonars +reduced-dev/mehari/*:mehari +reduced-dev/viguno/*:viguno +" +(set -x; for src_dst in $SRC_DST; do \ + src=$(echo $src_dst | cut -d : -f 1); \ + dst=$(echo $src_dst | cut -d : -f 2); \ + mkdir -p .dev/volumes/reev-static/data/download/$dst; \ + s5cmd \ + --endpoint-url=https://ceph-s3-public.cubi.bihealth.org \ + --no-sign-request \ + sync \ + "s3://reev-public/$src" \ + ".dev/volumes/reev-static/data/download/$dst"; \ +done) +``` + +Setup symlink structure so the data is at the expected location. 
+ +```bash session +## +## annonars +## + +mkdir -p .dev/volumes/reev-static/data/annonars + +ln -sr .dev/volumes/reev-static/data/download/annonars/genes-* \ + .dev/volumes/reev-static/data/annonars/genes + +names="cadd dbsnp dbnsfp dbscsnv gnomad-mtdna gnomad-genomes gnomad-exomes helixmtdb cons"; \ +for genome in grch37 grch38; do \ + for name in $names; do \ + mkdir -p .dev/volumes/reev-static/data/annonars/$genome; \ + test -e .dev/volumes/reev-static/data/annonars/$genome/$name || \ + ln -sr \ + $(echo .dev/volumes/reev-static/data/download/annonars/$name-$genome-* \ + | tr ' ' '\n' \ + | tail -n 1) \ + .dev/volumes/reev-static/data/annonars/$genome/$name; \ + done; \ +done + +## +## mehari +## + +mkdir -p .dev/volumes/reev-static/data/mehari/grch3{7,8} + +ln -sr .dev/volumes/reev-static/data/download/mehari/genes-txs-grch37-*/mehari-data-txs-grch37-*.bin.zst \ + .dev/volumes/reev-static/data/mehari/grch37/txs.bin.zst +ln -sr .dev/volumes/reev-static/data/download/mehari/freqs-grch37-* \ + .dev/volumes/reev-static/data/mehari/grch37/freqs +ln -sr .dev/volumes/reev-static/data/download/mehari/genes-txs-grch38-*/mehari-data-txs-grch38-*.bin.zst \ + .dev/volumes/reev-static/data/mehari/grch38/txs.bin.zst +ln -sr .dev/volumes/reev-static/data/download/mehari/freqs-grch38-* \ + .dev/volumes/reev-static/data/mehari/grch38/freqs + +## +## viguno +## + +ln -sr .dev/volumes/reev-static/data/download/mehari/genes-xlink-20230624/genes-xlink.tsv \ + .dev/volumes/reev-static/data/hgnc_xlink.tsv +ln -sr .dev/volumes/reev-static/data/download/viguno/hpo-20230606+0.1.6 \ + .dev/volumes/reev-static/data/hpo + +## +## worker +## + +mkdir -p .dev/volumes/reev-static/data/worker/{grch3{7,8}/strucvars/bgdbs,noref/genes} + +ln -sr .dev/volumes/reev-static/data/download/worker/bgdb-exac-grch37-*/bgdb-exac.bin \ + .dev/volumes/reev-static/data/worker/grch37/strucvars/bgdbs/exac.bin +ln -sr .dev/volumes/reev-static/data/download/worker/bgdb-g1k-grch37-phase3v2+0.9.0/bgdb-g1k.bin \ +
.dev/volumes/reev-static/data/worker/grch37/strucvars/bgdbs/g1k.bin +ln -sr .dev/volumes/reev-static/data/download/worker/bgdb-gnomad-grch37-*/bgdb-gnomad.bin \ + .dev/volumes/reev-static/data/worker/grch37/strucvars/bgdbs/gnomad.bin +ln -sr .dev/volumes/reev-static/data/download/worker/bgdb-dbvar-grch37-*/bgdb-dbvar.bin \ + .dev/volumes/reev-static/data/worker/grch37/strucvars/bgdbs/dbvar.bin +ln -sr .dev/volumes/reev-static/data/download/worker/bgdb-dbvar-grch38-*/bgdb-dbvar.bin \ + .dev/volumes/reev-static/data/worker/grch38/strucvars/bgdbs/dbvar.bin +ln -sr .dev/volumes/reev-static/data/download/worker/bgdb-dgv-grch37-*/bgdb-dgv.bin \ + .dev/volumes/reev-static/data/worker/grch37/strucvars/bgdbs/dgv.bin +ln -sr .dev/volumes/reev-static/data/download/worker/bgdb-dgv-grch38-*/bgdb-dgv.bin \ + .dev/volumes/reev-static/data/worker/grch38/strucvars/bgdbs/dgv.bin +ln -sr .dev/volumes/reev-static/data/download/worker/bgdb-dgv-gs-grch37-*/bgdb-dgv-gs.bin \ + .dev/volumes/reev-static/data/worker/grch37/strucvars/bgdbs/dgv-gs.bin +ln -sr .dev/volumes/reev-static/data/download/worker/bgdb-dgv-gs-grch38-*/bgdb-dgv-gs.bin \ + .dev/volumes/reev-static/data/worker/grch38/strucvars/bgdbs/dgv-gs.bin + +ln -sr .dev/volumes/reev-static/data/download/worker/clinvar-strucvars-grch37-*/clinvar-strucvars.bin \ + .dev/volumes/reev-static/data/worker/grch37/strucvars/clinvar.bin +ln -sr .dev/volumes/reev-static/data/download/worker/clinvar-strucvars-grch38-*/clinvar-strucvars.bin \ + .dev/volumes/reev-static/data/worker/grch38/strucvars/clinvar.bin + +ln -sr .dev/volumes/reev-static/data/download/worker/patho-mms-grch37-*/patho-mms.bed \ + .dev/volumes/reev-static/data/worker/grch37/strucvars/patho-mms.bin +ln -sr .dev/volumes/reev-static/data/download/worker/patho-mms-grch38-*/patho-mms.bed \ + .dev/volumes/reev-static/data/worker/grch38/strucvars/patho-mms.bin + +mkdir -p .dev/volumes/reev-static/data/worker/grch3{7,8}/tads + +ln -sr 
.dev/volumes/reev-static/data/download/worker/tads-grch37-dixon2015/hesc.bed \ + .dev/volumes/reev-static/data/worker/grch37/tads/hesc.bed +ln -sr .dev/volumes/reev-static/data/download/worker/tads-grch38-dixon2015/hesc.bed \ + .dev/volumes/reev-static/data/worker/grch38/tads/hesc.bed + +ln -sr .dev/volumes/reev-static/data/download/worker/genes-xlink-*/genes-xlink.bin \ + .dev/volumes/reev-static/data/worker/noref/genes/xlink.bin +ln -sr .dev/volumes/reev-static/data/download/worker/acmg-sf-*/acmg_sf.tsv \ + .dev/volumes/reev-static/data/worker/noref/genes/acmg.tsv +ln -sr .dev/volumes/reev-static/data/download/worker/mim2gene-*/mim2gene.tsv \ + .dev/volumes/reev-static/data/worker/noref/genes/omim.tsv + +mkdir -p .dev/volumes/reev-static/data/worker/grch3{7,8}/genes + +ln -sr .dev/volumes/reev-static/data/download/worker/genes-regions-grch37-*/ensembl_genes.bin \ + .dev/volumes/reev-static/data/worker/grch37/genes/ensembl_regions.bin +ln -sr .dev/volumes/reev-static/data/download/worker/genes-regions-grch38-*/ensembl_genes.bin \ + .dev/volumes/reev-static/data/worker/grch38/genes/ensembl_regions.bin + +ln -sr .dev/volumes/reev-static/data/download/worker/genes-regions-grch37-*/refseq_genes.bin \ + .dev/volumes/reev-static/data/worker/grch37/genes/refseq_regions.bin +ln -sr .dev/volumes/reev-static/data/download/worker/genes-regions-grch38-*/refseq_genes.bin \ + .dev/volumes/reev-static/data/worker/grch38/genes/refseq_regions.bin + +mkdir -p .dev/volumes/reev-static/data/worker/grch3{7,8}/features + +ln -sr .dev/volumes/reev-static/data/download/worker/masked-repeat-grch37-*/masked-repeat.bin \ + .dev/volumes/reev-static/data/worker/grch37/features/masked_repeat.bin +ln -sr .dev/volumes/reev-static/data/download/worker/masked-segdup-grch37-*/masked-segdup.bin \ + .dev/volumes/reev-static/data/worker/grch37/features/masked_seqdup.bin + +ln -sr .dev/volumes/reev-static/data/download/worker/masked-repeat-grch38-*/masked-repeat.bin \ + 
.dev/volumes/reev-static/data/worker/grch38/features/masked_repeat.bin +ln -sr .dev/volumes/reev-static/data/download/worker/masked-segdup-grch38-*/masked-segdup.bin \ + .dev/volumes/reev-static/data/worker/grch38/features/masked_seqdup.bin + +## +## tracks +## + +mkdir -p .dev/volumes/reev-static/data/nginx/grch3{7,8} + +paths_37=$(find .dev/volumes/reev-static/data/download/tracks/ -type f -name '*.bed' -or -name '*.bed.gz' | sort | grep grch37) +for path in $paths_37; do + if [[ -e ${path}.tbi ]]; then + ln -sr $path ${path}.tbi .dev/volumes/reev-static/data/nginx/grch37 + else + ln -sr $path .dev/volumes/reev-static/data/nginx/grch37 + fi +done + +paths_38=$(find .dev/volumes/reev-static/data/download/tracks/ -type f -name '*.bed' -or -name '*.bed.gz' | sort | grep grch38) +for path in $paths_38; do + if [[ -e ${path}.tbi ]]; then + ln -sr $path ${path}.tbi .dev/volumes/reev-static/data/nginx/grch38 + else + ln -sr $path .dev/volumes/reev-static/data/nginx/grch38 + fi +done +``` + +To create an in-house database: + +```bash session +reev-server-worker db mk-inhouse \ + --path-output-tsv /tmp/inhouse.tsv \ + $(find YOUR_PATH -name '*.gts.tsv.gz' | sort) + +reev-server-worker db to-bin \ + --input-type strucvar-inhouse \ + --path-input /tmp/inhouse.tsv \ + --path-output-bin .dev/volumes/reev-static/data/worker/grch37/strucvars/inhouse.bin +``` + +### Setup Configuration + +The next step is to create the configuration files in `.dev/config`. + +```bash session +mkdir -p .dev/config/nginx +cp utils/nginx/nginx.conf .dev/config/nginx +``` + +### Startup and Check + +Now, you can bring up the docker compose environment (stop with `Ctrl+C`).
+ +```bash session +docker compose up +``` + +To verify the results, have a look at the following URLs: + +- Annonars database infos: http://127.0.0.1:3001/annos/db-info?genome_release=grch37 +- Annonars gene info: http://0.0.0.0:3001/genes/info?hgnc_id=HGNC:12403 +- Annonars variant info: http://0.0.0.0:3001/annos/variant?genome_release=grch37&chromosome=17&pos=41244100&reference=G&alternative=A +- Mehari impact predictions: http://127.0.0.1:3002/tx/csq?genome-release=grch37&chromosome=17&position=48275363&reference=C&alternative=A +- Viguno for TGDS: http://127.0.0.1:3003/hpo/genes?gene_symbol=TGDS +- Nginx server with browser tracks: http://127.0.0.1:3004/ + +Note that the development subset only has variants for a few genes, including BRCA1 (the example above). + +## Service Information + +This section describes the services that are started with this Docker Compose. + +### Traefik + +[Traefik](https://traefik.io/traefik/) is a reverse proxy that is used as the main entry point for all services behind HTTP(S). +The software is well-documented by its creators. +However, it is central to the setup and for much of the additional setup, touching Traefik configuration is needed. +We thus summarize some important points here. + +- Almost all configuration is done using labels on the `traefik` container itself or other containers. +- In the case of using configuration files, you will have to mount them from the host into the container. +- By default, we use "catch-all" configuration based on regular expressions on the host/domain name. + +### Mehari + +Mehari (by the REEV authors) provides information about variants and their effect on individual transcripts. + +### Viguno + +Viguno (by the REEV authors) provides HPO/OMIM related information. + +### Annonars + +Annonars (by the REEV authors) provides variant annotation from public databases. + +### Postgres + +We use postgres for the database backend of REEV.
+ +### Redis + +The Redis database is used for key-value store, e.g., for caching and the queues in the REEV server. + +## Developer Info + +### Managing GitHub Project with Terraform + +```bash session +$ export GITHUB_OWNER=bihealth +$ export GITHUB_TOKEN=ghp_ + +$ cd utils/terraform +$ terraform init +$ terraform import github_repository.reev-docker-compose reev-docker-compose + +$ terraform validate +$ terraform fmt +$ terraform plan +$ terraform apply +``` diff --git a/docker-compose.override.yml-dev b/docker-compose.override.yml-dev new file mode 100644 index 0000000..5cde5ab --- /dev/null +++ b/docker-compose.override.yml-dev @@ -0,0 +1,49 @@ +# Docker Compose Override YAML fragment that can be used for development +# +# It will: +# +# - set the number of replicas to 0 for all containers where the equivalent +# will be run outside of docker or is not needed; this includes traefik, +# reev, postgres, redis, ... +# - expose the containers that you need running in docker at the following +# ports: +# - `3001` -- annonars +# - `3002` -- mehari +# - `3003` -- viguno +# - `3004` -- nginx + +services: + # map annonars to port 3001 + annonars: + ports: + - "3001:8080" + + # map mehari to port 3002 + mehari: + ports: + - "3002:8080" + + # map viguno to port 3003 + viguno: + ports: + - "3003:8080" + + # map nginx to port 3004 + nginx: + ports: + - "3004:80" + + # disable traefik + traefik: + deploy: + replicas: 0 + + # disable postgres + postgres: + deploy: + replicas: 0 + + # disable redis + redis: + deploy: + replicas: 0 diff --git a/docker-compose.override.yml-traefik-cert b/docker-compose.override.yml-traefik-cert new file mode 100644 index 0000000..7a12979 --- /dev/null +++ b/docker-compose.override.yml-traefik-cert @@ -0,0 +1,36 @@ +# Docker Compose Override YAML fragment that allows to use custom +# certificates with traefik for TLS.
+# +# Quick Manual: +# +# - create a directory `config/traefik` +# - copy the file `utils/traefik-cert/config/certificates.toml` to +# `config/traefik`. +# - create a sub directory `config/traefik/tls` and place the certificate +# (including the whole certificate chain as needed, e.g., for DFN +# certificates) to `config/traefik/tls/server.crt` and the certificate +# key to `config/traefik/tls/server.key`) +# - merge this file into `docker-compose.override.yml` + +services: + # Configuration to override for traefik + traefik: + command: + # Default command lines, see `docker-compose.yml`. + - "--providers.docker=true" + - "--providers.docker.exposedbydefault=false" + - "--entrypoints.web.http.redirections.entryPoint.to=websecure" + - "--entrypoints.web.http.redirections.entryPoint.scheme=https" + - "--entrypoints.web.http.redirections.entrypoint.permanent=true" + - "--entrypoints.web.address=:80" + - "--entrypoints.websecure.address=:443" + # EXTRA command lines to make traefik use the config file from bind + # mount + - "--providers.file.directory=/etc/traefik" + - "--providers.file.watch=true" + volumes: + # Default volume lines. + - "/var/run/docker.sock:/var/run/docker.sock:ro" + # EXTRA volumes lines to mount the (to-be-created) configuration + # directory + - "./config/traefik:/etc/traefik:ro" diff --git a/docker-compose.override.yml-traefik-le b/docker-compose.override.yml-traefik-le new file mode 100644 index 0000000..a51cc58 --- /dev/null +++ b/docker-compose.override.yml-traefik-le @@ -0,0 +1,49 @@ +# Docker Compose Override YAML fragment that allows to use letsencrypt +# with traefik for TLS. 
+# +# Quick Manual: +# +# - create folder `volumes/traefik/letsencrypt` that will be mounted +# into the container +# - merge this file into `docker-compose.override.yml` and override +# the `YOUR_EMAIL@YOUR_DOMAIN.com` with your email address + +services: + # Configuration to override for traefik + traefik: + command: + # Default command lines, see `docker-compose.yml`. + - "--providers.docker=true" + - "--providers.docker.exposedbydefault=false" + - "--entrypoints.web.http.redirections.entryPoint.to=websecure" + - "--entrypoints.web.http.redirections.entryPoint.scheme=https" + - "--entrypoints.web.http.redirections.entrypoint.permanent=true" + - "--entrypoints.web.address=:80" + - "--entrypoints.websecure.address=:443" + # EXTRA command lines to make traefik use the config file from bind + # mount + # NOTE: update the following line + - "--certificatesresolvers.le.acme.email=YOUR_EMAIL@YOUR_DOMAIN.com" + - "--certificatesresolvers.le.acme.storage=/letsencrypt/acme.json" + - "--certificatesresolvers.le.acme.tlschallenge=true" + volumes: + # Default volume lines. + - "/var/run/docker.sock:/var/run/docker.sock:ro" + # EXTRA volumes lines to mount the (to-be-created) configuration + # directory + - "./volumes/traefik/letsencrypt:/letsencrypt:rw" + + # Configuration to override for `reev`. + reev: + labels: + # Default labels lines for reev. + - "traefik.enable=true" + - "traefik.http.middlewares.xforward.headers.customrequestheaders.X-Forwarded-Proto=https" + - "traefik.http.routers.reev.entrypoints=web,websecure" + - "traefik.http.routers.reev.middlewares=xforward" + - "traefik.http.routers.reev.rule=HostRegexp(`{catchall:.+}`)" + - "traefik.http.services.reev.loadbalancer.server.port=8080" + - "traefik.http.routers.reev.tls=true" + # EXTRA labels lines for reev to enable letsencrypt. 
+ - "traefik.http.routers.reev.tls=true" + - "traefik.http.routers.reev.tls.certresolver=le" diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..43aec4d --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,180 @@ +--- + +version: "3.9" + +# == Re-useable Definitions ================================================== + +# Default service definition for all (incl. postgres/redis/...) +x-service-default: &service_default + networks: + - reev + restart: unless-stopped + +# Default service definition for reev services. +# +# We provide the /data directory as read-only to all services as this +# simplifies the configuration considerably. +x-service-reev-default: &service_reev_default + volumes: + - type: bind + source: ${volumes_basedir:-./.dev/volumes}/reev-static/data + target: /data + read_only: true + + +# == Services ================================================================ + +services: + # -- Traefik ---------------------------------------------------------------- + # + # We use traefik as the reverse proxy for all public services. + # + # This file only contains the bare minimal configuration. We provide example + # override files as `docker-compose.override.yml-traefik-{cert,le}` for + # using custom certificates or letsencrypt. + + traefik: + <<: *service_default + container_name: traefik + hostname: traefik + image: ${image_traefik_name:-traefik}:${image_traefik_version:-2.10} + # Expose the default HTTP and HTTPS ports. + ports: + - "80:80" + - "443:443" + command: + # Enable Docker provider and disable "exposed by default". + - "--providers.docker=true" + - "--providers.docker.exposedbydefault=false" + # Define the "websecure" entrypoint with port 443. + - "--entrypoints.websecure.address=:443" + # Define the "web" entrypoint with port 80 and configure automated + # permanent redirection from web to websecure (HTTP to HTTPS).
+ - "--entrypoints.web.http.redirections.entryPoint.to=websecure" + - "--entrypoints.web.http.redirections.entryPoint.scheme=https" + - "--entrypoints.web.http.redirections.entrypoint.permanent=true" + - "--entrypoints.web.address=:80" + volumes: + # Mount Docker socket into container so traefik can react to events. + - "/var/run/docker.sock:/var/run/docker.sock:ro" + + # -- nginx ----------------------------------------------------------------- + # + # We serve static files such as browser tracks with nginx. + + nginx: + container_name: nginx + hostname: nginx + image: ${image_nginx_name:-nginx}:${image_nginx_version:-1} + volumes: + - type: bind + source: ${volumes_basedir:-./.dev/volumes}/reev-static/data + target: /data + read_only: true + - type: bind + source: ${config_basedir:-./.dev/config}/nginx/nginx.conf + target: /etc/nginx/nginx.conf + + # -- REEV ------------------------------------------------------------------ + # + # REEV web server + reev: + <<: *service_reev_default + container_name: reev + hostname: reev + image: "${image_base:-ghcr.io/bihealth}/${image_reev_name:-reev}:\ + ${image_reev_version:-main}" + labels: + # Default labels lines for reev. + - "traefik.enable=true" + - "traefik.http.middlewares.xforward.headers.customrequestheaders.X-Forwarded-Proto=https" + - "traefik.http.routers.reev.entrypoints=web,websecure" + - "traefik.http.routers.reev.middlewares=xforward" + - "traefik.http.routers.reev.rule=HostRegexp(`{catchall:.+}`)" + - "traefik.http.services.reev.loadbalancer.server.port=8080" + - "traefik.http.routers.reev.tls=true" + + # -- Mehari ---------------------------------------------------------------- + # + # Mehari provides the transcript-related information. 
+ + mehari: + <<: *service_reev_default + container_name: mehari + hostname: mehari + image: "${image_base:-ghcr.io/bihealth}/${image_mehari_name:-mehari}:\ + ${image_mehari_version:-latest}" + + # -- Viguno ---------------------------------------------------------------- + # + # Viguno provides the disease/phenotype/gene relationships and related + # information. + + viguno: + <<: *service_reev_default + container_name: viguno + hostname: viguno + image: "${image_base:-ghcr.io/bihealth}/${image_viguno_name:-viguno}:\ + ${image_viguno_version:-latest}" + + # -- Annonars --------------------------------------------------------------- + # + # Annonars provides the variant information but also the gene information. + + annonars: + <<: *service_reev_default + container_name: annonars + hostname: annonars + image: "${image_base:-ghcr.io/bihealth}/${image_annonars_name:-annonars}:\ + ${image_annonars_version:-latest}" + + # -- PostgreSQL Server ----------------------------------------------------- + # + # We use the default configuration, but mount a volume for the data for + # persistent storage. + + postgres: + <<: *service_default + container_name: postgres + hostname: postgres + image: ${image_postgres_name:-postgres}:${image_postgres_version:-12} + environment: + POSTGRES_USER: reev + POSTGRES_PASSWORD_FILE: /run/secrets/db-password + POSTGRES_DB: reev + secrets: + - db-password + volumes: + - type: bind + source: ${volumes_basedir:-./.dev/volumes}/postgres/data + target: /var/lib/postgresql/data + + # -- Redis ----------------------------------------------------------------- + # + # We use the default configuration, but mount a volume for the data for + # persistent storage. 
+ + redis: + <<: *service_default + container_name: redis + hostname: redis + image: ${image_redis_name:-redis}:${image_redis_version:-6} + volumes: + - type: bind + source: ${volumes_basedir:-./.dev/volumes}/redis/data + target: /data + +# == Secrets ================================================================ + +secrets: + # The PostgreSQL database password. + db-password: + file: ${secrets_basedir:-./.dev/secrets}/db-password + +# == Networks ================================================================ + +networks: + # Explicitly configure the "reev" network so we can control its name. + reev: + driver_opts: + com.docker.network.bridge.name: br-reev diff --git a/env.tpl b/env.tpl new file mode 100644 index 0000000..3f0ecaa --- /dev/null +++ b/env.tpl @@ -0,0 +1,65 @@ +# Template for an .env file. + +# -- Docker Images ----------------------------------------------------------- + +# Name of the registry server and org to use for our images. +# image_base=ghcr.io/bihealth + +# Name of the mehari image to use. +# image_mehari_name=mehari + +# Version of the mehari image to use. +# image_mehari_version=latest + +# Name of the viguno image to use. +# image_viguno_name=viguno + +# Version of the viguno image to use. +# image_viguno_version=latest + +# Name of the annonars image to use. +# image_annonars_name=annonars + +# Version of the annonars image to use. +# image_annonars_version=latest + +# Name of the traefik image to use. +# image_traefik_name=traefik + +# Version of the traefik image to use. +# image_traefik_version=2.10 + +# Name of the postgres image to use. +# image_postgres_name=postgres + +# Version of the postgres image to use. +# image_postgres_version=12 + +# Name of the redis image to use. +# image_redis_name=redis + +# Version of the redis image to use. +# image_redis_version=6 + +# Name of the reev image to use. +# image_reev_name=reev + +# Version of the reev image to use. +# image_reev_version=main + +# Name of the nginx image to use. 
+# image_nginx_name=nginx + +# Version of the nginx image to use. +# image_nginx_version=1 + +# -- General Container Configuration ----------------------------------------- + +# Base directory for configuration. +# config_basedir=./.dev/config + +# Base directory for volumes. +# volumes_basedir=./.dev/volumes + +# Base directory for secrets. +# secrets_basedir=./.dev/secrets diff --git a/utils/nginx/nginx.conf b/utils/nginx/nginx.conf new file mode 100644 index 0000000..5c13ff0 --- /dev/null +++ b/utils/nginx/nginx.conf @@ -0,0 +1,35 @@ +user nginx; +worker_processes auto; + +error_log /var/log/nginx/error.log notice; +pid /var/run/nginx.pid; + +events { + worker_connections 1024; +} + +http { + include /etc/nginx/mime.types; + default_type application/octet-stream; + + log_format main '$remote_addr - $remote_user [$time_local] "$request" ' + '$status $body_bytes_sent "$http_referer" ' + '"$http_user_agent" "$http_x_forwarded_for"'; + + access_log /var/log/nginx/access.log main; + + sendfile on; + keepalive_timeout 65; + + gzip on; + + # The full static data directory is mounted into the container but we only + # serve the "nginx" sub directory (with indices in the default + # configuration). + server { + location / { + root /data/nginx; + autoindex on; + } + } +} diff --git a/utils/terraform/.gitignore b/utils/terraform/.gitignore new file mode 100644 index 0000000..5dfe310 --- /dev/null +++ b/utils/terraform/.gitignore @@ -0,0 +1,2 @@ +.terraform* +terraform.tfstate* diff --git a/utils/terraform/main.tf b/utils/terraform/main.tf new file mode 100644 index 0000000..b56653f --- /dev/null +++ b/utils/terraform/main.tf @@ -0,0 +1,22 @@ +# Management of the GitHub project. 
+ +resource "github_repository" "reev-docker-compose" { + name = "reev-docker-compose" + description = "REEV Docker Compose" + + has_issues = true + visibility = "public" + + allow_rebase_merge = false + allow_merge_commit = false + delete_branch_on_merge = true + + has_downloads = false + has_projects = false + has_wiki = false + + vulnerability_alerts = true + + squash_merge_commit_message = "BLANK" + squash_merge_commit_title = "PR_TITLE" +} diff --git a/utils/terraform/provider.tf b/utils/terraform/provider.tf new file mode 100644 index 0000000..0349641 --- /dev/null +++ b/utils/terraform/provider.tf @@ -0,0 +1 @@ +provider "github" {} diff --git a/utils/traefik-cert/config/certificates.toml b/utils/traefik-cert/config/certificates.toml new file mode 100644 index 0000000..b856eb0 --- /dev/null +++ b/utils/traefik-cert/config/certificates.toml @@ -0,0 +1,6 @@ +[tls] + [tls.stores] + [tls.stores.default] + [tls.stores.default.defaultCertificate] + certFile = "/etc/traefik/tls/server.crt" + keyFile = "/etc/traefik/tls/server.key" diff --git a/utils/traefik-cert/config/tls/.gitignore b/utils/traefik-cert/config/tls/.gitignore new file mode 100644 index 0000000..be870b4 --- /dev/null +++ b/utils/traefik-cert/config/tls/.gitignore @@ -0,0 +1,2 @@ +*.crt +*.key diff --git a/utils/traefik-cert/config/tls/PLACE_TLS_FILES_HERE b/utils/traefik-cert/config/tls/PLACE_TLS_FILES_HERE new file mode 100644 index 0000000..a301db9 --- /dev/null +++ b/utils/traefik-cert/config/tls/PLACE_TLS_FILES_HERE @@ -0,0 +1,8 @@ +# /etc/traefik/tls + +Place your SSL files in this directory. The `docker-compose.yaml` will make +them available in `/etc/traefik/tls` and `../certificates.toml` will use them +for certificates. + +- `server.crt` -- server TLS/SSL certificate (chain) +- `server.key` -- unencrypted TLS/SSL private key