From 4c01651f3084875b67d2b2690e3cc555b3b85665 Mon Sep 17 00:00:00 2001
From: Luca Cinquini
Date: Tue, 27 Feb 2024 07:05:44 -0700
Subject: [PATCH 1/3] feat(): Add build of Docker image to execute CWL workflows

---
Reviewer note: the Dockerfile and entrypoint hunks in this series were edited
by hand during review, so the blob ids on the "index" lines are stale (plain
"git apply" / "git am" ignore them; only --3way consults them). Indentation of
the YAML context lines was reconstructed from a whitespace-collapsed copy of
the series - verify it against the tree before applying.

 .github/workflows/build_docker_images.yml   | 24 ++++++++++
 airflow/docker/cwl/Dockerfile               | 40 ++++++++++++++++
 airflow/docker/cwl/docker_cwl_entrypoint.sh | 53 +++++++++++++++++++++
 3 files changed, 117 insertions(+)
 create mode 100644 airflow/docker/cwl/Dockerfile
 create mode 100755 airflow/docker/cwl/docker_cwl_entrypoint.sh

diff --git a/.github/workflows/build_docker_images.yml b/.github/workflows/build_docker_images.yml
index 80090b42..e940b3a6 100644
--- a/.github/workflows/build_docker_images.yml
+++ b/.github/workflows/build_docker_images.yml
@@ -12,6 +12,7 @@ env:
   REGISTRY: ghcr.io
   TAG: ${{ github.event.inputs.tag }}
   SPS_AIRFLOW: ${{ github.repository }}/sps-airflow
+  SPS_DOCKER_CWL: ${{ github.repository }}/sps-docker-cwl
 
 jobs:
   build-sps-airflow:
@@ -37,3 +38,26 @@ jobs:
           push: true
           tags: ${{ env.REGISTRY }}/${{ env.SPS_AIRFLOW }}:${{ env.TAG }}
           labels: ${{ steps.metascheduler.outputs.labels }}
+  build-sps-docker-cwl:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+      - name: Log in to the Container registry
+        uses: docker/login-action@f054a8b539a109f9f41c372932f1ae047eff08c9
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+      - name: Extract metadata (tags, labels) for SPS Docker CWL image
+        id: metascheduler
+        uses: docker/metadata-action@98669ae865ea3cffbcbaa878cf57c20bbf1c6c38
+        with:
+          images: ${{ env.REGISTRY }}/${{ env.SPS_DOCKER_CWL }}
+      - name: Build and push SPS Docker CWL image
+        uses: docker/build-push-action@ad44023a93711e3deb337508980b4b5e9bcdc5dc
+        with:
+          context: ./airflow/docker/cwl
+          file: airflow/docker/cwl/Dockerfile
+          push: true
+          tags: ${{ env.REGISTRY }}/${{ env.SPS_DOCKER_CWL }}:${{ env.TAG }}
+          labels: ${{ steps.metascheduler.outputs.labels }}
diff --git a/airflow/docker/cwl/Dockerfile b/airflow/docker/cwl/Dockerfile
new file mode 100644
index 00000000..781a95b1
--- /dev/null
+++ b/airflow/docker/cwl/Dockerfile
@@ -0,0 +1,40 @@
+# Image that executes CWL workflows with cwltool on top of Docker-in-Docker.
+# docker:dind Dockerfile: https://github.com/docker-library/docker/blob/master/Dockerfile-dind.template
+# The base is pinned to an exact release rather than the floating docker:dind
+# tag so that rebuilds are reproducible.
+FROM docker:25.0.3-dind
+
+# OS packages, installed in one layer with --no-cache so that the APK index is
+# not stored in the image:
+#   - python3, py3-pip: Python runtime used to create the CWL virtualenv
+#   - gcc, musl-dev, linux-headers, python3-dev: presumably needed to compile
+#     Python dependencies that ship no Alpine wheel - TODO confirm
+#   - nodejs, npm: nodejs is used to parse Javascript in CWL files
+RUN apk add --no-cache \
+        gcc \
+        linux-headers \
+        musl-dev \
+        nodejs \
+        npm \
+        py3-pip \
+        python3 \
+        python3-dev \
+    && ln -sf python3 /usr/bin/python
+
+# install CWL libraries into the virtualenv that the entrypoint activates
+# NOTE(review): versions are unpinned, so builds are not reproducible; pin them
+# once a known-good set has been validated.
+WORKDIR /usr/share/cwl
+RUN python -m venv venv \
+    && venv/bin/pip install --no-cache-dir \
+        awscli \
+        boto3 \
+        cwl-runner \
+        cwltool \
+        docker
+
+# script to execute a generic CWL workflow with arguments
+COPY docker_cwl_entrypoint.sh /usr/share/cwl/docker_cwl_entrypoint.sh
+
+# No USER instruction: the entrypoint starts dockerd, which needs root here.
+ENTRYPOINT ["/usr/share/cwl/docker_cwl_entrypoint.sh"]
diff --git a/airflow/docker/cwl/docker_cwl_entrypoint.sh b/airflow/docker/cwl/docker_cwl_entrypoint.sh
new file mode 100755
index 00000000..6ea15517
--- /dev/null
+++ b/airflow/docker/cwl/docker_cwl_entrypoint.sh
@@ -0,0 +1,53 @@
+#!/bin/sh
+# Script to execute a CWL workflow that includes Docker containers
+# The Docker engine is started before the CWL execution, and stopped afterwards.
+# $1: the CWL workflow URL (example: https://raw.githubusercontent.com/unity-sds/unity-sps-prototype/cwl-docker/cwl/cwl_workflows/echo_from_docker.cwl)
+# $2: the CWL job parameters as a JSON formatted string (example: { "name": "John Doe" })
+# $3: optional output directory, defaults to the current directory
+# Note: $output_dir must be accessible by the Docker container that executes this script
+# DOCKERD_START_TIMEOUT (environment, optional): seconds to wait for the Docker engine, defaults to 60
+
+set -ex
+cwl_workflow=$1
+job_args=$2
+output_dir=${3:-.}
+echo "Executing CWL workflow: $cwl_workflow with json arguments: $job_args and output directory: $output_dir"
+echo "$job_args" > /tmp/job_args.json
+cat /tmp/job_args.json
+
+# create output directory if it doesn't exist
+mkdir -p "$output_dir"
+
+# Start Docker engine
+dockerd > dockerd-logfile 2>&1
+
+# Wait until Docker engine is running
+# Poll 'docker version' once per second until it exits with 0; give up after
+# $DOCKERD_START_TIMEOUT attempts so a daemon that cannot start fails the run
+# instead of hanging it forever.
+attempts=0
+until docker version > /dev/null 2>&1
+do
+  attempts=$((attempts + 1))
+  if [ "$attempts" -ge "${DOCKERD_START_TIMEOUT:-60}" ]; then
+    echo "Docker engine did not start, see dockerd-logfile" >&2
+    exit 1
+  fi
+  sleep 1
+done
+echo "done sleeping"
+
+
+# Execute CWL workflow
+. /usr/share/cwl/venv/bin/activate
+aws s3 ls
+aws ssm get-parameter --name /sps/processing/workflows/edl_username --region us-west-2
+
+# wait for 60 minutes
+sleep 3600
+
+cwl-runner --outdir "$output_dir" --no-match-user --no-read-only "$cwl_workflow" /tmp/job_args.json
+deactivate
+
+# Stop Docker engine
+pkill -f dockerd

From b1ee5e22e7fdaac196447b7012f0683a15475c92 Mon Sep 17 00:00:00 2001
From: Luca Cinquini
Date: Tue, 27 Feb 2024 07:12:58 -0700
Subject: [PATCH 2/3] fix: cleaning up the entrypoint script

---
 airflow/docker/cwl/docker_cwl_entrypoint.sh | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/airflow/docker/cwl/docker_cwl_entrypoint.sh b/airflow/docker/cwl/docker_cwl_entrypoint.sh
index 6ea15517..da2aacc4 100755
--- a/airflow/docker/cwl/docker_cwl_entrypoint.sh
+++ b/airflow/docker/cwl/docker_cwl_entrypoint.sh
@@ -35,17 +35,9 @@ do
   fi
   sleep 1
 done
-echo "done sleeping"
-
 
 # Execute CWL workflow
 . /usr/share/cwl/venv/bin/activate
-aws s3 ls
-aws ssm get-parameter --name /sps/processing/workflows/edl_username --region us-west-2
-
-# wait for 60 minutes
-sleep 3600
-
 cwl-runner --outdir "$output_dir" --no-match-user --no-read-only "$cwl_workflow" /tmp/job_args.json
 deactivate
 

From a89e1edfddb8596b4f9991b70880df2fc1b39500 Mon Sep 17 00:00:00 2001
From: Luca Cinquini
Date: Tue, 27 Feb 2024 07:35:45 -0700
Subject: [PATCH 3/3] fix: the Docker engine wouldn't start with the previous command

---
Reviewer note: the daemon is backgrounded with the POSIX form
"> file 2>&1 &". The "&>" operator is a bash extension; the script runs under
#!/bin/sh, where a strict POSIX shell parses "cmd &> file &" as "cmd &"
followed by an empty redirection, so the log would never be captured.

 airflow/docker/cwl/docker_cwl_entrypoint.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/airflow/docker/cwl/docker_cwl_entrypoint.sh b/airflow/docker/cwl/docker_cwl_entrypoint.sh
index da2aacc4..41916d0d 100755
--- a/airflow/docker/cwl/docker_cwl_entrypoint.sh
+++ b/airflow/docker/cwl/docker_cwl_entrypoint.sh
@@ -19,7 +19,7 @@ cat /tmp/job_args.json
 mkdir -p "$output_dir"
 
 # Start Docker engine
-dockerd > dockerd-logfile 2>&1
+dockerd > dockerd-logfile 2>&1 &
 
 # Wait until Docker engine is running
 # Poll 'docker version' once per second until it exits with 0; give up after