diff --git a/.github/workflows/build_docker_images.yml b/.github/workflows/build_docker_images.yml
index 80090b4..e940b3a 100644
--- a/.github/workflows/build_docker_images.yml
+++ b/.github/workflows/build_docker_images.yml
@@ -12,6 +12,7 @@ env:
   REGISTRY: ghcr.io
   TAG: ${{ github.event.inputs.tag }}
   SPS_AIRFLOW: ${{ github.repository }}/sps-airflow
+  SPS_DOCKER_CWL: ${{ github.repository }}/sps-docker-cwl
 
 jobs:
   build-sps-airflow:
@@ -37,3 +38,27 @@ jobs:
           push: true
           tags: ${{ env.REGISTRY }}/${{ env.SPS_AIRFLOW }}:${{ env.TAG }}
           labels: ${{ steps.metascheduler.outputs.labels }}
+  # Builds the image defined in airflow/docker/cwl and pushes it to the registry as SPS_DOCKER_CWL:TAG
+  build-sps-docker-cwl:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+      - name: Log in to the Container registry
+        uses: docker/login-action@f054a8b539a109f9f41c372932f1ae047eff08c9
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+      - name: Extract metadata (tags, labels) for SPS Docker CWL image
+        id: metascheduler
+        uses: docker/metadata-action@98669ae865ea3cffbcbaa878cf57c20bbf1c6c38
+        with:
+          images: ${{ env.REGISTRY }}/${{ env.SPS_DOCKER_CWL }}
+      - name: Build and push SPS Docker CWL image
+        uses: docker/build-push-action@ad44023a93711e3deb337508980b4b5e9bcdc5dc
+        with:
+          context: ./airflow/docker/cwl
+          file: airflow/docker/cwl/Dockerfile
+          push: true
+          tags: ${{ env.REGISTRY }}/${{ env.SPS_DOCKER_CWL }}:${{ env.TAG }}
+          labels: ${{ steps.metascheduler.outputs.labels }}
diff --git a/airflow/docker/cwl/Dockerfile b/airflow/docker/cwl/Dockerfile
new file mode 100644
index 0000000..781a95b
--- /dev/null
+++ b/airflow/docker/cwl/Dockerfile
@@ -0,0 +1,25 @@
+# docker:dind Dockerfile: https://github.com/docker-library/docker/blob/master/Dockerfile-dind.template
+# FROM docker:dind
+FROM docker:25.0.3-dind
+
+# install Python, pip, a C build toolchain and vim (--no-cache: fetch a fresh index, keep none in the layer)
+RUN apk add --update --no-cache python3 && ln -sf python3 /usr/bin/python
+RUN apk add --no-cache gcc musl-dev linux-headers python3-dev
+RUN apk add --no-cache python3 py3-pip
+RUN apk add --no-cache vim
+
+# install CWL libraries into a virtual environment at /usr/share/cwl/venv
+RUN mkdir /usr/share/cwl \
+  && cd /usr/share/cwl \
+  && python -m venv venv \
+  && source venv/bin/activate \
+  && pip install cwltool cwl-runner docker boto3 awscli
+
+# install nodejs to parse Javascript in CWL files
+RUN apk add --no-cache nodejs npm
+
+# script to execute a generic CWL workflow with arguments
+COPY docker_cwl_entrypoint.sh /usr/share/cwl/docker_cwl_entrypoint.sh
+
+WORKDIR /usr/share/cwl
+ENTRYPOINT ["/usr/share/cwl/docker_cwl_entrypoint.sh"]
diff --git a/airflow/docker/cwl/docker_cwl_entrypoint.sh b/airflow/docker/cwl/docker_cwl_entrypoint.sh
new file mode 100755
index 0000000..da2aacc
--- /dev/null
+++ b/airflow/docker/cwl/docker_cwl_entrypoint.sh
@@ -0,0 +1,36 @@
+#!/bin/sh
+# Script to execute a CWL workflow that includes Docker containers
+# The Docker engine is started before the CWL execution, and stopped afterwards.
+# $1: the CWL workflow URL (example: https://raw.githubusercontent.com/unity-sds/unity-sps-prototype/cwl-docker/cwl/cwl_workflows/echo_from_docker.cwl)
+# $2: the CWL job parameters as a JSON formatted string (example: {"name": "John Doe"})
+# $3: optional output directory, defaults to the current directory
+# Note: $output_dir must be accessible by the Docker container that executes this script
+
+set -ex
+cwl_workflow=$1
+job_args=$2
+output_dir=${3:-.}
+echo "Executing CWL workflow: $cwl_workflow with json arguments: $job_args and output directory: $output_dir"
+echo "$job_args" > /tmp/job_args.json
+cat /tmp/job_args.json
+
+# create output directory if it doesn't exist
+mkdir -p "$output_dir"
+
+# Start Docker engine in the background: dockerd runs until killed, so it must not block this script
+dockerd > dockerd-logfile 2>&1 &
+
+# Wait until Docker engine is running
+# Loop until 'docker version' exits with 0.
+until docker version > /dev/null 2>&1
+do
+  sleep 1
+done
+
+# Execute CWL workflow inside the virtual environment created by the Dockerfile
+. /usr/share/cwl/venv/bin/activate
+cwl-runner --outdir "$output_dir" --no-match-user --no-read-only "$cwl_workflow" /tmp/job_args.json
+deactivate
+
+# Stop Docker engine
+pkill -f dockerd