From b530301d789f2139eab7cd04d23946b6daff66a4 Mon Sep 17 00:00:00 2001
From: Manasa Venkatakrishnan
Date: Fri, 31 Jan 2025 12:20:09 -0800
Subject: [PATCH] Updating the output validation wdl file

---
Review notes (not part of the commit message):
 * Every value interpolated into the allure_tests.py command line is now
   single-quoted; previously only --extra-args was, so a space or shell
   metacharacter in any other input would split or alter the command.
 * apt is replaced by apt-get, the interface intended for use in scripts.
 * The blob hash on the "index" line is carried over unchanged and is stale
   for the edited file contents; regenerate the patch to refresh it.
 * NOTE(review): --swipe-wdl-key keeps both required=True and a default in
   enqueue_runs.py; confirm which of the two is intended.

 .../infra/validate_dataset-v0.0.2.wdl         | 73 +++++++++++++++++++
 ingestion_tools/scripts/enqueue_runs.py       |  2 +-
 2 files changed, 74 insertions(+), 1 deletion(-)
 create mode 100644 ingestion_tools/infra/validate_dataset-v0.0.2.wdl

diff --git a/ingestion_tools/infra/validate_dataset-v0.0.2.wdl b/ingestion_tools/infra/validate_dataset-v0.0.2.wdl
new file mode 100644
index 000000000..973e1885e
--- /dev/null
+++ b/ingestion_tools/infra/validate_dataset-v0.0.2.wdl
@@ -0,0 +1,73 @@
+version 1.0
+
+# Installs Allure inside the container at run time, then runs allure_tests.py
+# against the requested dataset.
+task cryoet_data_validation {
+    input {
+        String docker_image_id
+        # NOTE(review): aws_region is accepted but never referenced by the command
+        # below; kept so existing callers that pass it keep working.
+        String aws_region
+        String input_bucket
+        String output_bucket
+        String output_dir
+        String extra_args
+        String dataset
+    }
+
+    command <<<
+        set -euxo pipefail
+        export PYTHONUNBUFFERED=1
+        python --version 1>&2
+        ls -l 1>&2
+        pwd 1>&2
+        # TODO: build a dedicated validation image with Allure preinstalled so the
+        # task does not need network access and a package install on every run.
+        echo ==== 1>&2
+        echo Installing Allure 1>&2
+        echo ==== 1>&2
+        # apt-get, not apt: apt's command-line interface is not stable for scripts.
+        apt-get update && apt-get install -y default-jre-headless
+        wget -q https://github.com/allure-framework/allure2/releases/download/2.32.0/allure_2.32.0-1_all.deb
+        dpkg -i allure_2.32.0-1_all.deb
+        echo ==== 1>&2
+        echo Running tests 1>&2
+        echo ==== 1>&2
+        cd /usr/src/app/ingestion_tools/scripts/data_validation/standardized
+        # Every interpolated value is single-quoted so spaces or shell
+        # metacharacters in an input cannot split or alter the command line.
+        python allure_tests.py --output-dir '~{output_dir}' --datasets '~{dataset}' --history --input-bucket '~{input_bucket}' --output-bucket '~{output_bucket}' --extra-args '~{extra_args}' 1>&2
+    >>>
+
+    runtime {
+        docker: docker_image_id
+    }
+}
+
+# Thin wrapper that forwards its inputs unchanged to cryoet_data_validation.
+workflow cryoet_data_validation_wf {
+    input {
+        String docker_image_id = "cryoet_data_ingestion:latest"
+        String aws_region = "us-west-2"
+        String input_bucket
+        String output_bucket
+        String output_dir
+        String extra_args
+        String dataset
+    }
+
+    call cryoet_data_validation {
+        input:
+            docker_image_id = docker_image_id,
+            aws_region = aws_region,
+            input_bucket = input_bucket,
+            output_bucket = output_bucket,
+            output_dir = output_dir,
+            extra_args = extra_args,
+            dataset = dataset
+    }
+
+    output {
+        File log = "output.txt"
+    }
+}
diff --git a/ingestion_tools/scripts/enqueue_runs.py b/ingestion_tools/scripts/enqueue_runs.py
index e7b1ec994..4877c841d 100644
--- a/ingestion_tools/scripts/enqueue_runs.py
+++ b/ingestion_tools/scripts/enqueue_runs.py
@@ -645,7 +645,7 @@ def sync(
     "--swipe-wdl-key",
     type=str,
     required=True,
-    default="validate_dataset-v0.0.1.wdl",
+    default="validate_dataset-v0.0.2.wdl",
     help="Specify wdl key for custom workload",
 )
 @enqueue_common_options