From df863f994225019e5289a63e3c1d27a3d5cb87e3 Mon Sep 17 00:00:00 2001
From: Stephen Soltesz
Date: Thu, 6 Apr 2023 19:18:18 +0000
Subject: [PATCH] Add configuration for parsing Wehe data (#421)

* Add wehe parsing for scamper1 & annotation types
* Restore public archive source in staging
---
NOTE(review): this patch was rebuilt from a copy whose newlines and
indentation had been collapsed. Line breaks and +/- markers follow the hunk
headers and the diffstat (26 insertions, 3 deletions), but exact whitespace is
an assumption to confirm against the target files before applying:
  * position of the single blank context line in the apply-cluster.sh hunk
    (placed before the "# Use sandbox" comment here);
  * indentation of the sed continuation lines and spacing before inline "#";
  * nesting of "join:" (assumed to sit under "target_datasets:").
If context does not match, try "git apply --ignore-whitespace".

 apply-cluster.sh  |  5 ++---
 config/config.yml | 24 ++++++++++++++++++++++++
 2 files changed, 26 insertions(+), 3 deletions(-)

diff --git a/apply-cluster.sh b/apply-cluster.sh
index 574de53c..48618925 100755
--- a/apply-cluster.sh
+++ b/apply-cluster.sh
@@ -18,10 +18,9 @@ CLUSTER=${CLOUDSDK_CONTAINER_CLUSTER:?Please provide cluster name: $USAGE}
 DATE_SKIP=${DATE_SKIP:-"0"} # Number of dates to skip between each processed date (for sandbox).
 TASK_FILE_SKIP=${TASK_FILE_SKIP:-"0"} # Number of files to skip between each processed file (for sandbox).
 
-# Use sandbox in sandbox, staging in staging, measurement-lab in oti.
+# Use sandbox in sandbox, measurement-lab in staging & oti.
 SOURCE_PROJECT=${PROJECT_ID/mlab-oti/measurement-lab}
-# TODO(soltesz): restore or remove.
-#SOURCE_PROJECT=${SOURCE_PROJECT/mlab-staging/measurement-lab}
+SOURCE_PROJECT=${SOURCE_PROJECT/mlab-staging/measurement-lab}
 sed -i \
     -e 's/{{ANNOTATION_SOURCE_PROJECT}}/'${SOURCE_PROJECT}'/g' \
     config/config.yml
diff --git a/config/config.yml b/config/config.yml
index b68d90c8..4184cfe1 100644
--- a/config/config.yml
+++ b/config/config.yml
@@ -6,6 +6,7 @@ monitor:
   polling_interval: 1m
 sources:
 # NOTE: It now matters what order these are in.
+## NDT
 - bucket: archive-{{ANNOTATION_SOURCE_PROJECT}}
   experiment: ndt
   datatype: annotation2
@@ -66,3 +67,26 @@ sources:
     raw: raw_ndt
     join: ndt
   daily_only: true
+## WEHE
+- bucket: archive-{{ANNOTATION_SOURCE_PROJECT}}
+  experiment: wehe
+  datatype: annotation2
+  target_datasets:
+    tmp: tmp_wehe
+    raw: raw_wehe
+  daily_only: true
+- bucket: archive-{{ANNOTATION_SOURCE_PROJECT}}
+  experiment: wehe
+  datatype: hopannotation2
+  target_datasets:
+    tmp: tmp_wehe
+    raw: raw_wehe
+  daily_only: true
+- bucket: archive-{{ANNOTATION_SOURCE_PROJECT}}
+  experiment: wehe
+  datatype: scamper1
+  target_datasets:
+    tmp: tmp_wehe
+    raw: raw_wehe
+    join: wehe
+  daily_only: true