From df181628533bce29bd2fec9a74d50c9d8e1f6cd4 Mon Sep 17 00:00:00 2001
From: mgcam <mg8@sanger.ac.uk>
Date: Wed, 21 Aug 2024 16:20:58 +0100
Subject: [PATCH] Robo in default section (#871)

* Eligibility criteria from seq run meta.

* Added ROBO criteria validation to the can_run method

* Consistent logging of error messages in can_run

* Added robo_qc consideration within the default section
---
 Changes                                       |  2 +
 MANIFEST                                      |  3 +
 lib/npg_qc/autoqc/checks/review.pm            | 64 ++++++++++---------
 t/60-autoqc-checks-review.t                   | 54 +++++++++++++++-
 .../product_release.yml                       | 43 +++++++++++++
 .../default_section/product_release.yml       | 25 ++++++++
 .../product_release.yml                       | 45 +++++++++++++
 7 files changed, 204 insertions(+), 32 deletions(-)
 create mode 100644 t/data/autoqc/review/default_and_study_section/product_release.yml
 create mode 100644 t/data/autoqc/review/default_section/product_release.yml
 create mode 100644 t/data/autoqc/review/wrong_default_and_study_section/product_release.yml

diff --git a/Changes b/Changes
index 9dfffea0..e184e21f 100644
--- a/Changes
+++ b/Changes
@@ -12,6 +12,8 @@ LIST OF CHANGES FOR NPG-QC PACKAGE
    2. Added a new applicability criteria type - 'sequencing_run'
    3. To avoid running the review check unnecessarily in the pipeline context,
       moved ROBO criteria validation to the can_run method.
+   4. Introduced an option of defining the ROBO section within the default
+      section of the product configuration file.
 
 release 72.1.2 (2024-08-05)
  - handle cases where the same tag sequence is seen in both index reads
diff --git a/MANIFEST b/MANIFEST
index 32cf09d1..f41f24c7 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -522,6 +522,9 @@ t/data/autoqc/review/not_hash/product_release.yml
 t/data/autoqc/review/mqc_type/product_release.yml
 t/data/autoqc/review/unknown_qc_type/product_release.yml
 t/data/autoqc/review/uqc_type/product_release.yml
+t/data/autoqc/review/default_and_study_section/product_release.yml
+t/data/autoqc/review/default_section/product_release.yml
+t/data/autoqc/review/wrong_default_and_study_section/product_release.yml
 t/data/autoqc/review/samplesheet_27483.csv
 t/data/autoqc/review/samplesheet_29524.csv
 t/data/autoqc/review/29524#2.composition.json
diff --git a/lib/npg_qc/autoqc/checks/review.pm b/lib/npg_qc/autoqc/checks/review.pm
index aadab26f..08a24267 100644
--- a/lib/npg_qc/autoqc/checks/review.pm
+++ b/lib/npg_qc/autoqc/checks/review.pm
@@ -58,24 +58,26 @@ npg_qc::autoqc::checks::review
 This checks evaluates the results of other autoqc checks
 against a predefined set of criteria.
 
-If data product acceptance criteria for a project are defined, it
-is possible to introduce a degree of automation into the manual
-QC process. To provide interoperability with the API supporting
-the manual QC process, the outcome of the evaluation, which is
-performed by this check, is recorded not only as a simple undefined,
-pass or fail as in other autoqc checks, but also as one of valid
-manual or user QC outcomes.
+If data product acceptance criteria are defined, it is possible to
+introduce a degree of automation into the manual QC process. To
+provide interoperability with the API supporting the manual QC process,
+the outcome of the evaluation, which is performed by this check, is
+recorded not only as a simple undefined, pass or fail as in other autoqc
+checks, but also as one of valid manual or user QC outcomes.
 
 =head2 Types of criteria
 
-The robo section of the product configuration file sits within
-the configuration for a particular study. Evaluation criteria for
-samples in the same study might vary depending on the sequencing
+The robo section of the product configuration file sits either
+within the configuration for a particular study or in the default
+section, or in both locations. A study-specific RoboQC definition
+takes precedence over the default one.
+
+Evaluation criteria for samples vary depending on the sequencing
 instrument type, library type, sample type, etc. There might be a
 need to exclude some samples from RoboQC. The criteria key of the
-robo configuration points to an array of criteria objects each of
-which contains two further keys, one for acceptance and one for
-applicability criteria. The acceptance criteria are evaluated
+robo configuration points to an array of criteria objects. Each of
+the criteria contains two further keys, one for acceptance and one
+for applicability criteria. The acceptance criteria are evaluated
 if either the applicability criteria have been satisfied or no
 applicability criteria are defined.
 
@@ -84,8 +86,11 @@ set in such a way that the order of evaluation of the criteria
 array does not matter. If applicability criteria in all of the
 criteria objects are not satisfied, no QC outcome is assigned
 and the pass attribute of the review result object remains unset.
-Within a study the product can satisfy applicability criteria
-in at most one criteria object.
+
+The product can satisfy applicability criteria in at most one
+criteria object. If none of the study-specific applicability
+criteria are satisfied, the review check does not proceed even if
+the product might satisfy one of the default applicability criteria.
 
 =head2 QC outcomes
 
@@ -455,22 +460,23 @@ sub _build__robo_config {
 
   my $strict = 1; # Parse study section only, ignore the default section.
   my $config = $self->study_config($self->lims(), $strict);
-  if ($config and keys %{$config}) {
+  if ($config) {
     $config = $config->{$ROBO_KEY};
-    $config or carp
-      "$ROBO_KEY section is not present for " . $self->_entity_desc;
-    if ($config) {
-      (ref $config eq 'HASH') or croak
-        'Robo config should be a hash in a config for ' . $self->_entity_desc;
-      if (keys %{$config}) {
-        $self->_validate_criteria($config);
-      } else {
-        carp 'Robo section of the product config is empty for ' .
-          $self->_entity_desc;
-      }
+  }
+
+  if (!$config) {
+    carp 'Study-specific RoboQC config not found for ' . $self->_entity_desc;
+    $config = $self->default_study_config()->{$ROBO_KEY};
+  }
+
+  if ($config) {
+    (ref $config eq 'HASH') or croak
+      'Robo config should be a hash in a config for ' . $self->_entity_desc;
+    if (keys %{$config}) {
+      $self->_validate_criteria($config);
+    } else {
+      carp 'RoboQC section of the product config file is empty';
     }
-  } else {
-    carp 'Study config not found for ' . $self->_entity_desc;
   }
 
   $config ||= {};
diff --git a/t/60-autoqc-checks-review.t b/t/60-autoqc-checks-review.t
index 77279219..36b769a3 100644
--- a/t/60-autoqc-checks-review.t
+++ b/t/60-autoqc-checks-review.t
@@ -1,6 +1,6 @@
 use strict;
 use warnings;
-use Test::More tests => 11;
+use Test::More tests => 12;
 use Test::Exception;
 use Test::Warn;
 use File::Temp qw/tempdir/;
@@ -58,7 +58,7 @@ subtest 'constructing object, deciding whether to run' => sub {
 
   my $can_run;
   warnings_like { $can_run = $check->can_run }
-    [qr/Study config not found/, qr/RoboQC configuration is absent/],
+    [qr/Study-specific RoboQC config not found/, qr/RoboQC configuration is absent/],
     'can_run is accompanied by warnings';
   ok (!$can_run, 'can_run returns false - no study config');
   like ($check->result->comments, qr/RoboQC configuration is absent/,
@@ -74,7 +74,7 @@ subtest 'constructing object, deciding whether to run' => sub {
     qc_in     => $test_data_dir,
     rpt_list  => '27483:1:2');
   warnings_like { $can_run = $check->can_run }
-    [qr/robo_qc section is not present for/, qr/Review check cannot be run/],
+    [qr/Study-specific RoboQC config not found/, qr/Review check cannot be run/],
     'can_run is accompanied by warnings';
   ok (!$can_run, 'can_run returns false - no robo config');
   is ($check->result->comments, 'RoboQC configuration is absent',
@@ -500,6 +500,54 @@ subtest 'evaluation within the execute method' => sub {
   }  
 };
 
+subtest 'study-specific vs default robo definition' => sub {
+  plan tests => 11;
+
+  local $ENV{NPG_CACHED_SAMPLESHEET_FILE} =
+    't/data/autoqc/review/samplesheet_29524.csv';
+  my $rpt_list = '29524:1:2;29524:2:2;29524:3:2;29524:4:2';
+
+  # robo config in the default section only
+  my $check = npg_qc::autoqc::checks::review->new(
+    runfolder_path => $rf_path,
+    conf_path => "$test_data_dir/default_section",
+    qc_in     => $test_data_dir,
+    rpt_list  => $rpt_list
+  );
+  ok ($check->can_run, 'the check can run');
+  lives_ok { $check->execute } 'execute method runs OK';
+  is ($check->result->pass, 1, 'result pass attribute is set to 1');
+  my %expected = map { $_ => 1 } @{$criteria_list};
+  is_deeply ($check->result->evaluation_results(), \%expected,
+    'evaluation results are saved');
+  is ($check->result->qc_outcome->{'mqc_outcome'} , 'Accepted preliminary',
+    'correct outcome string');
+
+  # robo config in the default section and in the study section (empty)
+  $check = npg_qc::autoqc::checks::review->new(
+    runfolder_path => $rf_path,
+    conf_path => "$test_data_dir/default_and_study_section",
+    qc_in     => $test_data_dir,
+    rpt_list  => $rpt_list
+  );
+  ok ($check->can_run, 'the check cannot run');
+
+  # invalid robo config in the default section, valid in the study section
+  $check = npg_qc::autoqc::checks::review->new(
+    runfolder_path => $rf_path,
+    conf_path => "$test_data_dir/wrong_default_and_study_section",
+    qc_in     => $test_data_dir,
+    rpt_list  => $rpt_list
+  );
+  ok ($check->can_run, 'the check can run');
+  lives_ok { $check->execute } 'execute method runs OK';
+  is ($check->result->pass, 1, 'result pass attribute is set to 1');
+  is_deeply ($check->result->evaluation_results(), \%expected,
+    'evaluation results are saved');
+  is ($check->result->qc_outcome->{'mqc_outcome'} , 'Accepted preliminary',
+    'correct outcome string');
+};
+
 subtest 'error in evaluation' => sub {
   plan tests => 5;
 
diff --git a/t/data/autoqc/review/default_and_study_section/product_release.yml b/t/data/autoqc/review/default_and_study_section/product_release.yml
new file mode 100644
index 00000000..2b465394
--- /dev/null
+++ b/t/data/autoqc/review/default_and_study_section/product_release.yml
@@ -0,0 +1,43 @@
+---
+default:
+  s3:
+    enable: false
+    url: null
+    notify: false
+  irods:
+    enable: true
+    notify: false
+  robo_qc:
+    criteria:
+      - applicability_criteria:
+          lims:
+            library_type:
+              - "HiSeqX PCR free"
+          sequencing_run:
+            platform_NovaSeq:
+              - 1
+            workflow_type:
+              - "NovaSeqStandard"
+        acceptance_criteria :
+          - "( bam_flagstats.target_proper_pair_mapped_reads / bam_flagstats.target_mapped_reads ) > 0.95"
+          - "bam_flagstats.target_mapped_bases > 85_000_000_000"
+          - "bam_flagstats.target_percent_gt_coverage_threshold > 95"
+          - "verify_bam_id.freemix < 0.01"
+          - "( bcfstats.genotypes_nrd_dividend / bcfstats.genotypes_nrd_divisor ) < 0.02"
+
+study:
+  - study_id: "5392"
+    s3:
+      enable: true
+      url: "gs://profile_one-europe-west2"
+      date_binning: true
+      customer_name: "UK Organisation"
+      profile: "profile_one"
+      notify: true
+      receipts: "/data_product_receipts/5392/"
+    irods:
+      enable: false
+      notify: true
+    merge:
+      component_cache_dir: "/merge_component_cache/5392/"
+    robo_qc:
diff --git a/t/data/autoqc/review/default_section/product_release.yml b/t/data/autoqc/review/default_section/product_release.yml
new file mode 100644
index 00000000..ca3d9efa
--- /dev/null
+++ b/t/data/autoqc/review/default_section/product_release.yml
@@ -0,0 +1,25 @@
+---
+default:
+  irods:
+    enable: true
+  robo_qc:
+    criteria:
+      - applicability_criteria:
+          lims:
+            library_type:
+              - "HiSeqX PCR free"
+          sequencing_run:
+            platform_NovaSeq:
+              - 1
+            workflow_type:
+              - "NovaSeqStandard"
+        acceptance_criteria :
+          - "( bam_flagstats.target_proper_pair_mapped_reads / bam_flagstats.target_mapped_reads ) > 0.95"
+          - "bam_flagstats.target_mapped_bases > 85_000_000_000"
+          - "bam_flagstats.target_percent_gt_coverage_threshold > 95"
+          - "verify_bam_id.freemix < 0.01"
+          - "( bcfstats.genotypes_nrd_dividend / bcfstats.genotypes_nrd_divisor ) < 0.02"
+study:
+  - study_id: "5392"
+    irods:
+      enable: false
diff --git a/t/data/autoqc/review/wrong_default_and_study_section/product_release.yml b/t/data/autoqc/review/wrong_default_and_study_section/product_release.yml
new file mode 100644
index 00000000..ecc24494
--- /dev/null
+++ b/t/data/autoqc/review/wrong_default_and_study_section/product_release.yml
@@ -0,0 +1,45 @@
+---
+default:
+  s3:
+    enable: false
+    url: null
+    notify: false
+  irods:
+    enable: true
+    notify: false
+  robo_qc:
+    - acceptance_criteria:
+        - "verify_bam_id.freemix < 0.001"
+
+study:
+  - study_id: "5392"
+    s3:
+      enable: true
+      url: "gs://profile_one-europe-west2"
+      date_binning: true
+      customer_name: "UK Organisation"
+      profile: "profile_one"
+      notify: true
+      receipts: "/data_product_receipts/5392/"
+    irods:
+      enable: false
+      notify: true
+    merge:
+      component_cache_dir: "/merge_component_cache/5392/"
+    robo_qc:
+      criteria:
+        - applicability_criteria:
+            lims:
+              library_type:
+                - "HiSeqX PCR free"
+            sequencing_run:
+              platform_NovaSeq:
+                - 1
+              workflow_type:
+                - "NovaSeqStandard"
+          acceptance_criteria:
+            - "( bam_flagstats.target_proper_pair_mapped_reads / bam_flagstats.target_mapped_reads ) > 0.95"
+            - "bam_flagstats.target_mapped_bases > 85_000_000_000"
+            - "bam_flagstats.target_percent_gt_coverage_threshold > 95"
+            - "verify_bam_id.freemix < 0.01"
+            - "( bcfstats.genotypes_nrd_dividend / bcfstats.genotypes_nrd_divisor ) < 0.02"