Robo in default section (#871)

* Eligibility criteria from seq run meta. * Added ROBO criteria validation to the can_run method * Consistent logging of error messages in can_run * Added robo_qc consideration within the default section
wtsi-npg · Aug 21, 2024 · 1a80739 · 1a80739
1 parent 7a9af15
commit 1a80739
Show file tree

Hide file tree

Showing 7 changed files with 204 additions and 32 deletions.
diff --git a/Changes b/Changes
@@ -7,6 +7,8 @@ LIST OF CHANGES FOR NPG-QC PACKAGE
    2. Added a new applicability criteria type - 'sequencing_run'
    3. To avoid running the review check unnecessarily in the pipeline context,
       moved ROBO criteria validation to the can_run method.
+   4. Introduced an option of defining the ROBO section within the default
+      section of the product configuration file.
 
 release 72.1.2 (2024-08-05)
  - handle cases where the same tag sequence is seen in both index reads

diff --git a/MANIFEST b/MANIFEST
@@ -522,6 +522,9 @@ t/data/autoqc/review/not_hash/product_release.yml
 t/data/autoqc/review/mqc_type/product_release.yml
 t/data/autoqc/review/unknown_qc_type/product_release.yml
 t/data/autoqc/review/uqc_type/product_release.yml
+t/data/autoqc/review/default_and_study_section/product_release.yml
+t/data/autoqc/review/default_section/product_release.yml
+t/data/autoqc/review/wrong_default_and_study_section/product_release.yml
 t/data/autoqc/review/samplesheet_27483.csv
 t/data/autoqc/review/samplesheet_29524.csv
 t/data/autoqc/review/29524#2.composition.json

diff --git a/lib/npg_qc/autoqc/checks/review.pm b/lib/npg_qc/autoqc/checks/review.pm
@@ -58,24 +58,26 @@ npg_qc::autoqc::checks::review
 This checks evaluates the results of other autoqc checks
 against a predefined set of criteria.
 
-If data product acceptance criteria for a project are defined, it
-is possible to introduce a degree of automation into the manual
-QC process. To provide interoperability with the API supporting
-the manual QC process, the outcome of the evaluation, which is
-performed by this check, is recorded not only as a simple undefined,
-pass or fail as in other autoqc checks, but also as one of valid
-manual or user QC outcomes.
+If data product acceptance criteria are defined, it is possible to
+introduce a degree of automation into the manual QC process. To
+provide interoperability with the API supporting the manual QC process,
+the outcome of the evaluation, which is performed by this check, is
+recorded not only as a simple undefined, pass or fail as in other autoqc
+checks, but also as one of valid manual or user QC outcomes.
 
 =head2 Types of criteria
 
-The robo section of the product configuration file sits within
-the configuration for a particular study. Evaluation criteria for
-samples in the same study might vary depending on the sequencing
+The robo section of the product configuration file sits either
+within the configuration for a particular study or in the default
+section, or in both locations. A study-specific RoboQC definition
+takes precedence over the default one.
+
+Evaluation criteria for samples vary depending on the sequencing
 instrument type, library type, sample type, etc. There might be a
 need to exclude some samples from RoboQC. The criteria key of the
-robo configuration points to an array of criteria objects each of
-which contains two further keys, one for acceptance and one for
-applicability criteria. The acceptance criteria are evaluated
+robo configuration points to an array of criteria objects. Each of
+the criteria contains two further keys, one for acceptance and one
+for applicability criteria. The acceptance criteria are evaluated
 if either the applicability criteria have been satisfied or no
 applicability criteria are defined.
 
@@ -84,8 +86,11 @@ set in such a way that the order of evaluation of the criteria
 array does not matter. If applicability criteria in all of the
 criteria objects are not satisfied, no QC outcome is assigned
 and the pass attribute of the review result object remains unset.
-Within a study the product can satisfy applicability criteria
-in at most one criteria object.
+
+The product can satisfy applicability criteria in at most one
+criteria object. If none of the study-specific applicability
+criteria are satisfied, the review check does not proceed even if
+the product might satisfy one of the default applicability criteria.
 
 =head2 QC outcomes
 
@@ -455,22 +460,23 @@ sub _build__robo_config {
 
   my $strict = 1; # Parse study section only, ignore the default section.
   my $config = $self->study_config($self->lims(), $strict);
-  if ($config and keys %{$config}) {
+  if ($config) {
     $config = $config->{$ROBO_KEY};
-    $config or carp
-      "$ROBO_KEY section is not present for " . $self->_entity_desc;
-    if ($config) {
-      (ref $config eq 'HASH') or croak
-        'Robo config should be a hash in a config for ' . $self->_entity_desc;
-      if (keys %{$config}) {
-        $self->_validate_criteria($config);
-      } else {
-        carp 'Robo section of the product config is empty for ' .
-          $self->_entity_desc;
-      }
+  }
+
+  if (!$config) {
+    carp 'Study-specific RoboQC config not found for ' . $self->_entity_desc;
+    $config = $self->default_study_config()->{$ROBO_KEY};
+  }
+
+  if ($config) {
+    (ref $config eq 'HASH') or croak
+      'Robo config should be a hash in a config for ' . $self->_entity_desc;
+    if (keys %{$config}) {
+      $self->_validate_criteria($config);
+    } else {
+      carp 'RoboQC section of the product config file is empty';
     }
-  } else {
-    carp 'Study config not found for ' . $self->_entity_desc;
   }
 
   $config ||= {};

diff --git a/t/60-autoqc-checks-review.t b/t/60-autoqc-checks-review.t
@@ -1,6 +1,6 @@
 use strict;
 use warnings;
-use Test::More tests => 11;
+use Test::More tests => 12;
 use Test::Exception;
 use Test::Warn;
 use File::Temp qw/tempdir/;
@@ -58,7 +58,7 @@ subtest 'constructing object, deciding whether to run' => sub {
 
   my $can_run;
   warnings_like { $can_run = $check->can_run }
-    [qr/Study config not found/, qr/RoboQC configuration is absent/],
+    [qr/Study-specific RoboQC config not found/, qr/RoboQC configuration is absent/],
     'can_run is accompanied by warnings';
   ok (!$can_run, 'can_run returns false - no study config');
   like ($check->result->comments, qr/RoboQC configuration is absent/,
@@ -74,7 +74,7 @@ subtest 'constructing object, deciding whether to run' => sub {
     qc_in     => $test_data_dir,
     rpt_list  => '27483:1:2');
   warnings_like { $can_run = $check->can_run }
-    [qr/robo_qc section is not present for/, qr/Review check cannot be run/],
+    [qr/Study-specific RoboQC config not found/, qr/Review check cannot be run/],
     'can_run is accompanied by warnings';
   ok (!$can_run, 'can_run returns false - no robo config');
   is ($check->result->comments, 'RoboQC configuration is absent',
@@ -500,6 +500,54 @@ subtest 'evaluation within the execute method' => sub {
   }  
 };
 
+subtest 'study-specific vs default robo definition' => sub {
+  plan tests => 11;
+
+  local $ENV{NPG_CACHED_SAMPLESHEET_FILE} =
+    't/data/autoqc/review/samplesheet_29524.csv';
+  my $rpt_list = '29524:1:2;29524:2:2;29524:3:2;29524:4:2';
+
+  # robo config in the default section only
+  my $check = npg_qc::autoqc::checks::review->new(
+    runfolder_path => $rf_path,
+    conf_path => "$test_data_dir/default_section",
+    qc_in     => $test_data_dir,
+    rpt_list  => $rpt_list
+  );
+  ok ($check->can_run, 'the check can run');
+  lives_ok { $check->execute } 'execute method runs OK';
+  is ($check->result->pass, 1, 'result pass attribute is set to 1');
+  my %expected = map { $_ => 1 } @{$criteria_list};
+  is_deeply ($check->result->evaluation_results(), \%expected,
+    'evaluation results are saved');
+  is ($check->result->qc_outcome->{'mqc_outcome'} , 'Accepted preliminary',
+    'correct outcome string');
+
+  # robo config in the default section and in the study section (empty)
+  $check = npg_qc::autoqc::checks::review->new(
+    runfolder_path => $rf_path,
+    conf_path => "$test_data_dir/default_and_study_section",
+    qc_in     => $test_data_dir,
+    rpt_list  => $rpt_list
+  );
+  ok ($check->can_run, 'the check cannot run');
+
+  # invalid robo config in the default section, valid in the study section
+  $check = npg_qc::autoqc::checks::review->new(
+    runfolder_path => $rf_path,
+    conf_path => "$test_data_dir/wrong_default_and_study_section",
+    qc_in     => $test_data_dir,
+    rpt_list  => $rpt_list
+  );
+  ok ($check->can_run, 'the check can run');
+  lives_ok { $check->execute } 'execute method runs OK';
+  is ($check->result->pass, 1, 'result pass attribute is set to 1');
+  is_deeply ($check->result->evaluation_results(), \%expected,
+    'evaluation results are saved');
+  is ($check->result->qc_outcome->{'mqc_outcome'} , 'Accepted preliminary',
+    'correct outcome string');
+};
+
 subtest 'error in evaluation' => sub {
   plan tests => 5;
 

diff --git a/t/data/autoqc/review/default_and_study_section/product_release.yml b/t/data/autoqc/review/default_and_study_section/product_release.yml
@@ -0,0 +1,43 @@
+---
+default:
+  s3:
+    enable: false
+    url: null
+    notify: false
+  irods:
+    enable: true
+    notify: false
+  robo_qc:
+    criteria:
+      - applicability_criteria:
+          lims:
+            library_type:
+              - "HiSeqX PCR free"
+          sequencing_run:
+            platform_NovaSeq:
+              - 1
+            workflow_type:
+              - "NovaSeqStandard"
+        acceptance_criteria :
+          - "( bam_flagstats.target_proper_pair_mapped_reads / bam_flagstats.target_mapped_reads ) > 0.95"
+          - "bam_flagstats.target_mapped_bases > 85_000_000_000"
+          - "bam_flagstats.target_percent_gt_coverage_threshold > 95"
+          - "verify_bam_id.freemix < 0.01"
+          - "( bcfstats.genotypes_nrd_dividend / bcfstats.genotypes_nrd_divisor ) < 0.02"
+
+study:
+  - study_id: "5392"
+    s3:
+      enable: true
+      url: "gs://profile_one-europe-west2"
+      date_binning: true
+      customer_name: "UK Organisation"
+      profile: "profile_one"
+      notify: true
+      receipts: "/data_product_receipts/5392/"
+    irods:
+      enable: false
+      notify: true
+    merge:
+      component_cache_dir: "/merge_component_cache/5392/"
+    robo_qc:
diff --git a/t/data/autoqc/review/default_section/product_release.yml b/t/data/autoqc/review/default_section/product_release.yml
@@ -0,0 +1,25 @@
+---
+default:
+  irods:
+    enable: true
+  robo_qc:
+    criteria:
+      - applicability_criteria:
+          lims:
+            library_type:
+              - "HiSeqX PCR free"
+          sequencing_run:
+            platform_NovaSeq:
+              - 1
+            workflow_type:
+              - "NovaSeqStandard"
+        acceptance_criteria :
+          - "( bam_flagstats.target_proper_pair_mapped_reads / bam_flagstats.target_mapped_reads ) > 0.95"
+          - "bam_flagstats.target_mapped_bases > 85_000_000_000"
+          - "bam_flagstats.target_percent_gt_coverage_threshold > 95"
+          - "verify_bam_id.freemix < 0.01"
+          - "( bcfstats.genotypes_nrd_dividend / bcfstats.genotypes_nrd_divisor ) < 0.02"
+study:
+  - study_id: "5392"
+    irods:
+      enable: false
diff --git a/t/data/autoqc/review/wrong_default_and_study_section/product_release.yml b/t/data/autoqc/review/wrong_default_and_study_section/product_release.yml
@@ -0,0 +1,45 @@
+---
+default:
+  s3:
+    enable: false
+    url: null
+    notify: false
+  irods:
+    enable: true
+    notify: false
+  robo_qc:
+    - acceptance_criteria:
+        - "verify_bam_id.freemix < 0.001"
+
+study:
+  - study_id: "5392"
+    s3:
+      enable: true
+      url: "gs://profile_one-europe-west2"
+      date_binning: true
+      customer_name: "UK Organisation"
+      profile: "profile_one"
+      notify: true
+      receipts: "/data_product_receipts/5392/"
+    irods:
+      enable: false
+      notify: true
+    merge:
+      component_cache_dir: "/merge_component_cache/5392/"
+    robo_qc:
+      criteria:
+        - applicability_criteria:
+            lims:
+              library_type:
+                - "HiSeqX PCR free"
+            sequencing_run:
+              platform_NovaSeq:
+                - 1
+              workflow_type:
+                - "NovaSeqStandard"
+          acceptance_criteria:
+            - "( bam_flagstats.target_proper_pair_mapped_reads / bam_flagstats.target_mapped_reads ) > 0.95"
+            - "bam_flagstats.target_mapped_bases > 85_000_000_000"
+            - "bam_flagstats.target_percent_gt_coverage_threshold > 95"
+            - "verify_bam_id.freemix < 0.01"
+            - "( bcfstats.genotypes_nrd_dividend / bcfstats.genotypes_nrd_divisor ) < 0.02"