From 24e32ceb05dfdaec57ff63244c8dedaba8139796 Mon Sep 17 00:00:00 2001
From: Marina Gourtovaia <mg8@sanger.ac.uk>
Date: Wed, 30 Oct 2024 14:38:56 +0000
Subject: [PATCH] Updated the expression evaluator.

Updated the expression evaluator to allow for multiple
 types of result objects to be used in a single expression.
---
 Changes                                    |   4 +-
 lib/npg_qc/autoqc/checks/review.pm         | 144 +++++++++++++--------
 t/60-autoqc-checks-review.t                |  19 ++-
 t/data/runfolder_49285/product_release.yml |   1 +
 4 files changed, 105 insertions(+), 63 deletions(-)

diff --git a/Changes b/Changes
index 3ed8684c..0b380a64 100644
--- a/Changes
+++ b/Changes
@@ -1,7 +1,7 @@
 LIST OF CHANGES FOR NPG-QC PACKAGE
 
  - npg_qc::autoqc::qc_store - changed loading of ORM classes into memory
-   to on demand at ru time.
+   to on demand at run time.
  - Added lane-level assessment to RoboQC:
     - Extended npg_qc::Schema::Result::Review class to enable saving of
       sequencing (lane-level) QC outcomes to database
@@ -24,6 +24,8 @@ LIST OF CHANGES FOR NPG-QC PACKAGE
       The fix is to infer the list of class names from the criteria,
       which are going to be evaluated for the entity, rather than
       from a collection of criteria from the whole 'robo' section.
+    - Updated the expression evaluator to allow for multiple types of
+      result objects to be used in a single expression.
 
 release 73.0.0 (2024-10-24)
  - Removed from the repository unused test data.
diff --git a/lib/npg_qc/autoqc/checks/review.pm b/lib/npg_qc/autoqc/checks/review.pm
index 4d29c2a0..7e9a8822 100644
--- a/lib/npg_qc/autoqc/checks/review.pm
+++ b/lib/npg_qc/autoqc/checks/review.pm
@@ -24,16 +24,15 @@ our $VERSION = '0';
 Readonly::Scalar my $CONJUNCTION_OP => q[and];
 Readonly::Scalar my $DISJUNCTION_OP => q[or];
 
-Readonly::Scalar my $ROBO_KEY         => q[robo_qc];
-Readonly::Scalar my $CRITERIA_KEY     => q[criteria];
-Readonly::Scalar my $QC_TYPE_KEY      => q[qc_type];
+Readonly::Scalar my $ROBO_KEY => q[robo_qc];
+Readonly::Scalar my $CRITERIA_KEY => q[criteria];
 Readonly::Scalar my $APPLICABILITY_CRITERIA_KEY => q[applicability_criteria];
 Readonly::Scalar my $LIMS_APPLICABILITY_CRITERIA_KEY => q[lims];
 Readonly::Scalar my $SEQ_APPLICABILITY_CRITERIA_KEY => q[sequencing_run];
 Readonly::Array  my @APPLICABILITY_CRITERIA_TYPES => (
   $LIMS_APPLICABILITY_CRITERIA_KEY, $SEQ_APPLICABILITY_CRITERIA_KEY
                                                      );
-Readonly::Scalar my $ACCEPTANCE_CRITERIA_KEY    => q[acceptance_criteria];
+Readonly::Scalar my $ACCEPTANCE_CRITERIA_KEY => q[acceptance_criteria];
 
 Readonly::Scalar my $QC_TYPE_LIB => q[mqc];
 Readonly::Scalar my $QC_TYPE_SEQ => q[mqc_seq];
@@ -41,8 +40,20 @@ Readonly::Scalar my $QC_TYPE_SEQ => q[mqc_seq];
 Readonly::Scalar my $TIMESTAMP_FORMAT_WOFFSET => q[%Y-%m-%dT%T%z];
 
 Readonly::Scalar my $CLASS_NAME_SPEC_DELIM => q[:];
-Readonly::Scalar my $CLASS_NAME_SPEC_RE    =>
-  qr/\A(?:\W+)? (\w+) (?: $CLASS_NAME_SPEC_DELIM (\w+))?[.]/xms;
+Readonly::Scalar my $CLASS_NAME_SPEC_RE =>
+  qr{
+    # Examples:
+    # "generic:ncov2019_artic_nf.doc->{meta}->{'num_input_reads'} > 10000"
+    # "tag_metrics.all_reads && (qX_yield.yield1_q30/(tag_metrics.all_reads * 302) > 78)"
+
+    (?:\W+)? # optional non-word characters
+    ( # start of capture group
+    [[:alpha:]_]+ # result class name
+    (?: $CLASS_NAME_SPEC_DELIM \w+)? # an optional spec for a generic result
+                                     # class prepended by a delimeter
+    ) # end of capture group
+    [.]  # ends with a dot
+  }xms;
 
 ## no critic (Documentation::RequirePodAtEnd)
 
@@ -652,7 +663,7 @@ sub _build__result_class_names {
   my $self = shift;
 
   my @class_names = uniq sort
-                    map { (_class_name_from_expression($_))[0] }
+                    map { _class_names_from_expression($_) }
                     map { @{$_} } # dereference the array
                     values %{$self->_criteria()};
 
@@ -721,11 +732,26 @@ sub _build__results {
   return \%h;
 }
 
-sub _class_name_from_expression {
+sub _class_names_with_spec_from_expression {
   my $e = shift;
-  my ($class_name, $spec) = $e =~ $CLASS_NAME_SPEC_RE;
-  $class_name or croak "Failed to infer class name from $e";
-  return ($class_name, $spec);
+  # Note 'g' flag at the end of the regexp. We will get a list of matches.
+  # The list might contain multiple class names for the same class. Depending
+  # on the expression, the return value contains one or more class names.
+  my @extended_class_names = $e =~ /$CLASS_NAME_SPEC_RE/xmsg;
+  @extended_class_names = uniq @extended_class_names; # Remove repetitions.
+  @extended_class_names or croak "Failed to infer class names from $e";
+  return @extended_class_names;
+}
+
+sub _class_names_from_expression {
+  my $e = shift;
+  my @names = ();
+  foreach my $name (_class_names_with_spec_from_expression($e)) {
+    ($name) = $name =~ /\A(\w+)/smx; # Remove the spec part if present.
+    push @names, $name;
+  }
+  @names = uniq @names;
+  return @names;
 }
 
 #####
@@ -785,6 +811,30 @@ sub _apply_operator {
   return $outcome ? 1 : 0;
 }
 
+sub _pp_name2spec {
+  # To get a match with what would have been used in the robo config,
+  # replace all 'non-word' characters.
+  my $pp_name = shift;
+  $pp_name =~ s/\W/_/gsmx;
+  return $pp_name;
+};
+
+sub _get_evaluation_result {
+  my ($perl_e, $e, $results) = @_;
+
+  use warnings FATAL => 'uninitialized'; # Force an error when operations on
+                                         # undefined values are attempted.
+  ##no critic (BuiltinFunctions::ProhibitStringyEval)
+  my $o = eval $perl_e; # Evaluate Perl expression
+  ##use critic
+  if ($EVAL_ERROR) {
+    my $err = $EVAL_ERROR;
+    croak "Error evaluating expression '$perl_e' derived from '$e': $err";
+  }
+
+  return $o ? 1 : 0;
+}
+
 #####
 # Evaluates a single expression in the context of available autoqc results.
 # If runs successfully, returns 0 or 1, otherwise throws an error.
@@ -792,55 +842,37 @@ sub _apply_operator {
 sub _evaluate_expression {
   my ($self, $e) = @_;
 
-  my ($class_name, $spec) = _class_name_from_expression($e);
-  my $obj_a = $self->_results->{$class_name};
-  # We should not get this far with an error in the configuration
-  # file, but just in case...
-  $obj_a and @{$obj_a} or croak "No autoqc result for evaluation of '$e'";
-
-  if ($spec) {
-    my $pp_name2spec = sub {    # To get a match with what would have been
-      my $pp_name = shift;      # used in the robo config, replace all
-      $pp_name =~ s/\W/_/gsmx;  # 'non-word' characters.
-      return $pp_name;
-    };
-    # Now have to choose one result. If the object is not an instance of
-    # the generic autoqc result class, there will be an error at this point.
-    # Making the code less specific is not worth the effort at this point.
-    my @on_spec = grep { $pp_name2spec->($_->pp_name) eq $spec } @{$obj_a};
-    @on_spec or croak "No autoqc $class_name result for $spec";
-    $obj_a = \@on_spec;
-  }
-
-  (@{$obj_a} == 1) or croak "Multiple autoqc results for evaluation of '$e'";
-  my $obj = $obj_a->[0];
-
-  $obj or croak "No autoqc result for evaluation of '$e'";
-
-  # Prepare the expression from the robo config for evaluation.
-  my $placeholder = $class_name;
-  $spec and ($placeholder .= $CLASS_NAME_SPEC_DELIM . $spec);
-  my $replacement = q[$] . q[result->];
+  my @names = _class_names_with_spec_from_expression($e);
+  my $results = {};
   my $perl_e = $e;
-  $perl_e =~ s/$placeholder[.]/$replacement/xmsg;
-
-  my $evaluator = sub { # Evaluation function
-    my $result = shift;
-    # Force an error when operations on undefined values are
-    # are attempted.
-    use warnings FATAL => 'uninitialized';
-    ##no critic (BuiltinFunctions::ProhibitStringyEval)
-    my $o = eval $perl_e; # Evaluate Perl string expression
-    ##use critic
-    if ($EVAL_ERROR) {
-      my $err = $EVAL_ERROR;
-      croak "Error evaluating expression '$perl_e' derived from '$e': $err";
+  # Prepare the expression from the robo config for evaluation.
+  foreach my $name (@names) {
+    my ($class_name, $spec) = split /$CLASS_NAME_SPEC_DELIM/smx, $name;
+    my $obj_a = $self->_results->{$class_name};
+    # We should not get this far with an error in the configuration
+    # file, but just in case...
+    $obj_a and @{$obj_a} or croak "No $class_name autoqc result for evaluation of '$e'";
+    if ($spec) {
+      # Now have to choose one result. If the object is not an instance of
+      # the generic autoqc result class, there will be an error at this point.
+      # Making the code less specific is not worth the effort at this point.
+      my @on_spec = grep { _pp_name2spec($_->pp_name) eq $spec } @{$obj_a};
+      @on_spec or croak "No $name autoqc result for evaluation of '$e'";
+      $obj_a = \@on_spec;
     }
-    return $o ? 1 : 0;
-  };
+    (@{$obj_a} == 1) or croak "Multiple $name autoqc results for evaluation of '$e'";
+    my $obj = $obj_a->[0];
+    $obj or croak "No $class_name autoqc result for evaluation of '$e'";
+    $results->{$name} = $obj;
+
+    ## no critic (ValuesAndExpressions::RequireInterpolationOfMetachars)
+    my $replacement = q[$results->{'] . $name . q['}->];
+    ## use critic
+    $perl_e =~ s/$name[.]/$replacement/xmsg;
+  }
 
   # Evaluate and return the outcome.
-  return $evaluator->($obj);
+  return _get_evaluation_result($perl_e, $e, $results);
 }
 
 __PACKAGE__->meta->make_immutable();
diff --git a/t/60-autoqc-checks-review.t b/t/60-autoqc-checks-review.t
index ac7ebd23..f7120540 100644
--- a/t/60-autoqc-checks-review.t
+++ b/t/60-autoqc-checks-review.t
@@ -405,7 +405,7 @@ subtest 'single expression evaluation' => sub {
     rpt_list  => $rpt_list);
 
   throws_ok {$check->_evaluate_expression('verify_bam.freemix < 0.01')}
-    qr/No autoqc result for evaluation of/,
+    qr/No verify_bam autoqc result for evaluation of/,
     'error if check name is unknown';
   throws_ok {$check->_evaluate_expression('verify_bam_id.freemix_free < 0.01')}
     qr/Can't locate object method \"freemix_free\"/,
@@ -646,7 +646,7 @@ subtest 'evaluating generic for artic results' => sub {
     final_qc_outcome => 1);
   is ($check->can_run, 1, 'check can run');
   throws_ok { $check->execute }
-    qr/Not able to run evaluation: No autoqc generic result for ncov2019_artic_nf/,
+    qr/Not able to run evaluation: No generic:ncov2019_artic_nf autoqc result/,
     'message as an error';
 
   # qc_in contains two artic generic results for the same product
@@ -657,7 +657,7 @@ subtest 'evaluating generic for artic results' => sub {
     final_qc_outcome => 1);
   is ($check->can_run, 1, 'check can run');
   throws_ok { $check->execute }
-    qr/Not able to run evaluation: Multiple autoqc results/,
+    qr/Not able to run evaluation: Multiple generic:ncov2019_artic_nf autoqc results/,
     'message as an error';
 
   # qc_in contains other autoqc results for this entity, including
@@ -812,7 +812,7 @@ subtest 'evaluating generic for artic results' => sub {
 };
 
 subtest 'evaluating for LCMB library type' => sub { 
-  plan tests => 14;
+  plan tests => 17;
 
   my $test_data_path = 't/data/runfolder_49285';
   my $runfolder_name = '240802_A00537_1044_BHJKCGDSXC';
@@ -834,6 +834,7 @@ subtest 'evaluating for LCMB library type' => sub {
     verbose => 0
   )->load();
 
+  # Sample-level evaluation
   my $check = npg_qc::autoqc::checks::review->new(
     runfolder_path => $runfolder_path,
     conf_path      => $test_data_path,
@@ -849,6 +850,7 @@ subtest 'evaluating for LCMB library type' => sub {
     'verify_bam_id.pass'
   );
   my $result = $check->result();
+  is ($result->comments(), undef, 'no comments');
   is_deeply ($result->evaluation_results(), \%expected_evaluation_results,
     'sample evaluation results as expected');
   is ($result->pass, 1, 'the check passed');
@@ -860,6 +862,7 @@ subtest 'evaluating for LCMB library type' => sub {
     'sample QC outcome is saved correctly'
   );
 
+  # Lane-level evaluation
   $check = npg_qc::autoqc::checks::review->new(
     runfolder_path => $runfolder_path,
     conf_path      => $test_data_path,
@@ -869,10 +872,13 @@ subtest 'evaluating for LCMB library type' => sub {
   );
   lives_ok { $check->execute() } 'lane level check runs OK';
   $result = $check->result();
+  is ($result->comments(), undef, 'no comments');
   %expected_evaluation_results = (
     'tag_metrics.matches_pf_percent && (tag_metrics.perfect_matches_percent +' .
-    ' tag_metrics.one_mismatch_percent) > 93' => 1,
-    'tag_metrics.all_reads * 302 > 750000000000' => 1
+      ' tag_metrics.one_mismatch_percent) > 93' => 1,
+    'tag_metrics.all_reads * 302 > 750000000000' => 1,
+    'tag_metrics.all_reads && (((qX_yield.yield1_q30 + qX_yield.yield2_q30) ' .
+      '* 1000 * 100)/(tag_metrics.all_reads * 302) >= 78)' => 1,
   );
   is_deeply ($result->evaluation_results(), \%expected_evaluation_results,
     'lane evaluation results as expected');
@@ -897,6 +903,7 @@ subtest 'evaluating for LCMB library type' => sub {
     $expected_evaluation_results{$key} = 0;
   }
   $result = $check->result();
+  is ($result->comments(), undef, 'no comments');
   is_deeply ($result->evaluation_results(), \%expected_evaluation_results,
     'lane evaluation results as expected');
   is ($result->pass, 0, 'the check failed');
diff --git a/t/data/runfolder_49285/product_release.yml b/t/data/runfolder_49285/product_release.yml
index fdb1a4c7..23df7387 100644
--- a/t/data/runfolder_49285/product_release.yml
+++ b/t/data/runfolder_49285/product_release.yml
@@ -37,4 +37,5 @@ default:
         acceptance_criteria:
           - "tag_metrics.matches_pf_percent && (tag_metrics.perfect_matches_percent + tag_metrics.one_mismatch_percent) > 93"
           - "tag_metrics.all_reads * 302 > 750000000000"
+          - "tag_metrics.all_reads && (((qX_yield.yield1_q30 + qX_yield.yield2_q30) * 1000 * 100)/(tag_metrics.all_reads * 302) >= 78)"