From 24e32ceb05dfdaec57ff63244c8dedaba8139796 Mon Sep 17 00:00:00 2001 From: Marina Gourtovaia Date: Wed, 30 Oct 2024 14:38:56 +0000 Subject: [PATCH] Updated the expression evaluator. Updated the expression evaluator to allow for multiple types of result objects to be used in a single expression. --- Changes | 4 +- lib/npg_qc/autoqc/checks/review.pm | 144 +++++++++++++-------- t/60-autoqc-checks-review.t | 19 ++- t/data/runfolder_49285/product_release.yml | 1 + 4 files changed, 105 insertions(+), 63 deletions(-) diff --git a/Changes b/Changes index 3ed8684c..0b380a64 100644 --- a/Changes +++ b/Changes @@ -1,7 +1,7 @@ LIST OF CHANGES FOR NPG-QC PACKAGE - npg_qc::autoqc::qc_store - changed loading of ORM classes into memory - to on demand at ru time. + to on demand at run time. - Added lane-level assessment to RoboQC: - Extended npg_qc::Schema::Result::Review class to enable saving of sequencing (lane-level) QC outcomes to database @@ -24,6 +24,8 @@ LIST OF CHANGES FOR NPG-QC PACKAGE The fix is to infer the list of class names from the criteria, which are going to be evaluated for the entity, rather than from a collection of criteria from the whole 'robo' section. + - Updated the expression evaluator to allow for multiple types of + result objects to be used in a single expression. release 73.0.0 (2024-10-24) - Removed from the repository unused test data. diff --git a/lib/npg_qc/autoqc/checks/review.pm b/lib/npg_qc/autoqc/checks/review.pm index 4d29c2a0..7e9a8822 100644 --- a/lib/npg_qc/autoqc/checks/review.pm +++ b/lib/npg_qc/autoqc/checks/review.pm @@ -24,16 +24,15 @@ our $VERSION = '0'; Readonly::Scalar my $CONJUNCTION_OP => q[and]; Readonly::Scalar my $DISJUNCTION_OP => q[or]; -Readonly::Scalar my $ROBO_KEY => q[robo_qc]; -Readonly::Scalar my $CRITERIA_KEY => q[criteria]; -Readonly::Scalar my $QC_TYPE_KEY => q[qc_type]; +Readonly::Scalar my $ROBO_KEY => q[robo_qc]; +Readonly::Scalar my $CRITERIA_KEY => q[criteria]; Readonly::Scalar my $APPLICABILITY_CRITERIA_KEY => q[applicability_criteria]; Readonly::Scalar my $LIMS_APPLICABILITY_CRITERIA_KEY => q[lims]; Readonly::Scalar my $SEQ_APPLICABILITY_CRITERIA_KEY => q[sequencing_run]; Readonly::Array my @APPLICABILITY_CRITERIA_TYPES => ( $LIMS_APPLICABILITY_CRITERIA_KEY, $SEQ_APPLICABILITY_CRITERIA_KEY ); -Readonly::Scalar my $ACCEPTANCE_CRITERIA_KEY => q[acceptance_criteria]; +Readonly::Scalar my $ACCEPTANCE_CRITERIA_KEY => q[acceptance_criteria]; Readonly::Scalar my $QC_TYPE_LIB => q[mqc]; Readonly::Scalar my $QC_TYPE_SEQ => q[mqc_seq]; @@ -41,8 +40,20 @@ Readonly::Scalar my $QC_TYPE_SEQ => q[mqc_seq]; Readonly::Scalar my $TIMESTAMP_FORMAT_WOFFSET => q[%Y-%m-%dT%T%z]; Readonly::Scalar my $CLASS_NAME_SPEC_DELIM => q[:]; -Readonly::Scalar my $CLASS_NAME_SPEC_RE => - qr/\A(?:\W+)? (\w+) (?: $CLASS_NAME_SPEC_DELIM (\w+))?[.]/xms; +Readonly::Scalar my $CLASS_NAME_SPEC_RE => + qr{ + # Examples: + # "generic:ncov2019_artic_nf.doc->{meta}->{'num_input_reads'} > 10000" + # "tag_metrics.all_reads && (qX_yield.yield1_q30/(tag_metrics.all_reads * 302) > 78)" + + (?:\W+)? # optional non-word characters + ( # start of capture group + [[:alpha:]_]+ # result class name + (?: $CLASS_NAME_SPEC_DELIM \w+)? # an optional spec for a generic result + # class prepended by a delimeter + ) # end of capture group + [.] # ends with a dot + }xms; ## no critic (Documentation::RequirePodAtEnd) @@ -652,7 +663,7 @@ sub _build__result_class_names { my $self = shift; my @class_names = uniq sort - map { (_class_name_from_expression($_))[0] } + map { _class_names_from_expression($_) } map { @{$_} } # dereference the array values %{$self->_criteria()}; @@ -721,11 +732,26 @@ sub _build__results { return \%h; } -sub _class_name_from_expression { +sub _class_names_with_spec_from_expression { my $e = shift; - my ($class_name, $spec) = $e =~ $CLASS_NAME_SPEC_RE; - $class_name or croak "Failed to infer class name from $e"; - return ($class_name, $spec); + # Note 'g' flag at the end of the regexp. We will get a list of matches. + # The list might contain multiple class names for the same class. Depending + # on the expression, the return value contains one or more class names. + my @extended_class_names = $e =~ /$CLASS_NAME_SPEC_RE/xmsg; + @extended_class_names = uniq @extended_class_names; # Remove repetitions. + @extended_class_names or croak "Failed to infer class names from $e"; + return @extended_class_names; +} + +sub _class_names_from_expression { + my $e = shift; + my @names = (); + foreach my $name (_class_names_with_spec_from_expression($e)) { + ($name) = $name =~ /\A(\w+)/smx; # Remove the spec part if present. + push @names, $name; + } + @names = uniq @names; + return @names; } ##### @@ -785,6 +811,30 @@ sub _apply_operator { return $outcome ? 1 : 0; } +sub _pp_name2spec { + # To get a match with what would have been used in the robo config, + # replace all 'non-word' characters. + my $pp_name = shift; + $pp_name =~ s/\W/_/gsmx; + return $pp_name; +}; + +sub _get_evaluation_result { + my ($perl_e, $e, $results) = @_; + + use warnings FATAL => 'uninitialized'; # Force an error when operations on + # undefined values are attempted. + ##no critic (BuiltinFunctions::ProhibitStringyEval) + my $o = eval $perl_e; # Evaluate Perl expression + ##use critic + if ($EVAL_ERROR) { + my $err = $EVAL_ERROR; + croak "Error evaluating expression '$perl_e' derived from '$e': $err"; + } + + return $o ? 1 : 0; +} + ##### # Evaluates a single expression in the context of available autoqc results. # If runs successfully, returns 0 or 1, otherwise throws an error. @@ -792,55 +842,37 @@ sub _apply_operator { sub _evaluate_expression { my ($self, $e) = @_; - my ($class_name, $spec) = _class_name_from_expression($e); - my $obj_a = $self->_results->{$class_name}; - # We should not get this far with an error in the configuration - # file, but just in case... - $obj_a and @{$obj_a} or croak "No autoqc result for evaluation of '$e'"; - - if ($spec) { - my $pp_name2spec = sub { # To get a match with what would have been - my $pp_name = shift; # used in the robo config, replace all - $pp_name =~ s/\W/_/gsmx; # 'non-word' characters. - return $pp_name; - }; - # Now have to choose one result. If the object is not an instance of - # the generic autoqc result class, there will be an error at this point. - # Making the code less specific is not worth the effort at this point. - my @on_spec = grep { $pp_name2spec->($_->pp_name) eq $spec } @{$obj_a}; - @on_spec or croak "No autoqc $class_name result for $spec"; - $obj_a = \@on_spec; - } - - (@{$obj_a} == 1) or croak "Multiple autoqc results for evaluation of '$e'"; - my $obj = $obj_a->[0]; - - $obj or croak "No autoqc result for evaluation of '$e'"; - - # Prepare the expression from the robo config for evaluation. - my $placeholder = $class_name; - $spec and ($placeholder .= $CLASS_NAME_SPEC_DELIM . $spec); - my $replacement = q[$] . q[result->]; + my @names = _class_names_with_spec_from_expression($e); + my $results = {}; my $perl_e = $e; - $perl_e =~ s/$placeholder[.]/$replacement/xmsg; - - my $evaluator = sub { # Evaluation function - my $result = shift; - # Force an error when operations on undefined values are - # are attempted. - use warnings FATAL => 'uninitialized'; - ##no critic (BuiltinFunctions::ProhibitStringyEval) - my $o = eval $perl_e; # Evaluate Perl string expression - ##use critic - if ($EVAL_ERROR) { - my $err = $EVAL_ERROR; - croak "Error evaluating expression '$perl_e' derived from '$e': $err"; + # Prepare the expression from the robo config for evaluation. + foreach my $name (@names) { + my ($class_name, $spec) = split /$CLASS_NAME_SPEC_DELIM/smx, $name; + my $obj_a = $self->_results->{$class_name}; + # We should not get this far with an error in the configuration + # file, but just in case... + $obj_a and @{$obj_a} or croak "No $class_name autoqc result for evaluation of '$e'"; + if ($spec) { + # Now have to choose one result. If the object is not an instance of + # the generic autoqc result class, there will be an error at this point. + # Making the code less specific is not worth the effort at this point. + my @on_spec = grep { _pp_name2spec($_->pp_name) eq $spec } @{$obj_a}; + @on_spec or croak "No $name autoqc result for evaluation of '$e'"; + $obj_a = \@on_spec; } - return $o ? 1 : 0; - }; + (@{$obj_a} == 1) or croak "Multiple $name autoqc results for evaluation of '$e'"; + my $obj = $obj_a->[0]; + $obj or croak "No $class_name autoqc result for evaluation of '$e'"; + $results->{$name} = $obj; + + ## no critic (ValuesAndExpressions::RequireInterpolationOfMetachars) + my $replacement = q[$results->{'] . $name . q['}->]; + ## use critic + $perl_e =~ s/$name[.]/$replacement/xmsg; + } # Evaluate and return the outcome. - return $evaluator->($obj); + return _get_evaluation_result($perl_e, $e, $results); } __PACKAGE__->meta->make_immutable(); diff --git a/t/60-autoqc-checks-review.t b/t/60-autoqc-checks-review.t index ac7ebd23..f7120540 100644 --- a/t/60-autoqc-checks-review.t +++ b/t/60-autoqc-checks-review.t @@ -405,7 +405,7 @@ subtest 'single expression evaluation' => sub { rpt_list => $rpt_list); throws_ok {$check->_evaluate_expression('verify_bam.freemix < 0.01')} - qr/No autoqc result for evaluation of/, + qr/No verify_bam autoqc result for evaluation of/, 'error if check name is unknown'; throws_ok {$check->_evaluate_expression('verify_bam_id.freemix_free < 0.01')} qr/Can't locate object method \"freemix_free\"/, @@ -646,7 +646,7 @@ subtest 'evaluating generic for artic results' => sub { final_qc_outcome => 1); is ($check->can_run, 1, 'check can run'); throws_ok { $check->execute } - qr/Not able to run evaluation: No autoqc generic result for ncov2019_artic_nf/, + qr/Not able to run evaluation: No generic:ncov2019_artic_nf autoqc result/, 'message as an error'; # qc_in contains two artic generic results for the same product @@ -657,7 +657,7 @@ subtest 'evaluating generic for artic results' => sub { final_qc_outcome => 1); is ($check->can_run, 1, 'check can run'); throws_ok { $check->execute } - qr/Not able to run evaluation: Multiple autoqc results/, + qr/Not able to run evaluation: Multiple generic:ncov2019_artic_nf autoqc results/, 'message as an error'; # qc_in contains other autoqc results for this entity, including @@ -812,7 +812,7 @@ subtest 'evaluating generic for artic results' => sub { }; subtest 'evaluating for LCMB library type' => sub { - plan tests => 14; + plan tests => 17; my $test_data_path = 't/data/runfolder_49285'; my $runfolder_name = '240802_A00537_1044_BHJKCGDSXC'; @@ -834,6 +834,7 @@ subtest 'evaluating for LCMB library type' => sub { verbose => 0 )->load(); + # Sample-level evaluation my $check = npg_qc::autoqc::checks::review->new( runfolder_path => $runfolder_path, conf_path => $test_data_path, @@ -849,6 +850,7 @@ subtest 'evaluating for LCMB library type' => sub { 'verify_bam_id.pass' ); my $result = $check->result(); + is ($result->comments(), undef, 'no comments'); is_deeply ($result->evaluation_results(), \%expected_evaluation_results, 'sample evaluation results as expected'); is ($result->pass, 1, 'the check passed'); @@ -860,6 +862,7 @@ subtest 'evaluating for LCMB library type' => sub { 'sample QC outcome is saved correctly' ); + # Lane-level evaluation $check = npg_qc::autoqc::checks::review->new( runfolder_path => $runfolder_path, conf_path => $test_data_path, @@ -869,10 +872,13 @@ subtest 'evaluating for LCMB library type' => sub { ); lives_ok { $check->execute() } 'lane level check runs OK'; $result = $check->result(); + is ($result->comments(), undef, 'no comments'); %expected_evaluation_results = ( 'tag_metrics.matches_pf_percent && (tag_metrics.perfect_matches_percent +' . - ' tag_metrics.one_mismatch_percent) > 93' => 1, - 'tag_metrics.all_reads * 302 > 750000000000' => 1 + ' tag_metrics.one_mismatch_percent) > 93' => 1, + 'tag_metrics.all_reads * 302 > 750000000000' => 1, + 'tag_metrics.all_reads && (((qX_yield.yield1_q30 + qX_yield.yield2_q30) ' . + '* 1000 * 100)/(tag_metrics.all_reads * 302) >= 78)' => 1, ); is_deeply ($result->evaluation_results(), \%expected_evaluation_results, 'lane evaluation results as expected'); @@ -897,6 +903,7 @@ subtest 'evaluating for LCMB library type' => sub { $expected_evaluation_results{$key} = 0; } $result = $check->result(); + is ($result->comments(), undef, 'no comments'); is_deeply ($result->evaluation_results(), \%expected_evaluation_results, 'lane evaluation results as expected'); is ($result->pass, 0, 'the check failed'); diff --git a/t/data/runfolder_49285/product_release.yml b/t/data/runfolder_49285/product_release.yml index fdb1a4c7..23df7387 100644 --- a/t/data/runfolder_49285/product_release.yml +++ b/t/data/runfolder_49285/product_release.yml @@ -37,4 +37,5 @@ default: acceptance_criteria: - "tag_metrics.matches_pf_percent && (tag_metrics.perfect_matches_percent + tag_metrics.one_mismatch_percent) > 93" - "tag_metrics.all_reads * 302 > 750000000000" + - "tag_metrics.all_reads && (((qX_yield.yield1_q30 + qX_yield.yield2_q30) * 1000 * 100)/(tag_metrics.all_reads * 302) >= 78)"