Skip to content

Commit

Permalink
Updated the expression evaluator.
Browse files Browse the repository at this point in the history
Updated the expression evaluator to allow for multiple
 types of result objects to be used in a single expression.
  • Loading branch information
mgcam committed Oct 30, 2024
1 parent 825d5a0 commit 24e32ce
Show file tree
Hide file tree
Showing 4 changed files with 105 additions and 63 deletions.
4 changes: 3 additions & 1 deletion Changes
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
LIST OF CHANGES FOR NPG-QC PACKAGE

- npg_qc::autoqc::qc_store - changed loading of ORM classes into memory
to on demand at ru time.
to on demand at run time.
- Added lane-level assessment to RoboQC:
- Extended npg_qc::Schema::Result::Review class to enable saving of
sequencing (lane-level) QC outcomes to database
Expand All @@ -24,6 +24,8 @@ LIST OF CHANGES FOR NPG-QC PACKAGE
The fix is to infer the list of class names from the criteria,
which are going to be evaluated for the entity, rather than
from a collection of criteria from the whole 'robo' section.
- Updated the expression evaluator to allow for multiple types of
result objects to be used in a single expression.

release 73.0.0 (2024-10-24)
- Removed from the repository unused test data.
Expand Down
144 changes: 88 additions & 56 deletions lib/npg_qc/autoqc/checks/review.pm
Original file line number Diff line number Diff line change
Expand Up @@ -24,25 +24,36 @@ our $VERSION = '0';
Readonly::Scalar my $CONJUNCTION_OP => q[and];
Readonly::Scalar my $DISJUNCTION_OP => q[or];

Readonly::Scalar my $ROBO_KEY => q[robo_qc];
Readonly::Scalar my $CRITERIA_KEY => q[criteria];
Readonly::Scalar my $QC_TYPE_KEY => q[qc_type];
Readonly::Scalar my $ROBO_KEY => q[robo_qc];
Readonly::Scalar my $CRITERIA_KEY => q[criteria];
Readonly::Scalar my $APPLICABILITY_CRITERIA_KEY => q[applicability_criteria];
Readonly::Scalar my $LIMS_APPLICABILITY_CRITERIA_KEY => q[lims];
Readonly::Scalar my $SEQ_APPLICABILITY_CRITERIA_KEY => q[sequencing_run];
Readonly::Array my @APPLICABILITY_CRITERIA_TYPES => (
$LIMS_APPLICABILITY_CRITERIA_KEY, $SEQ_APPLICABILITY_CRITERIA_KEY
);
Readonly::Scalar my $ACCEPTANCE_CRITERIA_KEY => q[acceptance_criteria];
Readonly::Scalar my $ACCEPTANCE_CRITERIA_KEY => q[acceptance_criteria];

Readonly::Scalar my $QC_TYPE_LIB => q[mqc];
Readonly::Scalar my $QC_TYPE_SEQ => q[mqc_seq];

Readonly::Scalar my $TIMESTAMP_FORMAT_WOFFSET => q[%Y-%m-%dT%T%z];

Readonly::Scalar my $CLASS_NAME_SPEC_DELIM => q[:];
Readonly::Scalar my $CLASS_NAME_SPEC_RE =>
qr/\A(?:\W+)? (\w+) (?: $CLASS_NAME_SPEC_DELIM (\w+))?[.]/xms;
Readonly::Scalar my $CLASS_NAME_SPEC_RE =>
qr{
# Examples:
# "generic:ncov2019_artic_nf.doc->{meta}->{'num_input_reads'} > 10000"
# "tag_metrics.all_reads && (qX_yield.yield1_q30/(tag_metrics.all_reads * 302) > 78)"
(?:\W+)? # optional non-word characters
( # start of capture group
[[:alpha:]_]+ # result class name
(?: $CLASS_NAME_SPEC_DELIM \w+)? # an optional spec for a generic result
# class prepended by a delimeter
) # end of capture group
[.] # ends with a dot
}xms;

## no critic (Documentation::RequirePodAtEnd)

Expand Down Expand Up @@ -652,7 +663,7 @@ sub _build__result_class_names {
my $self = shift;

my @class_names = uniq sort
map { (_class_name_from_expression($_))[0] }
map { _class_names_from_expression($_) }
map { @{$_} } # dereference the array
values %{$self->_criteria()};

Expand Down Expand Up @@ -721,11 +732,26 @@ sub _build__results {
return \%h;
}

sub _class_name_from_expression {
sub _class_names_with_spec_from_expression {
my $e = shift;
my ($class_name, $spec) = $e =~ $CLASS_NAME_SPEC_RE;
$class_name or croak "Failed to infer class name from $e";
return ($class_name, $spec);
# Note 'g' flag at the end of the regexp. We will get a list of matches.
# The list might contain multiple class names for the same class. Depending
# on the expression, the return value contains one or more class names.
my @extended_class_names = $e =~ /$CLASS_NAME_SPEC_RE/xmsg;
@extended_class_names = uniq @extended_class_names; # Remove repetitions.
@extended_class_names or croak "Failed to infer class names from $e";
return @extended_class_names;
}

sub _class_names_from_expression {
my $e = shift;
my @names = ();
foreach my $name (_class_names_with_spec_from_expression($e)) {
($name) = $name =~ /\A(\w+)/smx; # Remove the spec part if present.
push @names, $name;
}
@names = uniq @names;
return @names;
}

#####
Expand Down Expand Up @@ -785,62 +811,68 @@ sub _apply_operator {
return $outcome ? 1 : 0;
}

sub _pp_name2spec {
# To get a match with what would have been used in the robo config,
# replace all 'non-word' characters.
my $pp_name = shift;
$pp_name =~ s/\W/_/gsmx;
return $pp_name;
};

sub _get_evaluation_result {
my ($perl_e, $e, $results) = @_;

use warnings FATAL => 'uninitialized'; # Force an error when operations on
# undefined values are attempted.
##no critic (BuiltinFunctions::ProhibitStringyEval)
my $o = eval $perl_e; # Evaluate Perl expression
##use critic
if ($EVAL_ERROR) {
my $err = $EVAL_ERROR;
croak "Error evaluating expression '$perl_e' derived from '$e': $err";
}

return $o ? 1 : 0;
}

#####
# Evaluates a single expression in the context of available autoqc results.
# If runs successfully, returns 0 or 1, otherwise throws an error.
#
sub _evaluate_expression {
my ($self, $e) = @_;

my ($class_name, $spec) = _class_name_from_expression($e);
my $obj_a = $self->_results->{$class_name};
# We should not get this far with an error in the configuration
# file, but just in case...
$obj_a and @{$obj_a} or croak "No autoqc result for evaluation of '$e'";

if ($spec) {
my $pp_name2spec = sub { # To get a match with what would have been
my $pp_name = shift; # used in the robo config, replace all
$pp_name =~ s/\W/_/gsmx; # 'non-word' characters.
return $pp_name;
};
# Now have to choose one result. If the object is not an instance of
# the generic autoqc result class, there will be an error at this point.
# Making the code less specific is not worth the effort at this point.
my @on_spec = grep { $pp_name2spec->($_->pp_name) eq $spec } @{$obj_a};
@on_spec or croak "No autoqc $class_name result for $spec";
$obj_a = \@on_spec;
}

(@{$obj_a} == 1) or croak "Multiple autoqc results for evaluation of '$e'";
my $obj = $obj_a->[0];

$obj or croak "No autoqc result for evaluation of '$e'";

# Prepare the expression from the robo config for evaluation.
my $placeholder = $class_name;
$spec and ($placeholder .= $CLASS_NAME_SPEC_DELIM . $spec);
my $replacement = q[$] . q[result->];
my @names = _class_names_with_spec_from_expression($e);
my $results = {};
my $perl_e = $e;
$perl_e =~ s/$placeholder[.]/$replacement/xmsg;

my $evaluator = sub { # Evaluation function
my $result = shift;
# Force an error when operations on undefined values are
# are attempted.
use warnings FATAL => 'uninitialized';
##no critic (BuiltinFunctions::ProhibitStringyEval)
my $o = eval $perl_e; # Evaluate Perl string expression
##use critic
if ($EVAL_ERROR) {
my $err = $EVAL_ERROR;
croak "Error evaluating expression '$perl_e' derived from '$e': $err";
# Prepare the expression from the robo config for evaluation.
foreach my $name (@names) {
my ($class_name, $spec) = split /$CLASS_NAME_SPEC_DELIM/smx, $name;
my $obj_a = $self->_results->{$class_name};
# We should not get this far with an error in the configuration
# file, but just in case...
$obj_a and @{$obj_a} or croak "No $class_name autoqc result for evaluation of '$e'";
if ($spec) {
# Now have to choose one result. If the object is not an instance of
# the generic autoqc result class, there will be an error at this point.
# Making the code less specific is not worth the effort at this point.
my @on_spec = grep { _pp_name2spec($_->pp_name) eq $spec } @{$obj_a};
@on_spec or croak "No $name autoqc result for evaluation of '$e'";
$obj_a = \@on_spec;
}
return $o ? 1 : 0;
};
(@{$obj_a} == 1) or croak "Multiple $name autoqc results for evaluation of '$e'";
my $obj = $obj_a->[0];
$obj or croak "No $class_name autoqc result for evaluation of '$e'";
$results->{$name} = $obj;

## no critic (ValuesAndExpressions::RequireInterpolationOfMetachars)
my $replacement = q[$results->{'] . $name . q['}->];
## use critic
$perl_e =~ s/$name[.]/$replacement/xmsg;
}

# Evaluate and return the outcome.
return $evaluator->($obj);
return _get_evaluation_result($perl_e, $e, $results);
}

__PACKAGE__->meta->make_immutable();
Expand Down
19 changes: 13 additions & 6 deletions t/60-autoqc-checks-review.t
Original file line number Diff line number Diff line change
Expand Up @@ -405,7 +405,7 @@ subtest 'single expression evaluation' => sub {
rpt_list => $rpt_list);

throws_ok {$check->_evaluate_expression('verify_bam.freemix < 0.01')}
qr/No autoqc result for evaluation of/,
qr/No verify_bam autoqc result for evaluation of/,
'error if check name is unknown';
throws_ok {$check->_evaluate_expression('verify_bam_id.freemix_free < 0.01')}
qr/Can't locate object method \"freemix_free\"/,
Expand Down Expand Up @@ -646,7 +646,7 @@ subtest 'evaluating generic for artic results' => sub {
final_qc_outcome => 1);
is ($check->can_run, 1, 'check can run');
throws_ok { $check->execute }
qr/Not able to run evaluation: No autoqc generic result for ncov2019_artic_nf/,
qr/Not able to run evaluation: No generic:ncov2019_artic_nf autoqc result/,
'message as an error';

# qc_in contains two artic generic results for the same product
Expand All @@ -657,7 +657,7 @@ subtest 'evaluating generic for artic results' => sub {
final_qc_outcome => 1);
is ($check->can_run, 1, 'check can run');
throws_ok { $check->execute }
qr/Not able to run evaluation: Multiple autoqc results/,
qr/Not able to run evaluation: Multiple generic:ncov2019_artic_nf autoqc results/,
'message as an error';

# qc_in contains other autoqc results for this entity, including
Expand Down Expand Up @@ -812,7 +812,7 @@ subtest 'evaluating generic for artic results' => sub {
};

subtest 'evaluating for LCMB library type' => sub {
plan tests => 14;
plan tests => 17;

my $test_data_path = 't/data/runfolder_49285';
my $runfolder_name = '240802_A00537_1044_BHJKCGDSXC';
Expand All @@ -834,6 +834,7 @@ subtest 'evaluating for LCMB library type' => sub {
verbose => 0
)->load();

# Sample-level evaluation
my $check = npg_qc::autoqc::checks::review->new(
runfolder_path => $runfolder_path,
conf_path => $test_data_path,
Expand All @@ -849,6 +850,7 @@ subtest 'evaluating for LCMB library type' => sub {
'verify_bam_id.pass'
);
my $result = $check->result();
is ($result->comments(), undef, 'no comments');
is_deeply ($result->evaluation_results(), \%expected_evaluation_results,
'sample evaluation results as expected');
is ($result->pass, 1, 'the check passed');
Expand All @@ -860,6 +862,7 @@ subtest 'evaluating for LCMB library type' => sub {
'sample QC outcome is saved correctly'
);

# Lane-level evaluation
$check = npg_qc::autoqc::checks::review->new(
runfolder_path => $runfolder_path,
conf_path => $test_data_path,
Expand All @@ -869,10 +872,13 @@ subtest 'evaluating for LCMB library type' => sub {
);
lives_ok { $check->execute() } 'lane level check runs OK';
$result = $check->result();
is ($result->comments(), undef, 'no comments');
%expected_evaluation_results = (
'tag_metrics.matches_pf_percent && (tag_metrics.perfect_matches_percent +' .
' tag_metrics.one_mismatch_percent) > 93' => 1,
'tag_metrics.all_reads * 302 > 750000000000' => 1
' tag_metrics.one_mismatch_percent) > 93' => 1,
'tag_metrics.all_reads * 302 > 750000000000' => 1,
'tag_metrics.all_reads && (((qX_yield.yield1_q30 + qX_yield.yield2_q30) ' .
'* 1000 * 100)/(tag_metrics.all_reads * 302) >= 78)' => 1,
);
is_deeply ($result->evaluation_results(), \%expected_evaluation_results,
'lane evaluation results as expected');
Expand All @@ -897,6 +903,7 @@ subtest 'evaluating for LCMB library type' => sub {
$expected_evaluation_results{$key} = 0;
}
$result = $check->result();
is ($result->comments(), undef, 'no comments');
is_deeply ($result->evaluation_results(), \%expected_evaluation_results,
'lane evaluation results as expected');
is ($result->pass, 0, 'the check failed');
Expand Down
1 change: 1 addition & 0 deletions t/data/runfolder_49285/product_release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,4 +37,5 @@ default:
acceptance_criteria:
- "tag_metrics.matches_pf_percent && (tag_metrics.perfect_matches_percent + tag_metrics.one_mismatch_percent) > 93"
- "tag_metrics.all_reads * 302 > 750000000000"
- "tag_metrics.all_reads && (((qX_yield.yield1_q30 + qX_yield.yield2_q30) * 1000 * 100)/(tag_metrics.all_reads * 302) >= 78)"

0 comments on commit 24e32ce

Please sign in to comment.