diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml index 3b984087..7afb8bef 100644 --- a/.github/workflows/run-tests.yml +++ b/.github/workflows/run-tests.yml @@ -34,7 +34,8 @@ jobs: sudo apt-get update # https://github.com/actions/runner-images/issues/2139 sudo apt-get remove -y nginx libgd3 - sudo apt-get install -y libgd-dev uuid-dev libgd-text-perl + # libdevel-patchperl-perl is needed for perlbrew + sudo apt-get install -y libgd-dev uuid-dev libgd-text-perl libdevel-patchperl-perl - name: "Install Miniforge" run: | diff --git a/Changes b/Changes index 3014ee17..b6dfbd43 100644 --- a/Changes +++ b/Changes @@ -1,5 +1,15 @@ LIST OF CHANGES FOR NPG-QC PACKAGE +release 74.0.1 (2024-12-17) + - Fix perlbrew installation by installing libdevel-patchperl-perl in runner + - Fixes/updates for lane-level RoboQC lane-level assessment: + - Excluded study-specific lane-lane assessment in npg_qc::autoqc::check::review + This helps to avoid uncertainty in evaluating lanes where samples belong to + different studies at a price of not being able to perform study-specific + evaluation of one-library lanes. + - Updated the expression evaluator to allow for multiple types of + result objects to be used in a single expression. + release 74.0.0 (2024-12-02) - npg_qc::autoqc::qc_store - changed loading of ORM classes into memory to on demand at run time. diff --git a/lib/npg_qc/autoqc/checks/review.pm b/lib/npg_qc/autoqc/checks/review.pm index 8be4800e..fb69c820 100644 --- a/lib/npg_qc/autoqc/checks/review.pm +++ b/lib/npg_qc/autoqc/checks/review.pm @@ -24,16 +24,15 @@ our $VERSION = '0'; Readonly::Scalar my $CONJUNCTION_OP => q[and]; Readonly::Scalar my $DISJUNCTION_OP => q[or]; -Readonly::Scalar my $ROBO_KEY => q[robo_qc]; -Readonly::Scalar my $CRITERIA_KEY => q[criteria]; -Readonly::Scalar my $QC_TYPE_KEY => q[qc_type]; +Readonly::Scalar my $ROBO_KEY => q[robo_qc]; +Readonly::Scalar my $CRITERIA_KEY => q[criteria]; Readonly::Scalar my $APPLICABILITY_CRITERIA_KEY => q[applicability_criteria]; Readonly::Scalar my $LIMS_APPLICABILITY_CRITERIA_KEY => q[lims]; Readonly::Scalar my $SEQ_APPLICABILITY_CRITERIA_KEY => q[sequencing_run]; Readonly::Array my @APPLICABILITY_CRITERIA_TYPES => ( $LIMS_APPLICABILITY_CRITERIA_KEY, $SEQ_APPLICABILITY_CRITERIA_KEY ); -Readonly::Scalar my $ACCEPTANCE_CRITERIA_KEY => q[acceptance_criteria]; +Readonly::Scalar my $ACCEPTANCE_CRITERIA_KEY => q[acceptance_criteria]; Readonly::Scalar my $QC_TYPE_LIB => q[mqc]; Readonly::Scalar my $QC_TYPE_SEQ => q[mqc_seq]; @@ -41,8 +40,20 @@ Readonly::Scalar my $QC_TYPE_SEQ => q[mqc_seq]; Readonly::Scalar my $TIMESTAMP_FORMAT_WOFFSET => q[%Y-%m-%dT%T%z]; Readonly::Scalar my $CLASS_NAME_SPEC_DELIM => q[:]; -Readonly::Scalar my $CLASS_NAME_SPEC_RE => - qr/\A(?:\W+)? (\w+) (?: $CLASS_NAME_SPEC_DELIM (\w+))?[.]/xms; +Readonly::Scalar my $CLASS_NAME_SPEC_RE => + qr{ + # Examples: + # "generic:ncov2019_artic_nf.doc->{meta}->{'num_input_reads'} > 10000" + # "tag_metrics.all_reads && (qX_yield.yield1_q30/(tag_metrics.all_reads * 302) > 78)" + + (?:\W+)? # optional non-word characters + ( # start of capture group + [[:alpha:]_]+ # result class name + (?: $CLASS_NAME_SPEC_DELIM \w+)? # an optional spec for a generic result + # class prepended by a delimeter + ) # end of capture group + [.] # ends with a dot + }xms; ## no critic (Documentation::RequirePodAtEnd) @@ -74,7 +85,8 @@ outcomes. The C section of the product configuration file sits either within the configuration for a particular study or in the C section, or in both locations. For a given entity a study-specific RoboQC definition -takes precedence over the default one. +takes precedence over the default one. For lanes only the C section +of the product configuration file is examined. Evaluation criteria for samples vary depending on the sequencing instrument and flowcell type, library type, sample reference, etc. Each of the @@ -487,14 +499,24 @@ has '_robo_config' => ( sub _build__robo_config { my $self = shift; - my $strict = 1; # Parse study section only, ignore the default section. - my $config = $self->study_config($self->lims(), $strict); - if ($config) { - $config = $config->{$ROBO_KEY}; + my $config; + + # Do not examine study-specific section for lane entities, meaning that + # one-sample lanes, pools or not, cannot be evaluated according to the + # study criteria of their sample. Helps to avoid uncertainty of dealing + # with pool's samples belonging to different studies. + if (!$self->lims->is_lane) { + my $strict = 1; # Parse study section only, ignore the 'default' section. + $config = $self->study_config($self->lims(), $strict); + if ($config) { + $config = $config->{$ROBO_KEY}; + } + if (!$config) { + carp 'Study-specific RoboQC config not found for ' . $self->_entity_desc; + } } - if (!$config) { - carp 'Study-specific RoboQC config not found for ' . $self->_entity_desc; + if (!$config) { # Look at the 'default' section of the configuration file. $config = $self->default_study_config()->{$ROBO_KEY}; } @@ -652,7 +674,7 @@ sub _build__result_class_names { my $self = shift; my @class_names = uniq sort - map { (_class_name_from_expression($_))[0] } + map { _class_names_from_expression($_) } map { @{$_} } # dereference the array values %{$self->_criteria()}; @@ -721,11 +743,26 @@ sub _build__results { return \%h; } -sub _class_name_from_expression { +sub _class_names_with_spec_from_expression { + my $e = shift; + # Note 'g' flag at the end of the regexp. We will get a list of matches. + # The list might contain multiple class names for the same class. Depending + # on the expression, the return value contains one or more class names. + my @extended_class_names = $e =~ /$CLASS_NAME_SPEC_RE/xmsg; + @extended_class_names = uniq @extended_class_names; # Remove repetitions. + @extended_class_names or croak "Failed to infer class names from $e"; + return @extended_class_names; +} + +sub _class_names_from_expression { my $e = shift; - my ($class_name, $spec) = $e =~ $CLASS_NAME_SPEC_RE; - $class_name or croak "Failed to infer class name from $e"; - return ($class_name, $spec); + my @names = (); + foreach my $name (_class_names_with_spec_from_expression($e)) { + ($name) = $name =~ /\A(\w+)/smx; # Remove the spec part if present. + push @names, $name; + } + @names = uniq @names; + return @names; } ##### @@ -785,6 +822,30 @@ sub _apply_operator { return $outcome ? 1 : 0; } +sub _pp_name2spec { + # To get a match with what would have been used in the robo config, + # replace all 'non-word' characters. + my $pp_name = shift; + $pp_name =~ s/\W/_/gsmx; + return $pp_name; +}; + +sub _get_evaluation_result { + my ($perl_e, $e, $results) = @_; + + use warnings FATAL => 'uninitialized'; # Force an error when operations on + # undefined values are attempted. + ##no critic (BuiltinFunctions::ProhibitStringyEval) + my $o = eval $perl_e; # Evaluate Perl expression + ##use critic + if ($EVAL_ERROR) { + my $err = $EVAL_ERROR; + croak "Error evaluating expression '$perl_e' derived from '$e': $err"; + } + + return $o ? 1 : 0; +} + ##### # Evaluates a single expression in the context of available autoqc results. # If runs successfully, returns 0 or 1, otherwise throws an error. @@ -792,55 +853,37 @@ sub _apply_operator { sub _evaluate_expression { my ($self, $e) = @_; - my ($class_name, $spec) = _class_name_from_expression($e); - my $obj_a = $self->_results->{$class_name}; - # We should not get this far with an error in the configuration - # file, but just in case... - $obj_a and @{$obj_a} or croak "No autoqc result for evaluation of '$e'"; - - if ($spec) { - my $pp_name2spec = sub { # To get a match with what would have been - my $pp_name = shift; # used in the robo config, replace all - $pp_name =~ s/\W/_/gsmx; # 'non-word' characters. - return $pp_name; - }; - # Now have to choose one result. If the object is not an instance of - # the generic autoqc result class, there will be an error at this point. - # Making the code less specific is not worth the effort at this point. - my @on_spec = grep { $pp_name2spec->($_->pp_name) eq $spec } @{$obj_a}; - @on_spec or croak "No autoqc $class_name result for $spec"; - $obj_a = \@on_spec; - } - - (@{$obj_a} == 1) or croak "Multiple autoqc results for evaluation of '$e'"; - my $obj = $obj_a->[0]; - - $obj or croak "No autoqc result for evaluation of '$e'"; - - # Prepare the expression from the robo config for evaluation. - my $placeholder = $class_name; - $spec and ($placeholder .= $CLASS_NAME_SPEC_DELIM . $spec); - my $replacement = q[$] . q[result->]; + my @names = _class_names_with_spec_from_expression($e); + my $results = {}; my $perl_e = $e; - $perl_e =~ s/$placeholder[.]/$replacement/xmsg; - - my $evaluator = sub { # Evaluation function - my $result = shift; - # Force an error when operations on undefined values are - # are attempted. - use warnings FATAL => 'uninitialized'; - ##no critic (BuiltinFunctions::ProhibitStringyEval) - my $o = eval $perl_e; # Evaluate Perl string expression - ##use critic - if ($EVAL_ERROR) { - my $err = $EVAL_ERROR; - croak "Error evaluating expression '$perl_e' derived from '$e': $err"; + # Prepare the expression from the robo config for evaluation. + foreach my $name (@names) { + my ($class_name, $spec) = split /$CLASS_NAME_SPEC_DELIM/smx, $name; + my $obj_a = $self->_results->{$class_name}; + # We should not get this far with an error in the configuration + # file, but just in case... + $obj_a and @{$obj_a} or croak "No $class_name autoqc result for evaluation of '$e'"; + if ($spec) { + # Now have to choose one result. If the object is not an instance of + # the generic autoqc result class, there will be an error at this point. + # Making the code less specific is not worth the effort at this point. + my @on_spec = grep { _pp_name2spec($_->pp_name) eq $spec } @{$obj_a}; + @on_spec or croak "No $name autoqc result for evaluation of '$e'"; + $obj_a = \@on_spec; } - return $o ? 1 : 0; - }; + (@{$obj_a} == 1) or croak "Multiple $name autoqc results for evaluation of '$e'"; + my $obj = $obj_a->[0]; + $obj or croak "No $class_name autoqc result for evaluation of '$e'"; + $results->{$name} = $obj; + + ## no critic (ValuesAndExpressions::RequireInterpolationOfMetachars) + my $replacement = q[$results->{'] . $name . q['}->]; + ## use critic + $perl_e =~ s/$name[.]/$replacement/xmsg; + } # Evaluate and return the outcome. - return $evaluator->($obj); + return _get_evaluation_result($perl_e, $e, $results); } __PACKAGE__->meta->make_immutable(); diff --git a/scripts/install_miniforge.sh b/scripts/install_miniforge.sh index 9082c716..48f4e095 100755 --- a/scripts/install_miniforge.sh +++ b/scripts/install_miniforge.sh @@ -8,8 +8,8 @@ set -ex -MINIFORGE_VERSION="24.9.0-0" -MINIFORGE_SHA256="77fb505f6266ffa1b5d59604cf6ba25948627e908928cbff148813957b1c28af" +MINIFORGE_VERSION="24.9.2-0" +MINIFORGE_SHA256="ca8c544254c40ae5192eb7db4e133ff4eb9f942a1fec737dba8205ac3f626322" CONDA_HOME=${CONDA_HOME:="$HOME/conda"} export CONDA_HOME @@ -27,7 +27,7 @@ channels: - conda-forge EOF -curl -sSL "https://github.com/conda-forge/miniforge/releases/download/${MINIFORGE_VERSION}/Mambaforge-${MINIFORGE_VERSION}-Linux-x86_64.sh" -o ./miniforge.sh +curl -sSL "https://github.com/conda-forge/miniforge/releases/download/${MINIFORGE_VERSION}/Miniforge3-${MINIFORGE_VERSION}-Linux-x86_64.sh" -o ./miniforge.sh sha256sum ./miniforge.sh | grep "$MINIFORGE_SHA256" /bin/bash ./miniforge.sh -b -p "$CONDA_HOME" rm ./miniforge.sh diff --git a/t/60-autoqc-checks-review.t b/t/60-autoqc-checks-review.t index ac7ebd23..01f6e35b 100644 --- a/t/60-autoqc-checks-review.t +++ b/t/60-autoqc-checks-review.t @@ -38,7 +38,7 @@ my $schema = Moose::Meta::Class->create_anon_class( ->create_test_db(q[npg_qc::Schema]); subtest 'constructing object, deciding whether to run' => sub { - plan tests => 33; + plan tests => 34; my $check = npg_qc::autoqc::checks::review->new( conf_path => $test_data_dir, @@ -148,6 +148,15 @@ subtest 'constructing object, deciding whether to run' => sub { } 'can set lims via the constructor'; ok ($check->can_run, 'can_run returns true'); + # No robo config in the default section, all samples belong to the same + # study, for which robo config is defined. + $check = npg_qc::autoqc::checks::review->new( + conf_path => "$test_data_dir/with_criteria", + qc_in => $test_data_dir, + rpt_list => '27483:1' + ); + ok (!$check->can_run, 'can_run returns false for a lane'); + $check = npg_qc::autoqc::checks::review->new( conf_path => "$test_data_dir/no_applicability4single", qc_in => $test_data_dir, @@ -405,7 +414,7 @@ subtest 'single expression evaluation' => sub { rpt_list => $rpt_list); throws_ok {$check->_evaluate_expression('verify_bam.freemix < 0.01')} - qr/No autoqc result for evaluation of/, + qr/No verify_bam autoqc result for evaluation of/, 'error if check name is unknown'; throws_ok {$check->_evaluate_expression('verify_bam_id.freemix_free < 0.01')} qr/Can't locate object method \"freemix_free\"/, @@ -646,7 +655,7 @@ subtest 'evaluating generic for artic results' => sub { final_qc_outcome => 1); is ($check->can_run, 1, 'check can run'); throws_ok { $check->execute } - qr/Not able to run evaluation: No autoqc generic result for ncov2019_artic_nf/, + qr/Not able to run evaluation: No generic:ncov2019_artic_nf autoqc result/, 'message as an error'; # qc_in contains two artic generic results for the same product @@ -657,7 +666,7 @@ subtest 'evaluating generic for artic results' => sub { final_qc_outcome => 1); is ($check->can_run, 1, 'check can run'); throws_ok { $check->execute } - qr/Not able to run evaluation: Multiple autoqc results/, + qr/Not able to run evaluation: Multiple generic:ncov2019_artic_nf autoqc results/, 'message as an error'; # qc_in contains other autoqc results for this entity, including @@ -812,14 +821,14 @@ subtest 'evaluating generic for artic results' => sub { }; subtest 'evaluating for LCMB library type' => sub { - plan tests => 14; + plan tests => 19; my $test_data_path = 't/data/runfolder_49285'; my $runfolder_name = '240802_A00537_1044_BHJKCGDSXC'; my $staging_dir = tempdir( CLEANUP => 1 ); my $id_run = 49285; local $ENV{NPG_CACHED_SAMPLESHEET_FILE} = - 't/data/runfolder_49285/samplesheet_49285.csv'; + "$test_data_path/samplesheet_49285.csv"; my $ae = Archive::Extract->new( archive => "${test_data_path}/${runfolder_name}.tar.gz" @@ -834,6 +843,7 @@ subtest 'evaluating for LCMB library type' => sub { verbose => 0 )->load(); + # Sample-level evaluation my $check = npg_qc::autoqc::checks::review->new( runfolder_path => $runfolder_path, conf_path => $test_data_path, @@ -849,6 +859,7 @@ subtest 'evaluating for LCMB library type' => sub { 'verify_bam_id.pass' ); my $result = $check->result(); + is ($result->comments(), undef, 'no comments'); is_deeply ($result->evaluation_results(), \%expected_evaluation_results, 'sample evaluation results as expected'); is ($result->pass, 1, 'the check passed'); @@ -860,6 +871,7 @@ subtest 'evaluating for LCMB library type' => sub { 'sample QC outcome is saved correctly' ); + # Lane-level evaluation $check = npg_qc::autoqc::checks::review->new( runfolder_path => $runfolder_path, conf_path => $test_data_path, @@ -869,10 +881,13 @@ subtest 'evaluating for LCMB library type' => sub { ); lives_ok { $check->execute() } 'lane level check runs OK'; $result = $check->result(); + is ($result->comments(), undef, 'no comments'); %expected_evaluation_results = ( 'tag_metrics.matches_pf_percent && (tag_metrics.perfect_matches_percent +' . - ' tag_metrics.one_mismatch_percent) > 93' => 1, - 'tag_metrics.all_reads * 302 > 750000000000' => 1 + ' tag_metrics.one_mismatch_percent) > 93' => 1, + 'tag_metrics.all_reads * 302 > 750000000000' => 1, + 'tag_metrics.all_reads && (((qX_yield.yield1_q30 + qX_yield.yield2_q30) ' . + '* 1000 * 100)/(tag_metrics.all_reads * 302) >= 78)' => 1, ); is_deeply ($result->evaluation_results(), \%expected_evaluation_results, 'lane evaluation results as expected'); @@ -897,6 +912,7 @@ subtest 'evaluating for LCMB library type' => sub { $expected_evaluation_results{$key} = 0; } $result = $check->result(); + is ($result->comments(), undef, 'no comments'); is_deeply ($result->evaluation_results(), \%expected_evaluation_results, 'lane evaluation results as expected'); is ($result->pass, 0, 'the check failed'); @@ -906,6 +922,18 @@ subtest 'evaluating for LCMB library type' => sub { {'mqc_seq_outcome' => 'Rejected preliminary', 'username' => 'robo_qc'}, 'lane QC outcome is saved correctly' ); + + $check = npg_qc::autoqc::checks::review->new( + runfolder_path => $runfolder_path, + conf_path => $test_data_path, + rpt_list => "${id_run}:4", + use_db => 1, + _qc_schema => $schema + ); + my $with_control = 0; + is_deeply ([$check->lims->study_ids($with_control)], [qw(7396 7397)], + 'lane 4 samples belong to two different studies'); + lives_and (sub {is $check->can_run, 1}, 'lane-level check can run'); }; 1; diff --git a/t/data/runfolder_49285/product_release.yml b/t/data/runfolder_49285/product_release.yml index fdb1a4c7..23df7387 100644 --- a/t/data/runfolder_49285/product_release.yml +++ b/t/data/runfolder_49285/product_release.yml @@ -37,4 +37,5 @@ default: acceptance_criteria: - "tag_metrics.matches_pf_percent && (tag_metrics.perfect_matches_percent + tag_metrics.one_mismatch_percent) > 93" - "tag_metrics.all_reads * 302 > 750000000000" + - "tag_metrics.all_reads && (((qX_yield.yield1_q30 + qX_yield.yield2_q30) * 1000 * 100)/(tag_metrics.all_reads * 302) >= 78)" diff --git a/t/data/runfolder_49285/samplesheet_49285.csv b/t/data/runfolder_49285/samplesheet_49285.csv index a9d2cccb..ccfaf6cf 100644 --- a/t/data/runfolder_49285/samplesheet_49285.csv +++ b/t/data/runfolder_49285/samplesheet_49285.csv @@ -26,5 +26,5 @@ Lane,Sample_ID,Sample_Name,GenomeFolder,Index,Index2,bait_name,default_library_t 4,71446140,EGAN00004549103,,CGTCGTCG,CTCAGAAA,,LCMB,CGTCGTCG,CTCAGAAA,,0,0,72659884,3,71446140,,9606,from:450 to:450,Normal,Homo sapiens,0,,,9725055,0,7396STDY14893009,,888,1,0,0,7396,Some interesting study,Homo_sapiens (GRCh38_full_analysis_set_plus_decoy_hla),0,3, 4,71446152,EGAN00004549109,,TTCTCTTT,TCAGCCTG,,LCMB,TTCTCTTT,TCAGCCTG,,0,0,72659884,3,71446152,,9606,from:450 to:450,Normal,Homo sapiens,0,,,9725056,0,7396STDY14893010,,888,1,0,0,7396,Some interesting study,Homo_sapiens (GRCh38_full_analysis_set_plus_decoy_hla),0,4, 4,71446164,EGAN00004549107,,TCTCATAT,TTTGCACC,,LCMB,TCTCATAT,TTTGCACC,,0,0,72659884,3,71446164,,9606,from:450 to:450,Normal,Homo sapiens,0,,,9725057,0,7396STDY14893011,,888,1,0,0,7396,Some interesting study,Homo_sapiens (GRCh38_full_analysis_set_plus_decoy_hla),0,5, -4,71446176,EGAN00004549106,,TCGGGCTG,CGATCTGG,,LCMB,TCGGGCTG,CGATCTGG,,0,0,72659884,3,71446176,,9606,from:450 to:450,Normal,Homo sapiens,0,,,9725058,0,7396STDY14893012,,888,1,0,0,7396,Some interesting study,Homo_sapiens (GRCh38_full_analysis_set_plus_decoy_hla),0,6, -4,51702672,phiX_for_spiked_buffers,,TGTGCAGC,ACTGATGT,,,TGTGCAGC,ACTGATGT,,1,0,72659884,3,51702672,,10847,,,,0,,,1255141,,phiX_for_spiked_buffers,PhiX (Sanger-SNPs),888,1,0,0,198,Illumina Controls, ,0,888, \ No newline at end of file +4,71446176,EGAN00004549106,,TCGGGCTG,CGATCTGG,,LCMB,TCGGGCTG,CGATCTGG,,0,0,72659884,3,71446176,,9606,from:450 to:450,Normal,Homo sapiens,0,,,9725058,0,7396STDY14893012,,888,1,0,0,7397,One more interesting study,Homo_sapiens (GRCh38_full_analysis_set_plus_decoy_hla),0,6, +4,51702672,phiX_for_spiked_buffers,,TGTGCAGC,ACTGATGT,,,TGTGCAGC,ACTGATGT,,1,0,72659884,3,51702672,,10847,,,,0,,,1255141,,phiX_for_spiked_buffers,PhiX (Sanger-SNPs),888,1,0,0,198,Illumina Controls, ,0,888,