From af9c7dabc5b1939e384dc11c461243a3d5a2ac0b Mon Sep 17 00:00:00 2001 From: Marina Gourtovaia Date: Wed, 18 Sep 2024 16:59:59 +0100 Subject: [PATCH] Disallow emply applicability criteria --- Changes | 8 ++++ MANIFEST | 2 + lib/npg_qc/autoqc/checks/review.pm | 44 ++++++++++++------- t/60-autoqc-checks-review.t | 38 +++++++++++----- .../product_release.yml | 36 +++++++++++++++ .../review/mqc_type/product_release.yml | 2 + .../product_release.yml | 37 ++++++++++++++++ 7 files changed, 141 insertions(+), 26 deletions(-) create mode 100644 t/data/autoqc/review/lims_applicability_empty/product_release.yml create mode 100644 t/data/autoqc/review/no_known_applicability_type/product_release.yml diff --git a/Changes b/Changes index f03a4be0..d2b04cd8 100644 --- a/Changes +++ b/Changes @@ -16,6 +16,14 @@ LIST OF CHANGES FOR NPG-QC PACKAGE consideration. The library_type attribute is now set when possible, no error if it is undefined. + Previously the code allowed for an empty applicability_criteria hash, which + resulted in a particular set of QC criteria being applied to every and any + product. Very early on this was an intended behaviour for UKB data. The main + filter was the study id. In the default section of the product configuration + file there is no external filter, so there is a real danger of the review + check being run indiscriminately for any product. While this will never be + an intension, small errors in the YML file might have this effect. + release 72.2.0 (2024-08-30) - npg_qc::autoqc::check::review: 1. To enable access to information about a sequencing run (from RunInfo.xml, diff --git a/MANIFEST b/MANIFEST index f41f24c7..7951deeb 100644 --- a/MANIFEST +++ b/MANIFEST @@ -520,6 +520,8 @@ t/data/autoqc/review/with_na_criteria/product_release.yml t/data/autoqc/review/no_criteria_section/product_release.yml t/data/autoqc/review/not_hash/product_release.yml t/data/autoqc/review/mqc_type/product_release.yml +t/data/autoqc/review/lims_applicability_empty/product_release.yml +t/data/autoqc/review/no_known_applicability_type/product_release.yml t/data/autoqc/review/unknown_qc_type/product_release.yml t/data/autoqc/review/uqc_type/product_release.yml t/data/autoqc/review/default_and_study_section/product_release.yml diff --git a/lib/npg_qc/autoqc/checks/review.pm b/lib/npg_qc/autoqc/checks/review.pm index 4c1972ab..85eb3bc5 100644 --- a/lib/npg_qc/autoqc/checks/review.pm +++ b/lib/npg_qc/autoqc/checks/review.pm @@ -29,6 +29,9 @@ Readonly::Scalar my $QC_TYPE_KEY => q[qc_type]; Readonly::Scalar my $APPLICABILITY_CRITERIA_KEY => q[applicability_criteria]; Readonly::Scalar my $LIMS_APPLICABILITY_CRITERIA_KEY => q[lims]; Readonly::Scalar my $SEQ_APPLICABILITY_CRITERIA_KEY => q[sequencing_run]; +Readonly::Array my @APPLICABILITY_CRITERIA_TYPES => ( + $LIMS_APPLICABILITY_CRITERIA_KEY, $SEQ_APPLICABILITY_CRITERIA_KEY + ); Readonly::Scalar my $ACCEPTANCE_CRITERIA_KEY => q[acceptance_criteria]; Readonly::Scalar my $QC_TYPE_DEFAULT => q[mqc]; @@ -514,31 +517,42 @@ has '_applicable_criteria' => ( sub _build__applicable_criteria { my $self = shift; - my $criteria_objs = $self->_robo_config->{$CRITERIA_KEY}; my @applicable = (); - foreach my $co ( @{$criteria_objs} ) { + foreach my $criteria_definition ( @{$self->_robo_config->{$CRITERIA_KEY}} ) { + + my $applicability_definition = $criteria_definition->{$APPLICABILITY_CRITERIA_KEY}; + $applicability_definition or croak + "$APPLICABILITY_CRITERIA_KEY is not defined for one of RoboQC criteria"; + my $c_applicable = 1; - for my $c_type ($LIMS_APPLICABILITY_CRITERIA_KEY, $SEQ_APPLICABILITY_CRITERIA_KEY) { - my $c = $co->{$APPLICABILITY_CRITERIA_KEY}->{$c_type}; - if ($c && !$self->_applicability($c, $c_type)) { - $c_applicable = 0; - last; - } + my $one_found = 0; + for my $c_type (@APPLICABILITY_CRITERIA_TYPES) { + exists $applicability_definition->{$c_type} or next; + $one_found = 1; + my $ac = $applicability_definition->{$c_type}; + (defined $ac and keys %{$ac}) or croak + "$c_type type applicability criteria is not defined"; + $c_applicable = $self->_is_applicable($c_type, $ac); + !$c_applicable && last; # Stop on the first non applicable. } - $c_applicable or next; - push @applicable, $co; + $one_found or croak 'None of known applicability type criteria is defined. ' . + 'Known types: ' . join q[, ], @APPLICABILITY_CRITERIA_TYPES; + $c_applicable && push @applicable, $criteria_definition; # Save if fully applicable. } return \@applicable; } -sub _applicability { - my ($self, $acriteria, $criteria_type) = @_; +sub _is_applicable { + my ($self, $criteria_type, $acriteria) = @_; - ($acriteria && $criteria_type) or croak - 'The criterium and its type type should be defined'; + $criteria_type or croak + 'Applicability criteria type is not defined'; + $acriteria or croak + "$criteria_type applicability criteria is not defined"; (ref $acriteria eq 'HASH') or croak sprintf - '%s section should be a hash in a robo config for %', $criteria_type, $self->_entity_desc; + '%s section should be a hash in a robo config for %', + $criteria_type, $self->_entity_desc; my $test = {}; foreach my $prop ( keys %{$acriteria} ) { diff --git a/t/60-autoqc-checks-review.t b/t/60-autoqc-checks-review.t index b2f74853..968dd2a2 100644 --- a/t/60-autoqc-checks-review.t +++ b/t/60-autoqc-checks-review.t @@ -33,7 +33,7 @@ my $criteria_list = [ ]; subtest 'constructing object, deciding whether to run' => sub { - plan tests => 29; + plan tests => 33; my $check = npg_qc::autoqc::checks::review->new( conf_path => $test_data_dir, @@ -147,8 +147,10 @@ subtest 'constructing object, deciding whether to run' => sub { conf_path => "$test_data_dir/no_applicability4single", qc_in => $test_data_dir, rpt_list => '27483:1:2'); - ok ($check->can_run, 'can_run returns true'); - ok (!$check->result->comments, 'No comments logged'); + ok (!$check->can_run, 'can_run returns false'); + like ($check->result->comments, + qr/applicability_criteria is not defined for one of RoboQC criteria/, + 'Error logged'); $check = npg_qc::autoqc::checks::review->new( conf_path => "$test_data_dir/with_na_criteria", @@ -158,10 +160,31 @@ subtest 'constructing object, deciding whether to run' => sub { is ($check->result->comments, 'None of the RoboQC applicability criteria is satisfied', 'Comment logged'); + + local $ENV{NPG_CACHED_SAMPLESHEET_FILE} = + 't/data/autoqc/review/samplesheet_29524.csv'; + + $check = npg_qc::autoqc::checks::review->new( + conf_path => "$test_data_dir/no_known_applicability_type", + qc_in => $test_data_dir, + rpt_list => '27483:1:2'); + ok (!$check->can_run, 'can_run returns false'); + like ($check->result->comments, + qr/None of known applicability type criteria is defined/, + 'Error logged'); + + $check = npg_qc::autoqc::checks::review->new( + conf_path => "$test_data_dir/lims_applicability_empty", + qc_in => $test_data_dir, + rpt_list => '27483:1:2'); + ok (!$check->can_run, 'can_run returns false'); + like ($check->result->comments, + qr/lims type applicability criteria is not defined/, + 'Error logged'); }; subtest 'caching appropriate criteria object' => sub { - plan tests => 3; + plan tests => 2; my $check = npg_qc::autoqc::checks::review->new( conf_path => "$test_data_dir/with_criteria", @@ -171,13 +194,6 @@ subtest 'caching appropriate criteria object' => sub { is_deeply ($check->_criteria, {'and' => \@list}, 'criteria parsed correctly'); - $check = npg_qc::autoqc::checks::review->new( - conf_path => "$test_data_dir/no_applicability4single", - qc_in => $test_data_dir, - rpt_list => '27483:1:2'); - is_deeply ($check->_criteria, {'and' => $criteria_list}, - 'criteria parsed correctly'); - $check = npg_qc::autoqc::checks::review->new( conf_path => "$test_data_dir/with_na_criteria", qc_in => $test_data_dir, diff --git a/t/data/autoqc/review/lims_applicability_empty/product_release.yml b/t/data/autoqc/review/lims_applicability_empty/product_release.yml new file mode 100644 index 00000000..f76137d5 --- /dev/null +++ b/t/data/autoqc/review/lims_applicability_empty/product_release.yml @@ -0,0 +1,36 @@ +--- +default: + s3: + enable: false + url: null + notify: false + irods: + enable: true + notify: false + +study: + - study_id: "5392" + s3: + enable: true + url: "gs://profile_one-europe-west2" + date_binning: true + customer_name: "UK Organisation" + profile: "profile_one" + notify: true + receipts: "/data_product_receipts/5392/" + irods: + enable: false + notify: true + merge: + component_cache_dir: "/merge_component_cache/5392/" + robo_qc: + qc_type: "mqc" + criteria: + - applicability_criteria: + lims: + acceptance_criteria : + - "( bam_flagstats.target_proper_pair_mapped_reads / bam_flagstats.target_mapped_reads ) > 0.95" + - "bam_flagstats.target_mapped_bases > 85_000_000_000" + - "bam_flagstats.target_percent_gt_coverage_threshold > 95" + - "verify_bam_id.freemix < 0.01" + - "( bcfstats.genotypes_nrd_dividend / bcfstats.genotypes_nrd_divisor ) < 0.02" diff --git a/t/data/autoqc/review/mqc_type/product_release.yml b/t/data/autoqc/review/mqc_type/product_release.yml index b0db0bd6..7fcdc3d8 100644 --- a/t/data/autoqc/review/mqc_type/product_release.yml +++ b/t/data/autoqc/review/mqc_type/product_release.yml @@ -27,6 +27,8 @@ study: qc_type: "mqc" criteria: - applicability_criteria: + lims: + library_type: "HiSeqX PCR free" acceptance_criteria : - "( bam_flagstats.target_proper_pair_mapped_reads / bam_flagstats.target_mapped_reads ) > 0.95" - "bam_flagstats.target_mapped_bases > 85_000_000_000" diff --git a/t/data/autoqc/review/no_known_applicability_type/product_release.yml b/t/data/autoqc/review/no_known_applicability_type/product_release.yml new file mode 100644 index 00000000..64070318 --- /dev/null +++ b/t/data/autoqc/review/no_known_applicability_type/product_release.yml @@ -0,0 +1,37 @@ +--- +default: + s3: + enable: false + url: null + notify: false + irods: + enable: true + notify: false + +study: + - study_id: "5392" + s3: + enable: true + url: "gs://profile_one-europe-west2" + date_binning: true + customer_name: "UK Organisation" + profile: "profile_one" + notify: true + receipts: "/data_product_receipts/5392/" + irods: + enable: false + notify: true + merge: + component_cache_dir: "/merge_component_cache/5392/" + robo_qc: + qc_type: "mqc" + criteria: + - applicability_criteria: + libs: + library_type: "HiSeqX PCR free" + acceptance_criteria : + - "( bam_flagstats.target_proper_pair_mapped_reads / bam_flagstats.target_mapped_reads ) > 0.95" + - "bam_flagstats.target_mapped_bases > 85_000_000_000" + - "bam_flagstats.target_percent_gt_coverage_threshold > 95" + - "verify_bam_id.freemix < 0.01" + - "( bcfstats.genotypes_nrd_dividend / bcfstats.genotypes_nrd_divisor ) < 0.02"