From 80761246bec867c77e86b9d218132378fb09597b Mon Sep 17 00:00:00 2001 From: Marina Gourtovaia Date: Fri, 13 Oct 2023 15:35:27 +0100 Subject: [PATCH 1/4] Ensure snp_call_set attribute is always defined. Previously if the genotype check failed to run and exited early, this attribute was not defined in the result object. This gave an error on loading the result to the database. --- lib/npg_qc/autoqc/checks/genotype.pm | 13 +++--- t/60-autoqc-checks-genotype.t | 40 ++++++++++++++++--- t/60-autoqc-db_loader.t | 16 +++++++- .../short_results/47646_1#999.genotype.json | 1 + .../autoqc/samplesheets/samplesheet_47646.csv | 6 +++ 5 files changed, 65 insertions(+), 11 deletions(-) create mode 100644 t/data/autoqc/dbix_loader/short_results/47646_1#999.genotype.json create mode 100644 t/data/autoqc/samplesheets/samplesheet_47646.csv diff --git a/lib/npg_qc/autoqc/checks/genotype.pm b/lib/npg_qc/autoqc/checks/genotype.pm index f2d19ff92..ddc34f424 100644 --- a/lib/npg_qc/autoqc/checks/genotype.pm +++ b/lib/npg_qc/autoqc/checks/genotype.pm @@ -100,8 +100,8 @@ has 'sequenom_plex' => ( ); # snp_call_set - this specifies the set of loci to be called. This may be -# the same for different sequenom_plex data sets. It is used to construct -# the name of some information files (positions, alleles, etc) +# the same for different sequenom_plex data sets. It is used to construct +# the name of some information files (positions, alleles, etc) has 'snp_call_set' => ( is => 'ro', isa => 'Str', @@ -426,9 +426,9 @@ override 'can_run' => sub { if(! any { $_ eq fileparse($self->reference_fasta); } (keys %{$self->_ref_to_snppos_suffix_map})) { $self->result->add_comment('Specified reference genome may be non-human'); return 0; - } + } - return 1; + return 1; }; override 'execute' => sub { @@ -444,6 +444,9 @@ override 'execute' => sub { }; if(!$self->can_run()) { + # The value has to be set in order to be able to upload the result + # to the database; it is a part of the unique constraint. + $self->result->snp_call_set($self->snp_call_set); return 1; } @@ -658,7 +661,7 @@ sub _set_attrib_by_geno_refset { my $name = fileparse($self->geno_refset); $self->_set_sequenom_plex($name); - $self->_set_snp_call_set($name); + $self->_set_ssnp_call_setnp_call_set($name); $self->_set_gt_db($self->geno_refset); if($self->alternate_sample_name) { diff --git a/t/60-autoqc-checks-genotype.t b/t/60-autoqc-checks-genotype.t index 123cac606..3e88edbe4 100644 --- a/t/60-autoqc-checks-genotype.t +++ b/t/60-autoqc-checks-genotype.t @@ -2,11 +2,15 @@ use strict; use warnings; use Cwd; use File::Temp qw/ tempdir /; -use Test::More tests => 10; +use Test::More tests => 11; use Test::Exception; +use File::Copy; +use Perl6::Slurp; + use WTSI::NPG::iRODS; use_ok ('npg_qc::autoqc::checks::genotype'); +use_ok ('npg_qc::autoqc::results::genotype'); my $dir = tempdir(CLEANUP => 1); my $pid = $$; @@ -23,6 +27,8 @@ if ($env_file && $have_irods_execs) { $irods_tmp_coll = $irods->add_collection("GenotypeTest.$pid") ; } +my $ref_repos = cwd . '/t/data/autoqc'; + sub exist_irods_executables { return 0 unless `which ienv`; return 0 unless `which imkdir`; @@ -37,10 +43,35 @@ END { } } +subtest 'Test early exit' => sub { + plan tests => 5; + + local $ENV{'NPG_CACHED_SAMPLESHEET_FILE'} = + 't/data/autoqc/samplesheets/samplesheet_47646.csv'; + my $input_file = "$dir/47646_1#999.cram"; + my $output_file = "$dir/47646_1#999.genotype.json"; + copy('t/data/autoqc/alignment.bam', $input_file); + mkdir("$dir/genotypes"); + + my $check = npg_qc::autoqc::checks::genotype->new( + rpt_list => '47646:1:999', + input_files => [$input_file], + qc_out => $dir, + repository => $ref_repos, + genotypes_repository => "$dir/genotypes" + ); + isa_ok ($check, 'npg_qc::autoqc::checks::genotype'); + lives_ok { $check->run() } 'check executed'; + ok (-e $output_file, "output file $output_file has been generated"); + my $result = npg_qc::autoqc::results::genotype->thaw(slurp($output_file)); + is ($result->comments(), 'Specified reference genome may be non-human', + 'correct comment is captured'); + ok ($result->snp_call_set(), 'the snp_call_set attribute is set'); +}; + SKIP: { - skip 'iRODS not available', 9 unless $irods_tmp_coll; + skip 'iRODS not available', 8 unless $irods_tmp_coll; - my $ref_repos = cwd . '/t/data/autoqc'; my $expected_md5 = q[a4790111996a3f1c0247d65f4998e492]; my $st = join q[/], $dir, q[samtools]; @@ -52,7 +83,7 @@ SKIP: { local $ENV{PATH} = join q[:], $dir, $ENV{PATH}; my $data_dir = $dir."/data"; mkdir($data_dir); - `cp t/data/autoqc/alignment.bam $data_dir/2_1.bam`; + copy('t/data/autoqc/alignment.bam', "$data_dir/2_1.bam"); `echo -n $expected_md5 > $data_dir/2_1.bam.md5`; # populate a temporary iRODS collection @@ -66,7 +97,6 @@ SKIP: { input_files => ["$data_dir/2_1.bam"], repository => $ref_repos, ); - isa_ok ($r, 'npg_qc::autoqc::checks::genotype'); lives_ok { $r->result; } 'No error creating result object'; lives_ok {$r->samtools } 'No error calling "samtools" accessor'; is($r->samtools, $st, 'correct samtools path'); diff --git a/t/60-autoqc-db_loader.t b/t/60-autoqc-db_loader.t index 2d46c2ca1..9197015de 100644 --- a/t/60-autoqc-db_loader.t +++ b/t/60-autoqc-db_loader.t @@ -1,6 +1,6 @@ use strict; use warnings; -use Test::More tests => 21; +use Test::More tests => 22; use Test::Exception; use Test::Warn; use Moose::Meta::Class; @@ -874,4 +874,18 @@ subtest 'loading review and other results from path' => sub { is( $row->criteria_md5, '27c522a795e99e3aea57162541de75b1', 'criteria_md5 column populated'); }; +subtest 'loading partially defined results' => sub { + plan tests => 2; + + my $db = $db_helper->create_test_db(q[npg_qc::Schema], 't/data/fixtures'); + my $db_loader = npg_qc::autoqc::db_loader->new( + path => ['t/data/autoqc/dbix_loader/short_results'], + schema => $db, + verbose => 0, + ); + lives_ok { $db_loader->load() } 'no error loading an ncomplete result'; + is ($db->resultset('Genotype')->search({})->count(), 1, + 'one genotype record is created'); +}; + 1; diff --git a/t/data/autoqc/dbix_loader/short_results/47646_1#999.genotype.json b/t/data/autoqc/dbix_loader/short_results/47646_1#999.genotype.json new file mode 100644 index 000000000..1b2e4607d --- /dev/null +++ b/t/data/autoqc/dbix_loader/short_results/47646_1#999.genotype.json @@ -0,0 +1 @@ +{"__CLASS__":"npg_qc::autoqc::results::genotype","comments":"Specified reference genome may be non-human","composition":{"__CLASS__":"npg_tracking::glossary::composition-96.0.0","components":[{"__CLASS__":"npg_tracking::glossary::composition::component::illumina-96.0.0","id_run":47646,"position":1,"tag_index":999}]},"id_run":47646,"info":{"Check":"npg_qc::autoqc::checks::genotype","Check_version":"0"},"position":1,"snp_call_set":"W30467","tag_index":999} \ No newline at end of file diff --git a/t/data/autoqc/samplesheets/samplesheet_47646.csv b/t/data/autoqc/samplesheets/samplesheet_47646.csv new file mode 100644 index 000000000..33afb7e1c --- /dev/null +++ b/t/data/autoqc/samplesheets/samplesheet_47646.csv @@ -0,0 +1,6 @@ +[Data],,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, +Sample_ID,Sample_Name,GenomeFolder,Index,Index2,bait_name,default_library_type,default_tag_sequence,default_tagtwo_sequence,email_addresses,email_addresses_of_followers,email_addresses_of_managers,email_addresses_of_owners,gbs_plex_name,is_control,is_pool,lane_id,lane_priority,library_name,organism,organism_taxon_id,project_cost_code,project_id,project_name,purpose,qc_state,request_id,required_insert_size_range,sample_accession_number,sample_cohort,sample_common_name,sample_consent_withdrawn,sample_control_type,sample_description,sample_donor_id,sample_id,sample_is_control,sample_name,sample_public_name,sample_reference_genome,sample_supplier_name,spiked_phix_tag_index,study_accession_number,study_alignments_in_bam,study_contains_nonconsented_human,study_contains_nonconsented_xahuman,study_description,study_id,study_name,study_reference_genome,study_separate_y_chromosome_data,study_title,tag_index, +67859722,3073STDY13850377,,TGAACTGG,AGAGTAGA,,GBS,TGAACTGG,AGAGTAGA,user1@some.ac.uk user2@some.ac.uk user3@some.ac.uk user4@some.ac.uk,,user1@some.ac.uk user1@some.ac.uk user2@some.ac.uk user2@some.ac.uk user3@some.ac.uk user3@some.ac.uk user4@some.ac.uk user4@some.ac.uk,,PFA_GRC1_v1.0,0,0,68525973,0,67859722,,5833,S0910,,,standard,,,from:450 to:450,,,Plasmodium falciparum,0,,,3073STDY13850377,8762888,0,3073STDY13850377,,,110723_negative36,,,1,0,0,Development of sequencing and library prep protocols using Human DNA ,3073,Team51_Development,Homo_sapiens (GRCh38_full_analysis_set_plus_decoy_hla),0,Team51_Development,997, +67859770,3073STDY13850378,,TACTTCGG,AGAGTAGA,,GBS,TACTTCGG,AGAGTAGA,user1@some.ac.uk user2@some.ac.uk user3@some.ac.uk user4@some.ac.uk,,user1@some.ac.uk user1@some.ac.uk user2@some.ac.uk user2@some.ac.uk user3@some.ac.uk user3@some.ac.uk user4@some.ac.uk user4@some.ac.uk,,PFA_GRC1_v1.0,0,0,68525973,0,67859770,,5833,S0910,,,standard,,,from:450 to:450,,,Plasmodium falciparum,0,,,3073STDY13850378,8762889,0,3073STDY13850378,,,110723_negative37,,,1,0,0,Development of sequencing and library prep protocols using Human DNA ,3073,Team51_Development,Homo_sapiens (GRCh38_full_analysis_set_plus_decoy_hla),0,Team51_Development,998, +67859818,3073STDY13850379,,TCTCACGG,AGAGTAGA,,GBS,TCTCACGG,AGAGTAGA,user1@some.ac.uk user2@some.ac.uk user3@some.ac.uk user4@some.ac.uk,,user1@some.ac.uk user1@some.ac.uk user2@some.ac.uk user2@some.ac.uk user3@some.ac.uk user3@some.ac.uk user4@some.ac.uk user4@some.ac.uk,,PFA_GRC1_v1.0,0,0,68525973,0,67859818,,5833,S0910,,,standard,,,from:450 to:450,,,Plasmodium falciparum,0,,,3073STDY13850379,8762890,0,3073STDY13850379,,,110723_negative38,,,1,0,0,Development of sequencing and library prep protocols using Human DNA ,3073,Team51_Development,Homo_sapiens (GRCh38_full_analysis_set_plus_decoy_hla),0,Team51_Development,999, +67859866,3073STDY13850380,,TCAGGAGG,AGAGTAGA,,GBS,TCAGGAGG,AGAGTAGA,user1@some.ac.uk user2@some.ac.uk user3@some.ac.uk user4@some.ac.uk,,user1@some.ac.uk user1@some.ac.uk user2@some.ac.uk user2@some.ac.uk user3@some.ac.uk user3@some.ac.uk user4@some.ac.uk user4@some.ac.uk,,PFA_GRC1_v1.0,0,0,68525973,0,67859866,,5833,S0910,,,standard,,,from:450 to:450,,,Plasmodium falciparum,0,,,3073STDY13850380,8762891,0,3073STDY13850380,,,110723_negative39,,,1,0,0,Development of sequencing and library prep protocols using Human DNA ,3073,Team51_Development,Homo_sapiens (GRCh38_full_analysis_set_plus_decoy_hla),0,Team51_Development,1000, From a68fd7946bdfca53d1297467fa607b62bb5374cf Mon Sep 17 00:00:00 2001 From: Marina Gourtovaia Date: Fri, 13 Oct 2023 17:46:33 +0100 Subject: [PATCH 2/4] Set the reference_fasta attribute in the new test --- t/60-autoqc-checks-genotype.t | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/t/60-autoqc-checks-genotype.t b/t/60-autoqc-checks-genotype.t index 3e88edbe4..90ef5f55f 100644 --- a/t/60-autoqc-checks-genotype.t +++ b/t/60-autoqc-checks-genotype.t @@ -52,13 +52,19 @@ subtest 'Test early exit' => sub { my $output_file = "$dir/47646_1#999.genotype.json"; copy('t/data/autoqc/alignment.bam', $input_file); mkdir("$dir/genotypes"); + my $fasta = join q[/], $ref_repos, 'references', + 'Homo_sapiens/GRCh38_full_analysis_set_plus_decoy_hla/all/fasta', + 'GRCh38_full_analysis_set_plus_decoy_hla.fasta'; + # Set the reference_fasta path explicitly, the CI pipeline has + # difficulty inferring it. my $check = npg_qc::autoqc::checks::genotype->new( rpt_list => '47646:1:999', input_files => [$input_file], qc_out => $dir, repository => $ref_repos, - genotypes_repository => "$dir/genotypes" + genotypes_repository => "$dir/genotypes", + reference_fasta => $fasta ); isa_ok ($check, 'npg_qc::autoqc::checks::genotype'); lives_ok { $check->run() } 'check executed'; From c875044dfa815b0557c38741a2bd6e7079698faf Mon Sep 17 00:00:00 2001 From: Marina Gourtovaia Date: Fri, 13 Oct 2023 18:23:33 +0100 Subject: [PATCH 3/4] Explained what is being tested. --- t/60-autoqc-checks-genotype.t | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/t/60-autoqc-checks-genotype.t b/t/60-autoqc-checks-genotype.t index 90ef5f55f..181004ea9 100644 --- a/t/60-autoqc-checks-genotype.t +++ b/t/60-autoqc-checks-genotype.t @@ -44,7 +44,7 @@ END { } subtest 'Test early exit' => sub { - plan tests => 5; + plan tests => 6; local $ENV{'NPG_CACHED_SAMPLESHEET_FILE'} = 't/data/autoqc/samplesheets/samplesheet_47646.csv'; @@ -67,12 +67,17 @@ subtest 'Test early exit' => sub { reference_fasta => $fasta ); isa_ok ($check, 'npg_qc::autoqc::checks::genotype'); + # For a check that cannot be run in full in a meaningful way we + # need to save enough data in the output JSON file to allow for + # this output to be uploaded to the database. lives_ok { $check->run() } 'check executed'; ok (-e $output_file, "output file $output_file has been generated"); my $result = npg_qc::autoqc::results::genotype->thaw(slurp($output_file)); is ($result->comments(), 'Specified reference genome may be non-human', - 'correct comment is captured'); + 'correct reason for not running in full is captured'); ok ($result->snp_call_set(), 'the snp_call_set attribute is set'); + # Testing ability to run last to avoid duplicates in comments. + ok (!$check->can_run(), 'the check cannot be run in full'); }; SKIP: { From 8315fa71575f52e703192d887931daedc610d53e Mon Sep 17 00:00:00 2001 From: Marina Gourtovaia Date: Wed, 18 Oct 2023 09:53:52 +0100 Subject: [PATCH 4/4] Fixed accidental code edit --- lib/npg_qc/autoqc/checks/genotype.pm | 2 +- t/60-autoqc-db_loader.t | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/npg_qc/autoqc/checks/genotype.pm b/lib/npg_qc/autoqc/checks/genotype.pm index ddc34f424..cb34168e0 100644 --- a/lib/npg_qc/autoqc/checks/genotype.pm +++ b/lib/npg_qc/autoqc/checks/genotype.pm @@ -661,7 +661,7 @@ sub _set_attrib_by_geno_refset { my $name = fileparse($self->geno_refset); $self->_set_sequenom_plex($name); - $self->_set_ssnp_call_setnp_call_set($name); + $self->_set_snp_call_set($name); $self->_set_gt_db($self->geno_refset); if($self->alternate_sample_name) { diff --git a/t/60-autoqc-db_loader.t b/t/60-autoqc-db_loader.t index 9197015de..e90c75a76 100644 --- a/t/60-autoqc-db_loader.t +++ b/t/60-autoqc-db_loader.t @@ -883,7 +883,7 @@ subtest 'loading partially defined results' => sub { schema => $db, verbose => 0, ); - lives_ok { $db_loader->load() } 'no error loading an ncomplete result'; + lives_ok { $db_loader->load() } 'no error loading an incomplete result'; is ($db->resultset('Genotype')->search({})->count(), 1, 'one genotype record is created'); };