From 80761246bec867c77e86b9d218132378fb09597b Mon Sep 17 00:00:00 2001 From: Marina Gourtovaia Date: Fri, 13 Oct 2023 15:35:27 +0100 Subject: [PATCH] Ensure snp_call_set attribute is always defined. Previously if the genotype check failed to run and exited early, this attribute was not defined in the result object. This gave an error on loading the result to the database. --- lib/npg_qc/autoqc/checks/genotype.pm | 13 +++--- t/60-autoqc-checks-genotype.t | 40 ++++++++++++++++--- t/60-autoqc-db_loader.t | 16 +++++++- .../short_results/47646_1#999.genotype.json | 1 + .../autoqc/samplesheets/samplesheet_47646.csv | 6 +++ 5 files changed, 65 insertions(+), 11 deletions(-) create mode 100644 t/data/autoqc/dbix_loader/short_results/47646_1#999.genotype.json create mode 100644 t/data/autoqc/samplesheets/samplesheet_47646.csv diff --git a/lib/npg_qc/autoqc/checks/genotype.pm b/lib/npg_qc/autoqc/checks/genotype.pm index f2d19ff92..ddc34f424 100644 --- a/lib/npg_qc/autoqc/checks/genotype.pm +++ b/lib/npg_qc/autoqc/checks/genotype.pm @@ -100,8 +100,8 @@ has 'sequenom_plex' => ( ); # snp_call_set - this specifies the set of loci to be called. This may be -# the same for different sequenom_plex data sets. It is used to construct -# the name of some information files (positions, alleles, etc) +# the same for different sequenom_plex data sets. It is used to construct +# the name of some information files (positions, alleles, etc) has 'snp_call_set' => ( is => 'ro', isa => 'Str', @@ -426,9 +426,9 @@ override 'can_run' => sub { if(! any { $_ eq fileparse($self->reference_fasta); } (keys %{$self->_ref_to_snppos_suffix_map})) { $self->result->add_comment('Specified reference genome may be non-human'); return 0; - } + } - return 1; + return 1; }; override 'execute' => sub { @@ -444,6 +444,9 @@ override 'execute' => sub { }; if(!$self->can_run()) { + # The value has to be set in order to be able to upload the result + # to the database; it is a part of the unique constraint. + $self->result->snp_call_set($self->snp_call_set); return 1; } @@ -658,7 +661,7 @@ sub _set_attrib_by_geno_refset { my $name = fileparse($self->geno_refset); $self->_set_sequenom_plex($name); - $self->_set_snp_call_set($name); + $self->_set_ssnp_call_setnp_call_set($name); $self->_set_gt_db($self->geno_refset); if($self->alternate_sample_name) { diff --git a/t/60-autoqc-checks-genotype.t b/t/60-autoqc-checks-genotype.t index 123cac606..3e88edbe4 100644 --- a/t/60-autoqc-checks-genotype.t +++ b/t/60-autoqc-checks-genotype.t @@ -2,11 +2,15 @@ use strict; use warnings; use Cwd; use File::Temp qw/ tempdir /; -use Test::More tests => 10; +use Test::More tests => 11; use Test::Exception; +use File::Copy; +use Perl6::Slurp; + use WTSI::NPG::iRODS; use_ok ('npg_qc::autoqc::checks::genotype'); +use_ok ('npg_qc::autoqc::results::genotype'); my $dir = tempdir(CLEANUP => 1); my $pid = $$; @@ -23,6 +27,8 @@ if ($env_file && $have_irods_execs) { $irods_tmp_coll = $irods->add_collection("GenotypeTest.$pid") ; } +my $ref_repos = cwd . '/t/data/autoqc'; + sub exist_irods_executables { return 0 unless `which ienv`; return 0 unless `which imkdir`; @@ -37,10 +43,35 @@ END { } } +subtest 'Test early exit' => sub { + plan tests => 5; + + local $ENV{'NPG_CACHED_SAMPLESHEET_FILE'} = + 't/data/autoqc/samplesheets/samplesheet_47646.csv'; + my $input_file = "$dir/47646_1#999.cram"; + my $output_file = "$dir/47646_1#999.genotype.json"; + copy('t/data/autoqc/alignment.bam', $input_file); + mkdir("$dir/genotypes"); + + my $check = npg_qc::autoqc::checks::genotype->new( + rpt_list => '47646:1:999', + input_files => [$input_file], + qc_out => $dir, + repository => $ref_repos, + genotypes_repository => "$dir/genotypes" + ); + isa_ok ($check, 'npg_qc::autoqc::checks::genotype'); + lives_ok { $check->run() } 'check executed'; + ok (-e $output_file, "output file $output_file has been generated"); + my $result = npg_qc::autoqc::results::genotype->thaw(slurp($output_file)); + is ($result->comments(), 'Specified reference genome may be non-human', + 'correct comment is captured'); + ok ($result->snp_call_set(), 'the snp_call_set attribute is set'); +}; + SKIP: { - skip 'iRODS not available', 9 unless $irods_tmp_coll; + skip 'iRODS not available', 8 unless $irods_tmp_coll; - my $ref_repos = cwd . '/t/data/autoqc'; my $expected_md5 = q[a4790111996a3f1c0247d65f4998e492]; my $st = join q[/], $dir, q[samtools]; @@ -52,7 +83,7 @@ SKIP: { local $ENV{PATH} = join q[:], $dir, $ENV{PATH}; my $data_dir = $dir."/data"; mkdir($data_dir); - `cp t/data/autoqc/alignment.bam $data_dir/2_1.bam`; + copy('t/data/autoqc/alignment.bam', "$data_dir/2_1.bam"); `echo -n $expected_md5 > $data_dir/2_1.bam.md5`; # populate a temporary iRODS collection @@ -66,7 +97,6 @@ SKIP: { input_files => ["$data_dir/2_1.bam"], repository => $ref_repos, ); - isa_ok ($r, 'npg_qc::autoqc::checks::genotype'); lives_ok { $r->result; } 'No error creating result object'; lives_ok {$r->samtools } 'No error calling "samtools" accessor'; is($r->samtools, $st, 'correct samtools path'); diff --git a/t/60-autoqc-db_loader.t b/t/60-autoqc-db_loader.t index 2d46c2ca1..9197015de 100644 --- a/t/60-autoqc-db_loader.t +++ b/t/60-autoqc-db_loader.t @@ -1,6 +1,6 @@ use strict; use warnings; -use Test::More tests => 21; +use Test::More tests => 22; use Test::Exception; use Test::Warn; use Moose::Meta::Class; @@ -874,4 +874,18 @@ subtest 'loading review and other results from path' => sub { is( $row->criteria_md5, '27c522a795e99e3aea57162541de75b1', 'criteria_md5 column populated'); }; +subtest 'loading partially defined results' => sub { + plan tests => 2; + + my $db = $db_helper->create_test_db(q[npg_qc::Schema], 't/data/fixtures'); + my $db_loader = npg_qc::autoqc::db_loader->new( + path => ['t/data/autoqc/dbix_loader/short_results'], + schema => $db, + verbose => 0, + ); + lives_ok { $db_loader->load() } 'no error loading an ncomplete result'; + is ($db->resultset('Genotype')->search({})->count(), 1, + 'one genotype record is created'); +}; + 1; diff --git a/t/data/autoqc/dbix_loader/short_results/47646_1#999.genotype.json b/t/data/autoqc/dbix_loader/short_results/47646_1#999.genotype.json new file mode 100644 index 000000000..1b2e4607d --- /dev/null +++ b/t/data/autoqc/dbix_loader/short_results/47646_1#999.genotype.json @@ -0,0 +1 @@ +{"__CLASS__":"npg_qc::autoqc::results::genotype","comments":"Specified reference genome may be non-human","composition":{"__CLASS__":"npg_tracking::glossary::composition-96.0.0","components":[{"__CLASS__":"npg_tracking::glossary::composition::component::illumina-96.0.0","id_run":47646,"position":1,"tag_index":999}]},"id_run":47646,"info":{"Check":"npg_qc::autoqc::checks::genotype","Check_version":"0"},"position":1,"snp_call_set":"W30467","tag_index":999} \ No newline at end of file diff --git a/t/data/autoqc/samplesheets/samplesheet_47646.csv b/t/data/autoqc/samplesheets/samplesheet_47646.csv new file mode 100644 index 000000000..33afb7e1c --- /dev/null +++ b/t/data/autoqc/samplesheets/samplesheet_47646.csv @@ -0,0 +1,6 @@ +[Data],,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, +Sample_ID,Sample_Name,GenomeFolder,Index,Index2,bait_name,default_library_type,default_tag_sequence,default_tagtwo_sequence,email_addresses,email_addresses_of_followers,email_addresses_of_managers,email_addresses_of_owners,gbs_plex_name,is_control,is_pool,lane_id,lane_priority,library_name,organism,organism_taxon_id,project_cost_code,project_id,project_name,purpose,qc_state,request_id,required_insert_size_range,sample_accession_number,sample_cohort,sample_common_name,sample_consent_withdrawn,sample_control_type,sample_description,sample_donor_id,sample_id,sample_is_control,sample_name,sample_public_name,sample_reference_genome,sample_supplier_name,spiked_phix_tag_index,study_accession_number,study_alignments_in_bam,study_contains_nonconsented_human,study_contains_nonconsented_xahuman,study_description,study_id,study_name,study_reference_genome,study_separate_y_chromosome_data,study_title,tag_index, +67859722,3073STDY13850377,,TGAACTGG,AGAGTAGA,,GBS,TGAACTGG,AGAGTAGA,user1@some.ac.uk user2@some.ac.uk user3@some.ac.uk user4@some.ac.uk,,user1@some.ac.uk user1@some.ac.uk user2@some.ac.uk user2@some.ac.uk user3@some.ac.uk user3@some.ac.uk user4@some.ac.uk user4@some.ac.uk,,PFA_GRC1_v1.0,0,0,68525973,0,67859722,,5833,S0910,,,standard,,,from:450 to:450,,,Plasmodium falciparum,0,,,3073STDY13850377,8762888,0,3073STDY13850377,,,110723_negative36,,,1,0,0,Development of sequencing and library prep protocols using Human DNA ,3073,Team51_Development,Homo_sapiens (GRCh38_full_analysis_set_plus_decoy_hla),0,Team51_Development,997, +67859770,3073STDY13850378,,TACTTCGG,AGAGTAGA,,GBS,TACTTCGG,AGAGTAGA,user1@some.ac.uk user2@some.ac.uk user3@some.ac.uk user4@some.ac.uk,,user1@some.ac.uk user1@some.ac.uk user2@some.ac.uk user2@some.ac.uk user3@some.ac.uk user3@some.ac.uk user4@some.ac.uk user4@some.ac.uk,,PFA_GRC1_v1.0,0,0,68525973,0,67859770,,5833,S0910,,,standard,,,from:450 to:450,,,Plasmodium falciparum,0,,,3073STDY13850378,8762889,0,3073STDY13850378,,,110723_negative37,,,1,0,0,Development of sequencing and library prep protocols using Human DNA ,3073,Team51_Development,Homo_sapiens (GRCh38_full_analysis_set_plus_decoy_hla),0,Team51_Development,998, +67859818,3073STDY13850379,,TCTCACGG,AGAGTAGA,,GBS,TCTCACGG,AGAGTAGA,user1@some.ac.uk user2@some.ac.uk user3@some.ac.uk user4@some.ac.uk,,user1@some.ac.uk user1@some.ac.uk user2@some.ac.uk user2@some.ac.uk user3@some.ac.uk user3@some.ac.uk user4@some.ac.uk user4@some.ac.uk,,PFA_GRC1_v1.0,0,0,68525973,0,67859818,,5833,S0910,,,standard,,,from:450 to:450,,,Plasmodium falciparum,0,,,3073STDY13850379,8762890,0,3073STDY13850379,,,110723_negative38,,,1,0,0,Development of sequencing and library prep protocols using Human DNA ,3073,Team51_Development,Homo_sapiens (GRCh38_full_analysis_set_plus_decoy_hla),0,Team51_Development,999, +67859866,3073STDY13850380,,TCAGGAGG,AGAGTAGA,,GBS,TCAGGAGG,AGAGTAGA,user1@some.ac.uk user2@some.ac.uk user3@some.ac.uk user4@some.ac.uk,,user1@some.ac.uk user1@some.ac.uk user2@some.ac.uk user2@some.ac.uk user3@some.ac.uk user3@some.ac.uk user4@some.ac.uk user4@some.ac.uk,,PFA_GRC1_v1.0,0,0,68525973,0,67859866,,5833,S0910,,,standard,,,from:450 to:450,,,Plasmodium falciparum,0,,,3073STDY13850380,8762891,0,3073STDY13850380,,,110723_negative39,,,1,0,0,Development of sequencing and library prep protocols using Human DNA ,3073,Team51_Development,Homo_sapiens (GRCh38_full_analysis_set_plus_decoy_hla),0,Team51_Development,1000,