Skip to content

Commit

Permalink
Merge pull request wtsi-npg#842 from mgcam/archive_incomplete_genotyp…
Browse files Browse the repository at this point in the history
…e_results

Archive incomplete genotype results
  • Loading branch information
nerdstrike authored Oct 18, 2023
2 parents 3ac9f03 + 8315fa7 commit ba422cf
Show file tree
Hide file tree
Showing 5 changed files with 75 additions and 10 deletions.
11 changes: 7 additions & 4 deletions lib/npg_qc/autoqc/checks/genotype.pm
Original file line number Diff line number Diff line change
Expand Up @@ -100,8 +100,8 @@ has 'sequenom_plex' => (
);

# snp_call_set - this specifies the set of loci to be called. This may be
# the same for different sequenom_plex data sets. It is used to construct
# the name of some information files (positions, alleles, etc)
# the same for different sequenom_plex data sets. It is used to construct
# the name of some information files (positions, alleles, etc)
has 'snp_call_set' => (
is => 'ro',
isa => 'Str',
Expand Down Expand Up @@ -426,9 +426,9 @@ override 'can_run' => sub {
if(! any { $_ eq fileparse($self->reference_fasta); } (keys %{$self->_ref_to_snppos_suffix_map})) {
$self->result->add_comment('Specified reference genome may be non-human');
return 0;
}
}

return 1;
return 1;
};

override 'execute' => sub {
Expand All @@ -444,6 +444,9 @@ override 'execute' => sub {
};

if(!$self->can_run()) {
# The value has to be set in order to be able to upload the result
# to the database; it is a part of the unique constraint.
$self->result->snp_call_set($self->snp_call_set);
return 1;
}

Expand Down
51 changes: 46 additions & 5 deletions t/60-autoqc-checks-genotype.t
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,15 @@ use strict;
use warnings;
use Cwd;
use File::Temp qw/ tempdir /;
use Test::More tests => 10;
use Test::More tests => 11;
use Test::Exception;
use File::Copy;
use Perl6::Slurp;

use WTSI::NPG::iRODS;

use_ok ('npg_qc::autoqc::checks::genotype');
use_ok ('npg_qc::autoqc::results::genotype');

my $dir = tempdir(CLEANUP => 1);
my $pid = $$;
Expand All @@ -23,6 +27,8 @@ if ($env_file && $have_irods_execs) {
$irods_tmp_coll = $irods->add_collection("GenotypeTest.$pid") ;
}

my $ref_repos = cwd . '/t/data/autoqc';

sub exist_irods_executables {
return 0 unless `which ienv`;
return 0 unless `which imkdir`;
Expand All @@ -37,10 +43,46 @@ END {
}
}

# Exercises the early-exit path of the genotype check: when can_run()
# is false the check must still produce an output JSON file that carries
# the comment explaining why and a snp_call_set value.
subtest 'Test early exit' => sub {
  plan tests => 6;

  local $ENV{'NPG_CACHED_SAMPLESHEET_FILE'} =
    't/data/autoqc/samplesheets/samplesheet_47646.csv';
  my $input_file    = "$dir/47646_1#999.cram";
  my $output_file   = "$dir/47646_1#999.genotype.json";
  my $genotypes_dir = "$dir/genotypes";

  # Fail loudly if the fixture cannot be set up; otherwise a broken
  # environment shows up later as a misleading assertion failure.
  copy('t/data/autoqc/alignment.bam', $input_file)
    or die "Failed to copy t/data/autoqc/alignment.bam to $input_file: $!";
  (-d $genotypes_dir) or mkdir($genotypes_dir)
    or die "Failed to create directory $genotypes_dir: $!";

  my $fasta = join q[/], $ref_repos, 'references',
    'Homo_sapiens/GRCh38_full_analysis_set_plus_decoy_hla/all/fasta',
    'GRCh38_full_analysis_set_plus_decoy_hla.fasta';

  # Set the reference_fasta path explicitly, the CI pipeline has
  # difficulty inferring it.
  my $check = npg_qc::autoqc::checks::genotype->new(
    rpt_list             => '47646:1:999',
    input_files          => [$input_file],
    qc_out               => $dir,
    repository           => $ref_repos,
    genotypes_repository => $genotypes_dir,
    reference_fasta      => $fasta
  );
  isa_ok ($check, 'npg_qc::autoqc::checks::genotype');

  # For a check that cannot be run in full in a meaningful way we
  # need to save enough data in the output JSON file to allow for
  # this output to be uploaded to the database.
  lives_ok { $check->run() } 'check executed';
  ok (-e $output_file, "output file $output_file has been generated");

  my $result = npg_qc::autoqc::results::genotype->thaw(slurp($output_file));
  is ($result->comments(), 'Specified reference genome may be non-human',
    'correct reason for not running in full is captured');
  ok ($result->snp_call_set(), 'the snp_call_set attribute is set');

  # can_run() adds a comment to the result each time it returns false,
  # so it is called here, after the output has been examined, to avoid
  # a duplicated comment in the assertions above.
  ok (!$check->can_run(), 'the check cannot be run in full');
};

SKIP: {
skip 'iRODS not available', 9 unless $irods_tmp_coll;
skip 'iRODS not available', 8 unless $irods_tmp_coll;

my $ref_repos = cwd . '/t/data/autoqc';
my $expected_md5 = q[a4790111996a3f1c0247d65f4998e492];

my $st = join q[/], $dir, q[samtools];
Expand All @@ -52,7 +94,7 @@ SKIP: {
local $ENV{PATH} = join q[:], $dir, $ENV{PATH};
my $data_dir = $dir."/data";
mkdir($data_dir);
`cp t/data/autoqc/alignment.bam $data_dir/2_1.bam`;
copy('t/data/autoqc/alignment.bam', "$data_dir/2_1.bam");
`echo -n $expected_md5 > $data_dir/2_1.bam.md5`;

# populate a temporary iRODS collection
Expand All @@ -66,7 +108,6 @@ SKIP: {
input_files => ["$data_dir/2_1.bam"],
repository => $ref_repos,
);
isa_ok ($r, 'npg_qc::autoqc::checks::genotype');
lives_ok { $r->result; } 'No error creating result object';
lives_ok {$r->samtools } 'No error calling "samtools" accessor';
is($r->samtools, $st, 'correct samtools path');
Expand Down
16 changes: 15 additions & 1 deletion t/60-autoqc-db_loader.t
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use strict;
use warnings;
use Test::More tests => 21;
use Test::More tests => 22;
use Test::Exception;
use Test::Warn;
use Moose::Meta::Class;
Expand Down Expand Up @@ -874,4 +874,18 @@ subtest 'loading review and other results from path' => sub {
is( $row->criteria_md5, '27c522a795e99e3aea57162541de75b1', 'criteria_md5 column populated');
};

# Loads autoqc results from the short_results fixture directory into a
# freshly created test database and expects exactly one Genotype row.
subtest 'loading partially defined results' => sub {
  plan tests => 2;

  my $schema = $db_helper->create_test_db(
    q[npg_qc::Schema], 't/data/fixtures'
  );

  # Collect the constructor arguments first so the loader set-up reads
  # as a single, named configuration.
  my %loader_args = (
    path    => ['t/data/autoqc/dbix_loader/short_results'],
    schema  => $schema,
    verbose => 0,
  );
  my $loader = npg_qc::autoqc::db_loader->new(%loader_args);

  lives_ok { $loader->load() } 'no error loading an incomplete result';

  my $num_genotype_rows =
    $schema->resultset('Genotype')->search({})->count();
  is ($num_genotype_rows, 1, 'one genotype record is created');
};

1;
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"__CLASS__":"npg_qc::autoqc::results::genotype","comments":"Specified reference genome may be non-human","composition":{"__CLASS__":"npg_tracking::glossary::composition-96.0.0","components":[{"__CLASS__":"npg_tracking::glossary::composition::component::illumina-96.0.0","id_run":47646,"position":1,"tag_index":999}]},"id_run":47646,"info":{"Check":"npg_qc::autoqc::checks::genotype","Check_version":"0"},"position":1,"snp_call_set":"W30467","tag_index":999}
6 changes: 6 additions & 0 deletions t/data/autoqc/samplesheets/samplesheet_47646.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
[Data],,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Sample_ID,Sample_Name,GenomeFolder,Index,Index2,bait_name,default_library_type,default_tag_sequence,default_tagtwo_sequence,email_addresses,email_addresses_of_followers,email_addresses_of_managers,email_addresses_of_owners,gbs_plex_name,is_control,is_pool,lane_id,lane_priority,library_name,organism,organism_taxon_id,project_cost_code,project_id,project_name,purpose,qc_state,request_id,required_insert_size_range,sample_accession_number,sample_cohort,sample_common_name,sample_consent_withdrawn,sample_control_type,sample_description,sample_donor_id,sample_id,sample_is_control,sample_name,sample_public_name,sample_reference_genome,sample_supplier_name,spiked_phix_tag_index,study_accession_number,study_alignments_in_bam,study_contains_nonconsented_human,study_contains_nonconsented_xahuman,study_description,study_id,study_name,study_reference_genome,study_separate_y_chromosome_data,study_title,tag_index,
67859722,3073STDY13850377,,TGAACTGG,AGAGTAGA,,GBS,TGAACTGG,AGAGTAGA,[email protected] [email protected] [email protected] [email protected],,[email protected] [email protected] [email protected] [email protected] [email protected] [email protected] [email protected] [email protected],,PFA_GRC1_v1.0,0,0,68525973,0,67859722,,5833,S0910,,,standard,,,from:450 to:450,,,Plasmodium falciparum,0,,,3073STDY13850377,8762888,0,3073STDY13850377,,,110723_negative36,,,1,0,0,Development of sequencing and library prep protocols using Human DNA ,3073,Team51_Development,Homo_sapiens (GRCh38_full_analysis_set_plus_decoy_hla),0,Team51_Development,997,
67859770,3073STDY13850378,,TACTTCGG,AGAGTAGA,,GBS,TACTTCGG,AGAGTAGA,[email protected] [email protected] [email protected] [email protected],,[email protected] [email protected] [email protected] [email protected] [email protected] [email protected] [email protected] [email protected],,PFA_GRC1_v1.0,0,0,68525973,0,67859770,,5833,S0910,,,standard,,,from:450 to:450,,,Plasmodium falciparum,0,,,3073STDY13850378,8762889,0,3073STDY13850378,,,110723_negative37,,,1,0,0,Development of sequencing and library prep protocols using Human DNA ,3073,Team51_Development,Homo_sapiens (GRCh38_full_analysis_set_plus_decoy_hla),0,Team51_Development,998,
67859818,3073STDY13850379,,TCTCACGG,AGAGTAGA,,GBS,TCTCACGG,AGAGTAGA,[email protected] [email protected] [email protected] [email protected],,[email protected] [email protected] [email protected] [email protected] [email protected] [email protected] [email protected] [email protected],,PFA_GRC1_v1.0,0,0,68525973,0,67859818,,5833,S0910,,,standard,,,from:450 to:450,,,Plasmodium falciparum,0,,,3073STDY13850379,8762890,0,3073STDY13850379,,,110723_negative38,,,1,0,0,Development of sequencing and library prep protocols using Human DNA ,3073,Team51_Development,Homo_sapiens (GRCh38_full_analysis_set_plus_decoy_hla),0,Team51_Development,999,
67859866,3073STDY13850380,,TCAGGAGG,AGAGTAGA,,GBS,TCAGGAGG,AGAGTAGA,[email protected] [email protected] [email protected] [email protected],,[email protected] [email protected] [email protected] [email protected] [email protected] [email protected] [email protected] [email protected],,PFA_GRC1_v1.0,0,0,68525973,0,67859866,,5833,S0910,,,standard,,,from:450 to:450,,,Plasmodium falciparum,0,,,3073STDY13850380,8762891,0,3073STDY13850380,,,110723_negative39,,,1,0,0,Development of sequencing and library prep protocols using Human DNA ,3073,Team51_Development,Homo_sapiens (GRCh38_full_analysis_set_plus_decoy_hla),0,Team51_Development,1000,

0 comments on commit ba422cf

Please sign in to comment.