diff --git a/MANIFEST b/MANIFEST index b65ecdf9..fb1d55f3 100644 --- a/MANIFEST +++ b/MANIFEST @@ -226,7 +226,6 @@ lib/npg_tracking/illumina/run.pm lib/npg_tracking/illumina/runfolder.pm lib/npg_tracking/illumina/run/folder.pm lib/npg_tracking/illumina/run/long_info.pm -lib/npg_tracking/illumina/run/short_info.pm lib/npg_tracking/Schema.pm lib/npg_tracking/Schema/Result/Annotation.pm lib/npg_tracking/Schema/Result/Designation.pm @@ -409,7 +408,6 @@ t/50-decorator.t t/60-illumina-runfolder.t t/60-illumina-run-folder.t t/60-illumina-run-long_info.t -t/60-illumina-run-short_info.t t/60-illumina-run.t t/60-util-mailer.t t/70-bin-npg_status2file.t diff --git a/lib/npg_tracking/illumina/run/folder.pm b/lib/npg_tracking/illumina/run/folder.pm index 2af32ba7..7959470e 100644 --- a/lib/npg_tracking/illumina/run/folder.pm +++ b/lib/npg_tracking/illumina/run/folder.pm @@ -1,7 +1,7 @@ package npg_tracking::illumina::run::folder; use Moose::Role; -use Moose::Meta::Class; +use Moose::Util::TypeConstraints; use File::Spec::Functions qw/splitdir catfile catdir/; use Carp; use Cwd qw/getcwd/; @@ -10,6 +10,7 @@ use Readonly; use Math::Random::Secure qw/irand/; use List::Util qw/first/; +use npg_tracking::util::types; use npg_tracking::util::abs_path qw/abs_path/; use npg_tracking::Schema; use npg_tracking::glossary::lane; @@ -17,8 +18,8 @@ use npg_tracking::util::config qw/get_config_staging_areas/; our $VERSION = '0'; -with qw{ npg_tracking::illumina::run - npg_tracking::illumina::run::short_info }; +with 'npg_tracking::illumina::run'; + # Top-level directory where instruments create runfolders Readonly::Scalar my $INCOMING_DIR => q{/incoming/}; @@ -60,6 +61,86 @@ Readonly::Hash my %NPG_PATH => ( q{qc_path} => 'Path directory with top level QC data', ); +has q{id_run} => ( + isa => q{NpgTrackingRunId}, + is => q{ro}, + required => 0, + lazy_build => 1, + documentation => 'Integer identifier for a sequencing run', +); +sub _build_id_run { + my ($self) = @_; + + my $id_run; + + if ($self->npg_tracking_schema()) { + if (!$self->has_run_folder()) { + $self->run_folder(); # Force the build + } + my $rs = $self->npg_tracking_schema()->resultset('Run') + ->search({folder_name => $self->run_folder()}); + if ($rs->count == 1) { + $id_run = $rs->next()->id_run(); + } + } + + # When no id_run is set, attempt to parse an id_run from the experiment name + # recorded in the Illumina XML file. + # We embed additional information in NovaSeqX samplesheets which have no + # meaning here. See L + if ( !$id_run && $self->can('experiment_name') && $self->experiment_name() ) { + ($id_run, undef) = $self->experiment_name() =~ m{ + \A + [\s]* + ([\d]+) # id_run + ([\w\d\s]*) # instrument name or other embedded info + \Z + }xms; + } + + if( !$id_run ) { + croak q[Unable to identify id_run with data provided]; + } + + return $id_run; +} + + +my $run_folder_subtype_name = __PACKAGE__.q(::folder); +subtype $run_folder_subtype_name + => as 'Str' + => where { splitdir($_)==1 }; + +has q{run_folder} => ( + isa => $run_folder_subtype_name, + is => q{ro}, + lazy_build => 1, + documentation => 'Directory name of the run folder', +); +sub _build_run_folder { + my ($self) = @_; + ($self->subpath or $self->has_id_run) + or croak 'Need a path or id_run to work out a run_folder'; + return first {$_ ne q()} reverse File::Spec->splitdir($self->runfolder_path); +} + + +has q{npg_tracking_schema} => ( + isa => q{Maybe[npg_tracking::Schema]}, + is => q{ro}, + lazy_build => 1, +); +sub _build_npg_tracking_schema { + my $schema; + try { + $schema = npg_tracking::Schema->connect(); + } catch { + carp qq{Unable to connect to NPG tracking DB for faster globs.\n}; + }; + return $schema; +} + + foreach my $path_attr ( keys %NPG_PATH ) { has $path_attr => ( isa => q{Str}, @@ -91,52 +172,36 @@ sub set_bam_basecall_path { return $self->bam_basecall_path; } -has q{npg_tracking_schema} => ( - isa => q{Maybe[npg_tracking::Schema]}, - is => q{ro}, - lazy_build => 1, -); -sub _build_npg_tracking_schema { - my $schema; - try { - $schema = npg_tracking::Schema->connect(); - } catch { - carp qq{Unable to connect to NPG tracking DB for faster globs.\n}; - }; - return $schema; -} - -# Build method for the 'run_folder' attribute in -# npg_tracking::illumina::run::short_info -sub _build_run_folder { - my ($self) = @_; - ($self->subpath or $self->has_id_run) - or croak 'Need a path or id_run to work out a run_folder'; - return first {$_ ne q()} reverse File::Spec->splitdir($self->runfolder_path); -} - sub _build_runfolder_path { my ($self) = @_; my $path; + my $runfolder_name = $self->has_run_folder ? $self->run_folder : undef; + + # Try to use one of paths (if any) supplied via a constructor to figure out + # the location of the run folder directory. This method examines the + # directory structure looking for subdirectories, which normally exist in + # the Illumina run folder. if ($self->subpath()) { $path = _get_path_from_given_path($self->subpath()); - $path && return $path; - } - - my $db_runfolder_name; - my $runfolder_name; - if ($self->can('run_folder') and $self->has_run_folder) { - $runfolder_name = $self->run_folder; } - if ($self->npg_tracking_schema() and $self->id_run()) { + # Try to get the run folder name and glob from the database and then glob + # for the run folder directory. Limit this search to run folders that + # are known to be on staging. + if ((not $path) and $self->npg_tracking_schema()) { + # The code below needs run ID, so 'id_run' will be built if not given. if (not $self->tracking_run->is_tag_set(q(staging))) { croak sprintf 'NPG tracking reports run %i no longer on staging', $self->id_run; } - $db_runfolder_name = $self->tracking_run->folder_name; + my $db_runfolder_name = $self->tracking_run->folder_name; if ($db_runfolder_name) { + if ($runfolder_name and ($db_runfolder_name ne $runfolder_name)) { + # Probably this is an error. Warn for now. + carp sprintf 'Inconsistent db and given run folder name: %s, %s', + $db_runfolder_name, $runfolder_name; + } if (my $gpath = $self->tracking_run->folder_path_glob) { $path = $self->_get_path_from_glob_pattern( catfile($gpath, $db_runfolder_name) @@ -145,21 +210,23 @@ sub _build_runfolder_path { } } - if ( (not $path) and $runfolder_name ) { + # Try to use the runfolder name, if set via the constructor, and the + # staging area prefix from the 'npg_tracking' configuration file to + # glob the file system. This is the most expensive file system glob, + # so doing this as the last resort. + if ((not $path) and $runfolder_name) { $path = $self->_get_path_from_glob_pattern( $self->_folder_path_glob_pattern() . $runfolder_name ); } - if ( $db_runfolder_name and $runfolder_name and - ($db_runfolder_name ne $runfolder_name) ) { - carp sprintf 'Inconsistent db and given run folder name: %s, %s', - $db_runfolder_name, $runfolder_name; - } + # Most likely, the code execution will not advance this far without $path + # being computed. In case of problems an error will be raised by one of + # the methods called above. Returning an undefined path will trigger an + # error since the 'runfolder_path' attribute is defined as a string. + # Raising an error here to help with deciphering error messages. - if (not $path) { - croak 'Failed to infer runfolder_path'; - } + $path or croak 'Failed to infer runfolder_path'; return $path; } @@ -403,6 +470,25 @@ recalibrated directory, which will be used to construct other paths from. =head1 SUBROUTINES/METHODS +=head2 id_run + +An attribute, NPG run identifier. If the value is not supplied, an attempt +to build it is made. + +If access to a run tracking database is available and the database contains +the run record and the run folder name is defined in the database record and +the run_folder attribute is defined or can be built, then its value is used +to retrieve the id_run value from the database. + +If 'experiment_name' accessor is provided by the class that inherits from +this role, then, in the absence of a database record, an attempt is made to parse +out run ID from the value returned by the 'experiment_name' accessor. See +npg_tracking::illumina::run::long_info for the implementation of this accessor. + +=head2 run_folder + +An attribute, run folder name, can be set in the constructor or lazy-built. + =head2 npg_tracking_schema npg_tracking::Schema db handle object, which is allowed to be assigned an @@ -459,7 +545,7 @@ Might be undefined. =item Moose::Role -=item Moose::Meta::Class +=item Moose::Util::TypeConstraints =item Carp diff --git a/lib/npg_tracking/illumina/run/short_info.pm b/lib/npg_tracking/illumina/run/short_info.pm deleted file mode 100644 index f40c5886..00000000 --- a/lib/npg_tracking/illumina/run/short_info.pm +++ /dev/null @@ -1,174 +0,0 @@ -package npg_tracking::illumina::run::short_info; - -use Moose::Role; -use Moose::Util::TypeConstraints; -use File::Spec::Functions qw(splitdir); -use Carp; -use Try::Tiny; -use Readonly; - -use npg_tracking::util::types; - -our $VERSION = '0'; - -has q{id_run} => ( - isa => q{NpgTrackingRunId}, - is => q{ro}, - required => 0, - lazy_build => 1, - documentation => 'Integer identifier for a sequencing run', -); - -my $run_folder_subtype_name = __PACKAGE__.q(::folder); -subtype $run_folder_subtype_name - => as 'Str' - => where { splitdir($_)==1 }; - -has q{run_folder} => ( - isa => $run_folder_subtype_name, - is => q{ro}, - lazy_build => 1, - documentation => 'Directory name of the run folder', -); - -sub _build_id_run { - my ($self) = @_; - - my $id_run; - - if ($self->can(q(npg_tracking_schema)) and $self->npg_tracking_schema()) { - if (!$self->has_run_folder()) { - $self->run_folder(); # Force the build - } - my $rs = $self->npg_tracking_schema()->resultset('Run') - ->search({folder_name => $self->run_folder()}); - if ($rs->count == 1) { - $id_run = $rs->next()->id_run(); - } - } - - # When no id_run is set, attempt to parse an id_run from the experiment name - # recorded in the Illumina XML file. - # We embed additional information in NovaSeqX samplesheets which have no - # meaning here. See L - if ( !$id_run && $self->can('experiment_name') && $self->experiment_name() ) { - ($id_run, undef) = $self->experiment_name() =~ m{ - \A - [\s]* - ([\d]+) # id_run - ([\w\d\s]*) # instrument name or other embedded info - \Z - }xms; - } - - if( !$id_run ) { - croak q[Unable to identify id_run with data provided]; - } - - return $id_run; -} - -no Moose::Role; - -1; - -__END__ - -=head1 NAME - -npg_tracking::illumina::run::short_info - -=head1 VERSION - -=head1 SYNOPSIS - - package Mypackage; - use Moose; - with q{npg_tracking::illumina::run::short_info}; - -=head1 DESCRIPTION - -=head1 SUBROUTINES/METHODS - -=head2 id_run - -NPG run identifier. If the value is not supplied, an attempt to build it is -made. - -If access to a run tracking database is available and the database contains -the run record and the run folder name is defined in the database record and -the run_folder attribute is defined or can be built, then its value is used -to retrieve the id_run value from the database. - -Access to a run tracking database is made via the 'npg_tracking_schema' -attribute, which can be provided by a class which consumes this role. See -npg_tracking::illumina::run::folder for an example implementation of the -npg_tracking_schema attribute. - -If 'experiment_name' accessor is provided by the class that inherits from -this role, then, in the absence of a database record, an attempt is made to parse -out run ID from the value returned by the 'experiment_name' accessor. See -npg_tracking::illumina::run::long_info for the implementation of this accessor. - -=head2 run_folder - -An attribute, can be set in the constructor or lazy-built. A class consuming -this role should provide a builder method '_build_run_folder'. Failure to -provide a builder might result in a run-time error. The attribute is constrained -to not contain a file-system path. - -The implementation of the build method for this attribute should not try to -retrieve run record from the tracking database. - -=head1 DIAGNOSTICS - -=head1 CONFIGURATION AND ENVIRONMENT - -=head1 DEPENDENCIES - -=over - -=item Moose::Role - -=item Moose::Util::TypeConstraints - -=item File::Spec::Functions - -=item Carp - -=item Try::Tiny - -=item Readonly - -=back - -=head1 INCOMPATIBILITIES - -=head1 BUGS AND LIMITATIONS - -=head1 AUTHOR - -=over - -=item Andy Brown - -=item Marina Gourtovaia - -=back - -=head1 LICENSE AND COPYRIGHT - -Copyright (C) 2013,2014,2015,2016,2018,2023,2024 Genome Research Ltd. - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . diff --git a/t/60-illumina-run-folder.t b/t/60-illumina-run-folder.t index dbcfcaa2..99b8b035 100644 --- a/t/60-illumina-run-folder.t +++ b/t/60-illumina-run-folder.t @@ -1,34 +1,175 @@ use strict; use warnings; -use Test::More tests => 4; +use Test::More tests => 6; use Test::Exception; use Test::Warn; use File::Temp qw(tempdir); use File::Path qw(make_path remove_tree); use Cwd; +use Try::Tiny; + +use t::dbic_util; BEGIN { # Test staging area prefix is defined in t/.npg/npg_tracking. # This config. file is used by this test. # prefix defined as /tmp/esa-sv-* local $ENV{'HOME'}=getcwd().'/t'; + # Force reading 'npg_tracking config file. use_ok(q{npg_tracking::illumina::run::folder}); } -################## start of test class #################### +################## start of test classes ################## { package test::run::folder; use Moose; with qw{npg_tracking::illumina::run::folder}; } -################## end of test class #################### + +{ + package test::nvx_short_info; + use Moose; + with 'npg_tracking::illumina::run::folder'; + + has experiment_name => (is => 'rw'); +} +################## end of test classes #################### package main; my $basedir = tempdir( template => 'esa-sv-XXXXXXXXXX', TMPDIR => 1, CLEANUP => 1); -subtest 'standard runfolder' => sub { +my $schema = t::dbic_util->new->test_schema( + fixture_path => q[t/data/dbic_fixtures]); + + +subtest 'set and build id_run and run_folder attributes' => sub { + plan tests => 8; + + throws_ok { + test::run::folder->new( + run_folder => q[export/sv03/my_folder], + npg_tracking_schema => undef + ) + } qr{Attribute \(run_folder\) does not pass the type constraint}, + 'error supplying a directory path as the run_folder attribute value'; + + throws_ok { + test::run::folder->new(run_folder => q[], npg_tracking_schema => undef) + } qr{Attribute \(run_folder\) does not pass the type constraint}, + 'error supplying an empty atring as the run_folder attribute value'; + + my $obj = test::run::folder->new( + run_folder => q[my_folder], + id_run => 1234, + npg_tracking_schema => undef + ); + is ($obj->run_folder, 'my_folder', 'the run_folder value is as set'); + is ($obj->id_run, 1234, 'id_run value is as set'); + + $obj = test::run::folder->new( + run_folder => q[my_folder], + npg_tracking_schema => undef + ); + throws_ok { $obj->id_run } qr{Unable to identify id_run with data provided}, + 'error building id_run'; + + $obj = test::run::folder->new( + run_folder => 'xxxxxx', + npg_tracking_schema => $schema + ); + throws_ok { $obj->id_run } qr{Unable to identify id_run with data provided}, + 'error building id_run when no db record for the run folder exists'; + + my $rf = q[20231017_LH00210_0012_B22FCNFLT3]; + + { + # DB schema handle is not set, an attempt to build it will be made. + # Since the user HOME is reset, the file with db credentials does not exist. + local $ENV{'HOME'}=getcwd().'/t'; + $obj = test::run::folder->new(run_folder => $rf); + throws_ok { $obj->id_run } qr{Unable to identify id_run with data provided}, + 'error building id_run'; + } + + $obj = test::run::folder->new( + run_folder => $rf, + npg_tracking_schema => $schema + ); + is ($obj->id_run, 47995, 'id_run value retrieved from the database record'); +}; + +subtest 'test id_run extraction from within experiment_name' => sub { + plan tests => 8; + + my $short_info; + { + # DB schema handle is not set, an attempt to build it will be made. + # Since the user HOME is reset, the file with db credentials does not exist. + local $ENV{'HOME'}=getcwd().'/t'; + + $short_info = test::nvx_short_info->new( + experiment_name => '45678_NVX1_A', + run_folder => 'not_a_folder' + ); + my $id_run; + warning_like { $id_run = $short_info->id_run } + qr /Unable to connect to NPG tracking DB for faster globs/, + 'warning about a failure to connect to the database'; + is($id_run, '45678', 'id_run parsed from experiment name'); + } + + $short_info = test::nvx_short_info->new( + experiment_name => ' 45678_NVX1_A ', + run_folder => 'not_a_folder', + npg_tracking_schema => undef + ); + is($short_info->id_run, '45678', + 'id_run parsed from loosely formatted experiment name'); + + $short_info = test::nvx_short_info->new( + experiment_name => '45678_NVX1_A ', + run_folder => 'not_a_folder', + npg_tracking_schema => undef + ); + is($short_info->id_run, '45678', + 'id_run parsed from experiment name with postfix spaces'); + + $short_info = test::nvx_short_info->new( + experiment_name => ' 45678_NVX1_A', + run_folder => 'not_a_folder', + npg_tracking_schema => undef + ); + is($short_info->id_run, '45678', + 'id_run parsed from experiment name with prefixed spaces'); + + $short_info = test::nvx_short_info->new( + experiment_name => '45678', + run_folder => 'not_a_folder', + npg_tracking_schema => undef + ); + is($short_info->id_run, '45678', 'Bare id_run as experiment name is fine'); + + $short_info = test::nvx_short_info->new( + experiment_name => 'NovaSeqX_WHGS_TruSeqPF_NA12878', + run_folder => 'not_a_folder', + npg_tracking_schema => undef + ); + throws_ok { $short_info->id_run } + qr{Unable to identify id_run with data provided}, + 'Custom run name cannot be parsed'; + + $short_info = test::nvx_short_info->new( + id_run => '45678', + experiment_name => '56789_NVX1_A', + run_folder => 'not_a_folder', + npg_tracking_schema => undef + ); + is($short_info->id_run, '45678', 'Set id_run wins over experiment_name'); +}; + +subtest 'standard runfolder, no DB access' => sub { plan tests => 20; my $run_folder = q{20231019_LH00275_0006_B19NJCA4LE}; @@ -120,7 +261,7 @@ subtest 'standard runfolder' => sub { 'absence of bam_basecall directory is an error'; }; -subtest 'runfolder with an unusual path' => sub { +subtest 'runfolder with an unusual path, no DB access' => sub { plan tests => 12; my $path = join q[/], $basedir, qw/aa bb cc dd/; diff --git a/t/60-illumina-run-short_info.t b/t/60-illumina-run-short_info.t deleted file mode 100644 index dc2cdbce..00000000 --- a/t/60-illumina-run-short_info.t +++ /dev/null @@ -1,116 +0,0 @@ -use strict; -use warnings; -use Test::More tests => 5; -use Test::Exception; -use Moose::Meta::Class; - -use t::dbic_util; - -use_ok(q{npg_tracking::illumina::run::short_info}); - -my $schema = t::dbic_util->new->test_schema( - fixture_path => q[t/data/dbic_fixtures]); - -my $rfname = q[20231017_LH00210_0012_B22FCNFLT3]; - -# Start of package test::short_info -package test::short_info; -use Moose; -with qw{npg_tracking::illumina::run::short_info}; - -sub _build_run_folder { return $rfname; } -# End of package test::short_info - -# Start of package test::db_short_info -package test::db_short_info; -use Moose; -use npg_tracking::Schema; -with qw{npg_tracking::illumina::run::short_info}; - -has q{npg_tracking_schema} => ( - isa => 'npg_tracking::Schema', - is => 'ro', - default => sub { return $schema }, -); -# End of package test::db_short_info - -# Start of package test::nvx_short_info -package test::nvx_short_info; -use Moose; -with 'npg_tracking::illumina::run::short_info'; - -has experiment_name => (is => 'rw'); -# End of package test::nvx_short_info - -package main; - -subtest 'object derived directly from the role' => sub { - plan tests => 6; - - my $class = Moose::Meta::Class->create_anon_class( - roles => [qw/npg_tracking::illumina::run::short_info/] - ); - - throws_ok { $class->new_object(id_run => 1234)->run_folder() } - qr{does not support builder method '_build_run_folder'}, - q{Error thrown as no _build_run_folder method in class}; - - throws_ok { $class->new_object(run_folder => q[export/sv03/my_folder]) } - qr{Attribute \(run_folder\) does not pass the type constraint}, - 'error supplying a directory path as the run_folder attribute value'; - - throws_ok { $class->new_object(run_folder => q[]) } - qr{Attribute \(run_folder\) does not pass the type constraint}, - 'error supplying an empty atring as the run_folder attribute value'; - - my $obj = $class->new_object(run_folder => q[my_folder], id_run => 1234); - is ($obj->run_folder, 'my_folder', 'the run_folder value is as set'); - is ($obj->id_run, 1234, 'id_run value is as set'); - - throws_ok { $class->new_object(run_folder => q[my_folder])->id_run } - qr{Unable to identify id_run with data provided}, - 'error building id_run'; -}; - -subtest 'object with a bulder method for run_folder' => sub { - plan tests => 1; - - is (test::short_info->new(id_run => 47995)->run_folder, $rfname, - 'value of run_folder attribute is set by the builder method'); -}; - -subtest 'object with access to tracking database' => sub { - plan tests => 2; - - throws_ok { test::db_short_info->new(run_folder => 'xxxxxx')->id_run } - qr{Unable to identify id_run with data provided}, - 'error building id_run when no db record for the run folder exists'; - is (test::db_short_info->new(run_folder => $rfname)->id_run, 47995, - 'id_run value retrieved from the database recprd'); -}; - -subtest 'Test id_run extraction from within experiment_name' => sub { - plan tests => 7; - my $short_info = test::nvx_short_info->new(experiment_name => '45678_NVX1_A', run_folder => 'not_a_folder'); - is($short_info->id_run, '45678', 'id_run parsed from experiment name'); - - $short_info = test::nvx_short_info->new(experiment_name => ' 45678_NVX1_A ', run_folder => 'not_a_folder'); - is($short_info->id_run, '45678', 'id_run parsed from loosely formatted experiment name'); - - $short_info = test::nvx_short_info->new(experiment_name => '45678_NVX1_A ', run_folder => 'not_a_folder'); - is($short_info->id_run, '45678', 'id_run parsed from experiment name with postfix spaces'); - - $short_info = test::nvx_short_info->new(experiment_name => ' 45678_NVX1_A', run_folder => 'not_a_folder'); - is($short_info->id_run, '45678', 'id_run parsed from experiment name with prefixed spaces'); - - $short_info = test::nvx_short_info->new(experiment_name => '45678', run_folder => 'not_a_folder'); - is($short_info->id_run, '45678', 'Bare id_run as experiment name is fine'); - - $short_info = test::nvx_short_info->new(experiment_name => 'NovaSeqX_WHGS_TruSeqPF_NA12878', run_folder => 'not_a_folder'); - throws_ok { $short_info->id_run } qr{Unable to identify id_run with data provided}, 'Custom run name cannot be parsed'; - - $short_info = test::nvx_short_info->new(id_run => '45678', experiment_name => '56789_NVX1_A', run_folder => 'not_a_folder'); - is($short_info->id_run, '45678', 'Set id_run wins over experiment_name'); -}; - -1;