diff --git a/MANIFEST b/MANIFEST index cacc1d62..fb1d55f3 100644 --- a/MANIFEST +++ b/MANIFEST @@ -225,9 +225,7 @@ lib/npg_tracking/glossary/tag.pm lib/npg_tracking/illumina/run.pm lib/npg_tracking/illumina/runfolder.pm lib/npg_tracking/illumina/run/folder.pm -lib/npg_tracking/illumina/run/folder/location.pm lib/npg_tracking/illumina/run/long_info.pm -lib/npg_tracking/illumina/run/short_info.pm lib/npg_tracking/Schema.pm lib/npg_tracking/Schema/Result/Annotation.pm lib/npg_tracking/Schema/Result/Designation.pm @@ -410,7 +408,6 @@ t/50-decorator.t t/60-illumina-runfolder.t t/60-illumina-run-folder.t t/60-illumina-run-long_info.t -t/60-illumina-run-short_info.t t/60-illumina-run.t t/60-util-mailer.t t/70-bin-npg_status2file.t diff --git a/lib/npg_tracking/illumina/run/folder.pm b/lib/npg_tracking/illumina/run/folder.pm index 73f4c772..01654f0b 100644 --- a/lib/npg_tracking/illumina/run/folder.pm +++ b/lib/npg_tracking/illumina/run/folder.pm @@ -1,22 +1,25 @@ package npg_tracking::illumina::run::folder; use Moose::Role; -use Moose::Meta::Class; -use File::Spec::Functions qw(splitdir catfile catdir); +use Moose::Util::TypeConstraints; +use File::Spec::Functions qw/splitdir catfile catdir/; use Carp; use Cwd qw/getcwd/; use Try::Tiny; use Readonly; use Math::Random::Secure qw/irand/; +use List::Util qw/first/; +use npg_tracking::util::types; use npg_tracking::util::abs_path qw/abs_path/; use npg_tracking::Schema; use npg_tracking::glossary::lane; -use npg_tracking::illumina::run::folder::location; +use npg_tracking::util::config qw/get_config_staging_areas/; our $VERSION = '0'; -with q{npg_tracking::illumina::run}; +with 'npg_tracking::illumina::run'; + # Top-level directory where instruments create runfolders Readonly::Scalar my $INCOMING_DIR => q{/incoming/}; @@ -37,6 +40,14 @@ Readonly::Scalar my $PP_ARCHIVE_DIR => q{pp_archive}; Readonly::Scalar our $SUMMARY_LINK => q{Latest_Summary}; Readonly::Scalar my $QC_DIR => q{qc}; +my $config=get_config_staging_areas(); +# 
The prod. value of prefix is '/export/esa-sv-*' in Feb. 2024 +# Example prod. run folder path +# /export/esa-sv-20240201-01/IL_seq_data/incoming/20240124_LH00210_0016_B225GWVLT3 +Readonly::Scalar my $STAGING_AREAS_PREFIX => $config->{'prefix'} || q(); +Readonly::Scalar my $FOLDER_PATH_PREFIX_GLOB_PATTERN => + "$STAGING_AREAS_PREFIX/IL*/*/"; + Readonly::Hash my %NPG_PATH => ( q{runfolder_path} => 'Path to and including the run folder', q{dragen_analysis_path} => 'Path to the DRAGEN analysis directory', @@ -50,10 +61,91 @@ Readonly::Hash my %NPG_PATH => ( q{qc_path} => 'Path directory with top level QC data', ); +has q{id_run} => ( + isa => q{NpgTrackingRunId}, + is => q{ro}, + required => 0, + lazy_build => 1, + documentation => 'Integer identifier for a sequencing run', +); +sub _build_id_run { + my ($self) = @_; + + my $id_run; + + if ($self->npg_tracking_schema()) { + if (!$self->has_run_folder()) { + $self->run_folder(); # Force the build + } + my $rs = $self->npg_tracking_schema()->resultset('Run') + ->search({folder_name => $self->run_folder()}); + if ($rs->count == 1) { + $id_run = $rs->next()->id_run(); + } + } + + # When no id_run is set, attempt to parse an id_run from the experiment name + # recorded in the Illumina XML file. + # We embed additional information in NovaSeqX samplesheets which have no + # meaning here. 
See L + if ( !$id_run && $self->can('experiment_name') && $self->experiment_name() ) { + ($id_run, undef) = $self->experiment_name() =~ m{ + \A + [\s]* + ([\d]+) # id_run + ([\w\d\s]*) # instrument name or other embedded info + \Z + }xms; + } + + if( !$id_run ) { + croak q[Unable to identify id_run with data provided]; + } + + return $id_run; +} + + +my $run_folder_subtype_name = __PACKAGE__.q(::folder); +subtype $run_folder_subtype_name + => as 'Str' + => where { splitdir($_)==1 }; + +has q{run_folder} => ( + isa => $run_folder_subtype_name, + is => q{ro}, + lazy_build => 1, + documentation => 'Directory name of the run folder', +); +sub _build_run_folder { + my ($self) = @_; + ($self->subpath or $self->has_id_run) + or croak 'Need a path or id_run to work out a run_folder'; + return first {$_ ne q()} reverse File::Spec->splitdir($self->runfolder_path); +} + + +has q{npg_tracking_schema} => ( + isa => q{Maybe[npg_tracking::Schema]}, + is => q{ro}, + lazy_build => 1, +); +sub _build_npg_tracking_schema { + my $schema; + try { + $schema = npg_tracking::Schema->connect(); + } catch { + carp qq{Unable to connect to NPG tracking DB for faster globs.\n}; + }; + return $schema; +} + + foreach my $path_attr ( keys %NPG_PATH ) { has $path_attr => ( isa => q{Str}, is => q{ro}, + predicate => 'has_' . 
$path_attr, lazy_build => 1, documentation => $NPG_PATH{$path_attr}, ); @@ -80,41 +172,36 @@ sub set_bam_basecall_path { return $self->bam_basecall_path; } -has q{npg_tracking_schema} => ( - isa => q{Maybe[npg_tracking::Schema]}, - is => q{ro}, - lazy_build => 1, -); -sub _build_npg_tracking_schema { - my $schema; - try { - $schema = npg_tracking::Schema->connect(); - } catch { - warn qq{WARNING: Unable to connect to NPG tracking DB for faster globs.\n}; - }; - return $schema; -} - sub _build_runfolder_path { my ($self) = @_; - my $path = $self->_get_path_from_given_path(); - $path && return $path; - - my $db_runfolder_name; - my $runfolder_name; - if ( $self->can('run_folder') and $self->has_run_folder ) { - $runfolder_name = $self->run_folder; + my $path; + my $runfolder_name = $self->has_run_folder ? $self->run_folder : undef; + + # Try to use one of paths (if any) supplied via a constructor to figure out + # the location of the run folder directory. This method examines the + # directory structure looking for subdirectories, which normally exist in + # the Illumina run folder. + if ($self->subpath()) { + $path = _get_path_from_given_path($self->subpath()); } - if ( $self->npg_tracking_schema() and - $self->can(q(id_run)) and $self->id_run() ) { + # Try to get the run folder name and glob from the database and then glob + # for the run folder directory. Limit this search to run folders that + # are known to be on staging. + if ((not $path) and $self->npg_tracking_schema()) { + # The code below needs run ID, so 'id_run' will be built if not given. if (not $self->tracking_run->is_tag_set(q(staging))) { croak sprintf 'NPG tracking reports run %i no longer on staging', $self->id_run; } - $db_runfolder_name = $self->tracking_run->folder_name; + my $db_runfolder_name = $self->tracking_run->folder_name; if ($db_runfolder_name) { + if ($runfolder_name and ($db_runfolder_name ne $runfolder_name)) { + # Probably this is an error. Warn for now. 
+ carp sprintf 'Inconsistent db and given run folder name: %s, %s', + $db_runfolder_name, $runfolder_name; + } if (my $gpath = $self->tracking_run->folder_path_glob) { $path = $self->_get_path_from_glob_pattern( catfile($gpath, $db_runfolder_name) @@ -123,21 +210,23 @@ sub _build_runfolder_path { } } - if ( (not $path) and $runfolder_name ) { + # Try to use the runfolder name, if set via the constructor, and the + # staging area prefix from the 'npg_tracking' configuration file to + # glob the file system. This is the most expensive file system glob, + # so doing this as the last resort. + if ((not $path) and $runfolder_name) { $path = $self->_get_path_from_glob_pattern( $self->_folder_path_glob_pattern() . $runfolder_name ); } - if ( $db_runfolder_name and $runfolder_name and - ($db_runfolder_name ne $runfolder_name) ) { - carp sprintf 'Inconsistent db and given run folder name: %s, %s', - $db_runfolder_name, $runfolder_name; - } + # Most likely, the code execution will not advance this far without $path + # being computed. In case of problems an error will be raised by one of + # the methods called above. Returning an undefined path will trigger an + # error since the 'runfolder_path' attribute is defined as a string. + # Raising an error here to help with deciphering error messages. - if (not $path) { - croak 'Failed to infer runfolder_path'; - } + $path or croak 'Failed to infer runfolder_path'; return $path; } @@ -244,7 +333,7 @@ has q{subpath} => ( ); sub _build_subpath { my $self = shift; - my $path; + foreach my $path_method ( qw/ recalibrated_path basecall_path intensity_path @@ -252,12 +341,11 @@ sub _build_subpath { runfolder_path / ) { my $has_path_method = q{has_} . 
$path_method; if ($self->$has_path_method()) { - $path = $self->$path_method(); - last; + return $self->$path_method(); } } - return $path; + return; } ############# @@ -266,13 +354,8 @@ sub _build_subpath { has q{_folder_path_glob_pattern} => ( isa => q{Str}, is => q{ro}, - lazy_build => 1, + default => $FOLDER_PATH_PREFIX_GLOB_PATTERN, ); -sub _build__folder_path_glob_pattern { - my $test_dir = $ENV{TEST_DIR} || q{}; - return $test_dir . - $npg_tracking::illumina::run::folder::location::FOLDER_PATH_PREFIX_GLOB_PATTERN; -} sub _infer_analysis_path { my ($path, $distance) = @_; @@ -316,25 +399,19 @@ sub _get_path_from_glob_pattern { } sub _get_path_from_given_path { - my ($self) = @_; + my ($subpath) = @_; - $self->subpath or return; - - my @subpath = splitdir( $self->subpath ); - while (@subpath) { - my $path = catdir(@subpath); - if ( -d $path # path of all remaining parts of _given_path (subpath) - and - -d catdir($path, $CONFIG_DIR) # does this directory have a Config Directory - and - -d catdir($path, $DATA_DIR) # a runfolder is likely to have a Data directory - ) { - return $path; + my @dirs = splitdir($subpath); + while (@dirs) { + my $path = catdir(@dirs); + # a runfolder has to have a Data directory + if (-d $path and -d catdir($path, $DATA_DIR)) { + return $path; } - pop @subpath; + pop @dirs; } - croak q{Nothing looks like a run_folder in any given subpath}; + croak qq{Nothing looks like a run folder in any subpath of $subpath}; } sub _get_analysis_path_from_glob { @@ -389,20 +466,42 @@ recalibrated directory, which will be used to construct other paths from. =head1 SUBROUTINES/METHODS -=head2 runfolder_path +=head2 id_run -=head2 bam_basecall +An attribute, NPG run identifier. If the value is not supplied, an attempt +to build it is made. 
-=head2 set_bam_basecall_path +If access to a run tracking database is available and the database contains +the run record and the run folder name is defined in the database record and +the run_folder attribute is defined or can be built, then its value is used +to retrieve the id_run value from the database. + +If 'experiment_name' accessor is provided by the class that inherits from +this role, then, in the absence of a database record, an attempt is made to parse +out run ID from the value returned by the 'experiment_name' accessor. See +npg_tracking::illumina::run::long_info for the implementation of this accessor. + +=head2 run_folder - Sets and returns bam_basecall_path. Error if this attribute has - already been set. +An attribute, run folder name, can be set in the constructor or lazy-built. - $obj->set_bam_basecall_path(); - print $obj->bam_basecall_path(); # BAM_basecalls_SOME-RANDOM-NUMBER +=head2 npg_tracking_schema - $obj->set_bam_basecall_path(20190122); - print $obj->bam_basecall_path(); # BAM_basecalls_20190122 +npg_tracking::Schema db handle object, which is allowed to be assigned an +undefined value. An attempt to build this attribute is made. In case of a +failure an undefined value is assigned. + +=head2 runfolder_path + +=head2 bam_basecall_path + +=head2 set_bam_basecall_path + +Sets and returns bam_basecall_path. Error if this attribute has +already been set. + + $obj->set_bam_basecall_path(); + print $obj->bam_basecall_path(); # BAM_basecalls_SOME-RANDOM-NUMBER =head2 analysis_path @@ -442,7 +541,7 @@ Might be undefined. =item Moose::Role -=item Moose::Meta::Class +=item Moose::Util::TypeConstraints =item Carp @@ -456,6 +555,8 @@ Might be undefined. 
=item Math::Random::Secure +=item List::Util + =back =head1 INCOMPATIBILITIES diff --git a/lib/npg_tracking/illumina/run/folder/location.pm b/lib/npg_tracking/illumina/run/folder/location.pm deleted file mode 100644 index 6b93924c..00000000 --- a/lib/npg_tracking/illumina/run/folder/location.pm +++ /dev/null @@ -1,73 +0,0 @@ -package npg_tracking::illumina::run::folder::location; - -use strict; -use warnings; -use Readonly; - -use npg_tracking::util::config qw(get_config_staging_areas); - -our $VERSION = '0'; - -my $config=get_config_staging_areas(); - -Readonly::Array our @STAGING_AREAS_INDEXES => @{$config->{'indexes'}||[q()]}; -Readonly::Scalar our $STAGING_AREAS_PREFIX => $config->{'prefix'} || q(); -Readonly::Array our @STAGING_AREAS => map { $STAGING_AREAS_PREFIX . $_ } - @STAGING_AREAS_INDEXES; -Readonly::Scalar our $HOST_GLOB_PATTERN => $STAGING_AREAS_PREFIX . - q[{].join(q(,), @STAGING_AREAS_INDEXES).q[}]; -Readonly::Scalar our $DIR_GLOB_PATTERN => q[{IL,HS}*/*/]; -Readonly::Scalar our $FOLDER_PATH_PREFIX_GLOB_PATTERN => "$HOST_GLOB_PATTERN/$DIR_GLOB_PATTERN"; - -1; - -__END__ - -=head1 NAME - -npg_tracking::illumina::run::folder::location - -=head1 SYNOPSIS - -=head1 DESCRIPTION - -Externally accessible constants. - -=head1 SUBROUTINES/METHODS - -=head1 DIAGNOSTICS - -=head1 CONFIGURATION AND ENVIRONMENT - -=head1 DEPENDENCIES - -=over - -=item Readonly - -=back - -=head1 INCOMPATIBILITIES - -=head1 BUGS AND LIMITATIONS - -=head1 AUTHOR - -David K. Jackson - -=head1 LICENSE AND COPYRIGHT - -Copyright (C) 2011 GRL - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see <http://www.gnu.org/licenses/>. diff --git a/lib/npg_tracking/illumina/run/short_info.pm b/lib/npg_tracking/illumina/run/short_info.pm deleted file mode 100644 index f40c5886..00000000 --- a/lib/npg_tracking/illumina/run/short_info.pm +++ /dev/null @@ -1,174 +0,0 @@ -package npg_tracking::illumina::run::short_info; - -use Moose::Role; -use Moose::Util::TypeConstraints; -use File::Spec::Functions qw(splitdir); -use Carp; -use Try::Tiny; -use Readonly; - -use npg_tracking::util::types; - -our $VERSION = '0'; - -has q{id_run} => ( - isa => q{NpgTrackingRunId}, - is => q{ro}, - required => 0, - lazy_build => 1, - documentation => 'Integer identifier for a sequencing run', -); - -my $run_folder_subtype_name = __PACKAGE__.q(::folder); -subtype $run_folder_subtype_name - => as 'Str' - => where { splitdir($_)==1 }; - -has q{run_folder} => ( - isa => $run_folder_subtype_name, - is => q{ro}, - lazy_build => 1, - documentation => 'Directory name of the run folder', -); - -sub _build_id_run { - my ($self) = @_; - - my $id_run; - - if ($self->can(q(npg_tracking_schema)) and $self->npg_tracking_schema()) { - if (!$self->has_run_folder()) { - $self->run_folder(); # Force the build - } - my $rs = $self->npg_tracking_schema()->resultset('Run') - ->search({folder_name => $self->run_folder()}); - if ($rs->count == 1) { - $id_run = $rs->next()->id_run(); - } - } - - # When no id_run is set, attempt to parse an id_run from the experiment name - # recorded in the Illumina XML file. - # We embed additional information in NovaSeqX samplesheets which have no - # meaning here. 
See L - if ( !$id_run && $self->can('experiment_name') && $self->experiment_name() ) { - ($id_run, undef) = $self->experiment_name() =~ m{ - \A - [\s]* - ([\d]+) # id_run - ([\w\d\s]*) # instrument name or other embedded info - \Z - }xms; - } - - if( !$id_run ) { - croak q[Unable to identify id_run with data provided]; - } - - return $id_run; -} - -no Moose::Role; - -1; - -__END__ - -=head1 NAME - -npg_tracking::illumina::run::short_info - -=head1 VERSION - -=head1 SYNOPSIS - - package Mypackage; - use Moose; - with q{npg_tracking::illumina::run::short_info}; - -=head1 DESCRIPTION - -=head1 SUBROUTINES/METHODS - -=head2 id_run - -NPG run identifier. If the value is not supplied, an attempt to build it is -made. - -If access to a run tracking database is available and the database contains -the run record and the run folder name is defined in the database record and -the run_folder attribute is defined or can be built, then its value is used -to retrieve the id_run value from the database. - -Access to a run tracking database is made via the 'npg_tracking_schema' -attribute, which can be provided by a class which consumes this role. See -npg_tracking::illumina::run::folder for an example implementation of the -npg_tracking_schema attribute. - -If 'experiment_name' accessor is provided by the class that inherits from -this role, then, in the absence of a database record, an attempt is made to parse -out run ID from the value returned by the 'experiment_name' accessor. See -npg_tracking::illumina::run::long_info for the implementation of this accessor. - -=head2 run_folder - -An attribute, can be set in the constructor or lazy-built. A class consuming -this role should provide a builder method '_build_run_folder'. Failure to -provide a builder might result in a run-time error. The attribute is constrained -to not contain a file-system path. - -The implementation of the build method for this attribute should not try to -retrieve run record from the tracking database. 
- -=head1 DIAGNOSTICS - -=head1 CONFIGURATION AND ENVIRONMENT - -=head1 DEPENDENCIES - -=over - -=item Moose::Role - -=item Moose::Util::TypeConstraints - -=item File::Spec::Functions - -=item Carp - -=item Try::Tiny - -=item Readonly - -=back - -=head1 INCOMPATIBILITIES - -=head1 BUGS AND LIMITATIONS - -=head1 AUTHOR - -=over - -=item Andy Brown - -=item Marina Gourtovaia - -=back - -=head1 LICENSE AND COPYRIGHT - -Copyright (C) 2013,2014,2015,2016,2018,2023,2024 Genome Research Ltd. - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see <http://www.gnu.org/licenses/>. diff --git a/lib/npg_tracking/illumina/runfolder.pm b/lib/npg_tracking/illumina/runfolder.pm index 39e1939b..7e3082b7 100644 --- a/lib/npg_tracking/illumina/runfolder.pm +++ b/lib/npg_tracking/illumina/runfolder.pm @@ -8,9 +8,6 @@ package npg_tracking::illumina::runfolder; use Moose; use MooseX::StrictConstructor; use namespace::autoclean; -use Carp; -use File::Spec; -use List::Util qw(first); our $VERSION = '0'; @@ -36,17 +33,9 @@ directory heirarchy and from the files within it. 
=cut -with qw/npg_tracking::illumina::run::folder - npg_tracking::illumina::run::short_info/; +with 'npg_tracking::illumina::run::folder'; with 'npg_tracking::illumina::run::long_info'; -sub _build_run_folder { - my $self = shift; - ($self->subpath or $self->has_id_run) - or croak 'Need a path or id_run to work out a run_folder'; - return first {$_ ne q()} reverse File::Spec->splitdir($self->runfolder_path); -} - __PACKAGE__->meta->make_immutable; 1; @@ -63,12 +52,6 @@ __PACKAGE__->meta->make_immutable; =item MooseX::StrictConstructor -=item Carp - -=item File::Spec - -=item List::Utils - =item namespace::autoclean =back diff --git a/t/.npg/npg_tracking b/t/.npg/npg_tracking index 5a7b7c0d..072e666a 100644 --- a/t/.npg/npg_tracking +++ b/t/.npg/npg_tracking @@ -1,12 +1,9 @@ #!/usr/bin/env perl my $VAR1 = { 'staging_areas' => { - 'indexes' => [18 .. 32, 34 .. 47, 49 .. 55], - 'prefix' => '/nfs/sf', + 'prefix' => '/tmp/esa-sv-*' }, 'staging_areas2webservers' => { - 'gs01' => {'npg_tracking' => 'http://gso1.san.ac.uk:678', - 'seqqc' => 'http://gso1.san.ac.uk:999'}, 'default' => {'npg_tracking' => 'http://some.san.ac.uk:678', 'seqqc' => 'http://some.san.ac.uk:999'}, 'esa-sv' => {'npg_tracking' => 'http://esa-sv.dnap.san.ac.uk:678', diff --git a/t/10-npg_tracking-util-config.t b/t/10-npg_tracking-util-config.t index 78f961e4..5efd68e2 100644 --- a/t/10-npg_tracking-util-config.t +++ b/t/10-npg_tracking-util-config.t @@ -28,7 +28,7 @@ is($config->{'root'}, '/lustre/scratch109/srpipe/', 'ref rep root retrieved'); $config = npg_tracking::util::config::get_config_staging_areas; isa_ok ($config, 'HASH'); -is($config->{'prefix'}, '/nfs/sf', 'prefix retrieved'); +is($config->{'prefix'}, '/tmp/esa-sv-*', 'prefix retrieved'); $config = npg_tracking::util::config::get_config_users; isa_ok ($config, 'HASH'); diff --git a/t/20-view-run.t b/t/20-view-run.t index 06e9ce51..951a35fd 100644 --- a/t/20-view-run.t +++ b/t/20-view-run.t @@ -29,19 +29,18 @@ my $util = 
t::util->new({fixtures => 1,}); isa_ok($view, 'npg::view::run', 'isa npg::view::run'); my $default_urls = {'npg_tracking' => 'http://some.san.ac.uk:678', 'seqqc' => 'http://some.san.ac.uk:999'}; - my $gs01_urls = {'npg_tracking' => 'http://gso1.san.ac.uk:678', - 'seqqc' => 'http://gso1.san.ac.uk:999'}; + my $esa_urls = {'npg_tracking' => 'http://esa-sv.dnap.san.ac.uk:678', + 'seqqc' => 'http://esa-sv.dnap.san.ac.uk:999'}; is_deeply($view->staging_urls(), $default_urls, 'no args, default urls returned'); is_deeply($view->staging_urls('host'), $default_urls, 'no match args, default urls returned'); - is_deeply($view->staging_urls('gs01'), $gs01_urls, + is_deeply($view->staging_urls('esa-sv'), $esa_urls, 'matching host args, correct host-specific urls returned'); my $name = 'esa-sv-20180707'; - my $esa_urls = {'npg_tracking' => qq[http://${name}.dnap.san.ac.uk:678], - 'seqqc' => qq[http://${name}.dnap.san.ac.uk:999]}; - + $esa_urls = {'npg_tracking' => qq[http://${name}.dnap.san.ac.uk:678], + 'seqqc' => qq[http://${name}.dnap.san.ac.uk:999]}; is_deeply($view->staging_urls($name), $default_urls, 'default urls - no run id, so not on staging'); @@ -52,8 +51,7 @@ my $util = t::util->new({fixtures => 1,}); id_run => 8, }), }); - is_deeply($view->staging_urls($name), $esa_urls, - 'run on staging, esa urls'); + is_deeply($view->staging_urls($name), $esa_urls, 'run on staging, esa urls'); } { diff --git a/t/60-illumina-run-folder.t b/t/60-illumina-run-folder.t index 7279f6e2..918bd2ef 100644 --- a/t/60-illumina-run-folder.t +++ b/t/60-illumina-run-folder.t @@ -1,175 +1,288 @@ use strict; use warnings; -use Test::More tests => 5; +use Test::More tests => 6; use Test::Exception; use Test::Warn; use File::Temp qw(tempdir); use File::Path qw(make_path remove_tree); -use File::Spec::Functions qw(catfile); -use Moose::Meta::Class; use Cwd; +use Try::Tiny; + +use t::dbic_util; BEGIN { + # Test staging area prefix is defined in t/.npg/npg_tracking. + # This config. 
file is used by this test. + # prefix defined as /tmp/esa-sv-* local $ENV{'HOME'}=getcwd().'/t'; + # Force reading 'npg_tracking config file. use_ok(q{npg_tracking::illumina::run::folder}); } -################## start of test class #################### +################## start of test classes ################## { package test::run::folder; use Moose; - use File::Spec::Functions qw(splitdir); - use List::Util qw(first); - - with qw{npg_tracking::illumina::run::short_info - npg_tracking::illumina::run::folder}; + with qw{npg_tracking::illumina::run::folder}; +} - sub _build_run_folder { - my ($self) = @_; - my $path = $self->runfolder_path(); - return first {$_ ne q()} reverse splitdir($path); - } +{ + package test::nvx_short_info; + use Moose; + with 'npg_tracking::illumina::run::folder'; - no Moose; + has experiment_name => (is => 'rw'); } -################## end of test class #################### +################## end of test classes #################### package main; -my $basedir = tempdir( CLEANUP => 1 ); -local $ENV{dev} = qw{non_existant_dev_enviroment}; #prevent pickup of user's config -local $ENV{TEST_DIR} = $basedir; #so when npg_tracking::illumina::run::folder globs the test director - -subtest 'standard runfolder' => sub { - plan tests => 25; - - my $instr = 'HS2'; - my $id_run = 1234; - my $run_folder = 'test_folder'; - my $runfolder_path = qq{$basedir/nfs/sf44/} . $instr . - q{/analysis/} . $run_folder; - my $data_subpath = $runfolder_path . q{/Data}; - my $intensities_subpath = $data_subpath . q{/Intensities}; - my $basecalls_subpath = $intensities_subpath . q{/BaseCalls}; - my $bbcalls_subpath = $intensities_subpath . q{/BAM_basecalls_2009-10-01}; - my $pb_cal_subpath = $bbcalls_subpath . q{/no_cal}; - my $archive_subpath = $pb_cal_subpath . q{/archive}; - my $no_archive_subpath = $bbcalls_subpath . q{/no_archive}; - my $pp_archive_subpath = $bbcalls_subpath . q{/pp_archive}; - my $qc_subpath = $archive_subpath . 
q{/qc}; - my $config_path = $runfolder_path . q{/Config}; - - my $delete_staging = sub { - remove_tree qq{$basedir/nfs}; - }; - my $create_staging = sub { - $delete_staging->(); - make_path $qc_subpath; - make_path $config_path; - }; - - $create_staging->(); +my $basedir = tempdir( + template => 'esa-sv-XXXXXXXXXX', TMPDIR => 1, CLEANUP => 1); + +my $schema = t::dbic_util->new->test_schema( + fixture_path => q[t/data/dbic_fixtures]); + + +subtest 'set and build id_run and run_folder attributes' => sub { + plan tests => 8; + + throws_ok { + test::run::folder->new( + run_folder => q[export/sv03/my_folder], + npg_tracking_schema => undef + ) + } qr{Attribute \(run_folder\) does not pass the type constraint}, + 'error supplying a directory path as the run_folder attribute value'; + + throws_ok { + test::run::folder->new(run_folder => q[], npg_tracking_schema => undef) + } qr{Attribute \(run_folder\) does not pass the type constraint}, + 'error supplying an empty atring as the run_folder attribute value'; + + my $obj = test::run::folder->new( + run_folder => q[my_folder], + id_run => 1234, + npg_tracking_schema => undef + ); + is ($obj->run_folder, 'my_folder', 'the run_folder value is as set'); + is ($obj->id_run, 1234, 'id_run value is as set'); + + $obj = test::run::folder->new( + run_folder => q[my_folder], + npg_tracking_schema => undef + ); + throws_ok { $obj->id_run } qr{Unable to identify id_run with data provided}, + 'error building id_run'; + + $obj = test::run::folder->new( + run_folder => 'xxxxxx', + npg_tracking_schema => $schema + ); + throws_ok { $obj->id_run } qr{Unable to identify id_run with data provided}, + 'error building id_run when no db record for the run folder exists'; + + my $rf = q[20231017_LH00210_0012_B22FCNFLT3]; + + { + # DB schema handle is not set, an attempt to build it will be made. + # Since the user HOME is reset, the file with db credentials does not exist. 
+ local $ENV{'HOME'}=getcwd().'/t'; + $obj = test::run::folder->new(run_folder => $rf); + throws_ok { $obj->id_run } qr{Unable to identify id_run with data provided}, + 'error building id_run'; + } + + $obj = test::run::folder->new( + run_folder => $rf, + npg_tracking_schema => $schema + ); + is ($obj->id_run, 47995, 'id_run value retrieved from the database record'); +}; + +subtest 'test id_run extraction from within experiment_name' => sub { + plan tests => 8; + + my $short_info; + { + # DB schema handle is not set, an attempt to build it will be made. + # Since the user HOME is reset, the file with db credentials does not exist. + local $ENV{'HOME'}=getcwd().'/t'; + + $short_info = test::nvx_short_info->new( + experiment_name => '45678_NVX1_A', + run_folder => 'not_a_folder' + ); + my $id_run; + warning_like { $id_run = $short_info->id_run } + qr /Unable to connect to NPG tracking DB for faster globs/, + 'warning about a failure to connect to the database'; + is($id_run, '45678', 'id_run parsed from experiment name'); + } + + $short_info = test::nvx_short_info->new( + experiment_name => ' 45678_NVX1_A ', + run_folder => 'not_a_folder', + npg_tracking_schema => undef + ); + is($short_info->id_run, '45678', + 'id_run parsed from loosely formatted experiment name'); + + $short_info = test::nvx_short_info->new( + experiment_name => '45678_NVX1_A ', + run_folder => 'not_a_folder', + npg_tracking_schema => undef + ); + is($short_info->id_run, '45678', + 'id_run parsed from experiment name with postfix spaces'); + + $short_info = test::nvx_short_info->new( + experiment_name => ' 45678_NVX1_A', + run_folder => 'not_a_folder', + npg_tracking_schema => undef + ); + is($short_info->id_run, '45678', + 'id_run parsed from experiment name with prefixed spaces'); + + $short_info = test::nvx_short_info->new( + experiment_name => '45678', + run_folder => 'not_a_folder', + npg_tracking_schema => undef + ); + is($short_info->id_run, '45678', 'Bare id_run as experiment name is 
fine'); + + $short_info = test::nvx_short_info->new( + experiment_name => 'NovaSeqX_WHGS_TruSeqPF_NA12878', + run_folder => 'not_a_folder', + npg_tracking_schema => undef + ); + throws_ok { $short_info->id_run } + qr{Unable to identify id_run with data provided}, + 'Custom run name cannot be parsed'; + + $short_info = test::nvx_short_info->new( + id_run => '45678', + experiment_name => '56789_NVX1_A', + run_folder => 'not_a_folder', + npg_tracking_schema => undef + ); + is($short_info->id_run, '45678', 'Set id_run wins over experiment_name'); +}; + +subtest 'standard runfolder, no DB access' => sub { + plan tests => 20; + + my $run_folder = q{20231019_LH00275_0006_B19NJCA4LE}; + my $runfolder_path = join q{/}, $basedir, q{IL_seq_data}, + q{analysis}, $run_folder; + + my $paths = {}; + $paths->{runfolder_path} = $runfolder_path; + $paths->{data_subpath} = $runfolder_path . q{/Data}; + $paths->{intensities_subpath} = $paths->{data_subpath} . q{/Intensities}; + $paths->{basecalls_subpath} = $paths->{intensities_subpath} . q{/BaseCalls}; + $paths->{bbcalls_subpath} = $paths->{intensities_subpath} . + q{/BAM_basecalls_20240223-125418}; + $paths->{pb_cal_subpath} = $paths->{bbcalls_subpath} . q{/no_cal}; + $paths->{archive_subpath} = $paths->{pb_cal_subpath} . q{/archive}; + $paths->{no_archive_subpath} = $paths->{bbcalls_subpath} . q{/no_archive}; + $paths->{pp_archive_subpath} = $paths->{bbcalls_subpath} . q{/pp_archive}; + $paths->{qc_subpath} = $paths->{archive_subpath} . 
q{/qc}; + + for my $path (values %{$paths}) { + make_path($path); + } my $path_info; - lives_ok { $path_info = test::run::folder->new( - id_run => $id_run, run_folder => $run_folder - ) } q{created role_test object ok}; + lives_ok { + $path_info = test::run::folder->new( + run_folder => $run_folder, + npg_tracking_schema => undef + ) + } q{created role_test object ok}; + is($path_info->runfolder_path, $runfolder_path, q{runfolder_path found}); my $p; - warning_like { $p = $path_info->runfolder_path() } - qr/Unable to connect to NPG tracking DB for faster globs/, - 'expected warnings'; - is($p, $runfolder_path, q{runfolder_path found}); warning_like { $p = $path_info->recalibrated_path() } qr/Latest_Summary does not exist or is not a link/, - 'warning about lt absence'; - is($p, $pb_cal_subpath, 'recalibrated path'); - is($path_info->analysis_path(), $bbcalls_subpath, + 'warning about Latest_Summary absence'; + is($p, $paths->{pb_cal_subpath}, 'recalibrated path'); + is($path_info->analysis_path(), $paths->{bbcalls_subpath}, q{found a recalibrated directory, so able to work out analysis_path}); - is($path_info->archive_path(), $archive_subpath, q{archive path}); - is($path_info->no_archive_path(), $no_archive_subpath, q{no_archive path}); - is($path_info->pp_archive_path(), $pp_archive_subpath, q{pp_archive path}); - is($path_info->qc_path(), $qc_subpath, q{qc path}); - is($path_info->basecall_path(), $basecalls_subpath, q{basecall path}); - is($path_info->dragen_analysis_path(), "$runfolder_path/Analysis", + is($path_info->archive_path(), $paths->{archive_subpath}, q{archive path}); + is($path_info->no_archive_path(), $paths->{no_archive_subpath}, + q{no_archive path}); + is($path_info->pp_archive_path(), $paths->{pp_archive_subpath}, + q{pp_archive path}); + is($path_info->qc_path(), $paths->{qc_subpath}, q{qc path}); + is($path_info->basecall_path(), $paths->{basecalls_subpath}, q{basecall path}); + is($path_info->dragen_analysis_path(), $runfolder_path . 
q{/Analysis}, q{DRAGEN analysis path}); - lives_ok { $path_info = test::run::folder->new(subpath => $archive_subpath) } - q{created role_test object ok}; + $path_info = test::run::folder->new( + subpath => $paths->{archive_subpath}, + npg_tracking_schema => undef + ); is($path_info->runfolder_path(), $runfolder_path, q{runfolder_path found}); warning_like { $p = $path_info->recalibrated_path() } qr/Latest_Summary does not exist or is not a link/, 'warning about Latest_Summary absence'; - is($p, $pb_cal_subpath, q{recalibrated_path found}); - is($path_info->analysis_path(), $bbcalls_subpath, 'analysis path'); + is($p, $paths->{pb_cal_subpath}, q{recalibrated_path found}); + is($path_info->analysis_path(), $paths->{bbcalls_subpath}, 'analysis path'); - $create_staging->(); my $ls = qq{$runfolder_path/Latest_Summary}; - symlink $pb_cal_subpath, $ls; - my $other = qq{$intensities_subpath/BAM_basecalls_2019-10-01}; + symlink $paths->{pb_cal_subpath}, $ls; + my $other = $paths->{intensities_subpath} . 
q{/BAM_basecalls_2019-10-01}; make_path $other; - $path_info = test::run::folder->new( - id_run => $id_run, run_folder => $run_folder); - warning_like { $p = $path_info->runfolder_path() } - qr/Unable to connect to NPG tracking DB for faster globs/, - 'expected warnings'; - is($p, $runfolder_path, q{runfolder_path found}); - is($path_info->basecall_path(), $basecalls_subpath, + run_folder => $run_folder, + npg_tracking_schema => undef + ); + is($path_info->runfolder_path, $runfolder_path, q{runfolder_path found}); + is($path_info->basecall_path(), $paths->{basecalls_subpath}, q{basecalls_path found when Latest_Summary link is present}); unlink $ls; $path_info = test::run::folder->new( - id_run => $id_run, run_folder => $run_folder); - warning_like { $p = $path_info->runfolder_path() } - qr/Unable to connect to NPG tracking DB for faster globs/, - 'expected warnings'; - is($p, $runfolder_path, q{runfolder_path found}); + run_folder => $run_folder, + npg_tracking_schema => undef + ); + is($path_info->runfolder_path, $runfolder_path, q{runfolder_path found}); throws_ok { $path_info->recalibrated_path() } qr/Multiple bam_basecall directories in the intensity directory/, 'multiple bam_basecall directories cannot be resolved ' . 
'without the Latest_Summary link'; remove_tree $other; - remove_tree $bbcalls_subpath; + remove_tree $paths->{bbcalls_subpath}; throws_ok { $path_info->recalibrated_path() } qr/bam_basecall directory not found in the intensity directory/, 'absence of bam_basecall directory is an error'; - - $create_staging->(); - $path_info = test::run::folder->new( - id_run => $id_run, - run_folder => $run_folder, - recalibrated_path => qq{$bbcalls_subpath/Help}, - ); - is( $path_info->analysis_path(), $bbcalls_subpath, q{analysis path inferred} ); - - $delete_staging->(); }; -subtest 'runfolder with unusual structure' => sub { +subtest 'runfolder with an unusual path, no DB access' => sub { plan tests => 12; my $path = join q[/], $basedir, qw/aa bb cc dd/; make_path $path; - my $rf = test::run::folder->new(archive_path => $path); - throws_ok { $rf->runfolder_path } - qr/Nothing looks like a run_folder in any given subpath/, - 'cannot infer intensity_path'; - throws_ok { $rf->intensity_path } - qr/Nothing looks like a run_folder in any given subpath/, - 'cannot infer intensity_path'; - throws_ok { $rf->basecall_path } - qr/Nothing looks like a run_folder in any given subpath/, - 'cannot infer basecall_path'; - throws_ok { $rf->recalibrated_path } - qr/Nothing looks like a run_folder in any given subpath/, - 'cannot infer recalibrated_path'; + my $rf = test::run::folder->new( + archive_path => $path, + npg_tracking_schema => undef + ); + my @methods = map {$_ .'_path'} qw/runfolder intensity basecall recalibrated/; + for my $path_method (@methods) { + throws_ok { $rf->$path_method } + qr/Nothing looks like a run folder in any subpath/, + "cannot infer $path_method"; + } is ($rf->bam_basecall_path, undef, 'bam_basecall_path not set'); is ($rf->analysis_path, join(q[/], $basedir, qw/aa bb/), 'analysis path'); - $rf = test::run::folder->new( runfolder_path => $basedir, - archive_path => $path ); + $rf = test::run::folder->new( + runfolder_path => $basedir, + archive_path => $path, + 
npg_tracking_schema => undef + ); is ($rf->intensity_path, "$basedir/Data/Intensities", 'intensity_path returned though it does not exist'); is ($rf->basecall_path, "$basedir/Data/Intensities/BaseCalls", @@ -190,7 +303,10 @@ subtest 'setting bam_basecall_path' => sub { my $path = join q[/], $basedir, qw/ee/; make_path $path; - my $rf = test::run::folder->new(runfolder_path => $path); + my $rf = test::run::folder->new( + runfolder_path => $path, + npg_tracking_schema => undef + ); is ($rf->bam_basecall_path, undef, 'bam_basecall_path is not set'); ok (!$rf->has_bam_basecall_path(), 'bam_basecall_path is not set'); is ($rf->analysis_path, q{}, 'analysis path is empty'); @@ -204,57 +320,19 @@ subtest 'setting bam_basecall_path' => sub { qr/bam_basecall is already set to $expected/, 'bam_basecall_path can be set only once'; - $rf = test::run::folder->new(runfolder_path => $path); + $rf = test::run::folder->new( + runfolder_path => $path, + npg_tracking_schema => undef + ); like ($rf->set_bam_basecall_path(), qr/BAM_basecalls_\d+/, 'setting bam_basecall_path without a custom suffix'); - $rf = test::run::folder->new(runfolder_path => $path); + $rf = test::run::folder->new( + runfolder_path => $path, + npg_tracking_schema => undef + ); is ($rf->set_bam_basecall_path('t/data', 1), 't/data', 'bam_basecall_path is set to the path given'); }; -subtest 'standard run folder No 2' => sub { - plan tests => 10; - - my $hs_runfolder_dir = qq{$basedir/nfs/sf44/ILorHSany_sf20/incoming/100914_HS3_05281_A_205MBABXX}; - make_path qq{$hs_runfolder_dir/Data/Intensities/BAM_basecalls_20101016-172254/no_cal/archive}; - make_path qq{$hs_runfolder_dir/Config}; - symlink q{Data/Intensities/BAM_basecalls_20101016-172254/no_cal}, - qq{$hs_runfolder_dir/Latest_Summary}; - - my $linked_dir = readlink ( $hs_runfolder_dir . 
q{/Latest_Summary} ); - - my $o = test::run::folder->new( - runfolder_path => $hs_runfolder_dir, - ); - my $recalibrated_path; - lives_ok { $recalibrated_path = $o->recalibrated_path; } 'recalibrated_path from runfolder_path and summary link'; - cmp_ok( $recalibrated_path, 'eq', $hs_runfolder_dir . q(/Data/Intensities/BAM_basecalls_20101016-172254/no_cal), 'recalibrated_path from summary link' ); - cmp_ok( $o->analysis_path, 'eq', $hs_runfolder_dir . q(/Data/Intensities/BAM_basecalls_20101016-172254), 'analysis_path from summary link' ); - cmp_ok( $o->basecall_path, 'eq', $hs_runfolder_dir . q(/Data/Intensities/BaseCalls), 'basecall_path from summary link' ); - cmp_ok( $o->runfolder_path, 'eq', $hs_runfolder_dir, 'runfolder_path from summary link (no_cal)' ); - - $o = test::run::folder->new( - archive_path => $hs_runfolder_dir . - q(/Data/Intensities/BAM_basecalls_20101016-172254/no_cal/archive) - ); - cmp_ok( $o->analysis_path, 'eq', $hs_runfolder_dir . q(/Data/Intensities/BAM_basecalls_20101016-172254), - 'analysis_path directly from archiva_path'); - cmp_ok( $o->recalibrated_path, 'eq', $hs_runfolder_dir . q(/Data/Intensities/BAM_basecalls_20101016-172254/no_cal), 'recalibrated_path from archive_path' ); - cmp_ok( $o->basecall_path, 'eq', $hs_runfolder_dir . 
q(/Data/Intensities/BaseCalls), 'basecall_path from archive_path' ); - cmp_ok( $o->runfolder_path, 'eq', $hs_runfolder_dir, 'runfolder_path from archive_path' ); - - unlink qq{$hs_runfolder_dir/Latest_Summary}; - # link points to non-existing directory - symlink q{Data/Intensities/Bustard1.8.1a2_01-10-2010_RTA.2/PB_cal}, - qq{$hs_runfolder_dir/Latest_Summary}; - - $o = test::run::folder->new( - runfolder_path => $hs_runfolder_dir, - ); - throws_ok { $o->recalibrated_path; } - qr/is not a directory, cannot be the recalibrated path/, - 'link points to non-existing directory - error'; -}; - 1; diff --git a/t/60-illumina-run-short_info.t b/t/60-illumina-run-short_info.t deleted file mode 100644 index dc2cdbce..00000000 --- a/t/60-illumina-run-short_info.t +++ /dev/null @@ -1,116 +0,0 @@ -use strict; -use warnings; -use Test::More tests => 5; -use Test::Exception; -use Moose::Meta::Class; - -use t::dbic_util; - -use_ok(q{npg_tracking::illumina::run::short_info}); - -my $schema = t::dbic_util->new->test_schema( - fixture_path => q[t/data/dbic_fixtures]); - -my $rfname = q[20231017_LH00210_0012_B22FCNFLT3]; - -# Start of package test::short_info -package test::short_info; -use Moose; -with qw{npg_tracking::illumina::run::short_info}; - -sub _build_run_folder { return $rfname; } -# End of package test::short_info - -# Start of package test::db_short_info -package test::db_short_info; -use Moose; -use npg_tracking::Schema; -with qw{npg_tracking::illumina::run::short_info}; - -has q{npg_tracking_schema} => ( - isa => 'npg_tracking::Schema', - is => 'ro', - default => sub { return $schema }, -); -# End of package test::db_short_info - -# Start of package test::nvx_short_info -package test::nvx_short_info; -use Moose; -with 'npg_tracking::illumina::run::short_info'; - -has experiment_name => (is => 'rw'); -# End of package test::nvx_short_info - -package main; - -subtest 'object derived directly from the role' => sub { - plan tests => 6; - - my $class = 
Moose::Meta::Class->create_anon_class( - roles => [qw/npg_tracking::illumina::run::short_info/] - ); - - throws_ok { $class->new_object(id_run => 1234)->run_folder() } - qr{does not support builder method '_build_run_folder'}, - q{Error thrown as no _build_run_folder method in class}; - - throws_ok { $class->new_object(run_folder => q[export/sv03/my_folder]) } - qr{Attribute \(run_folder\) does not pass the type constraint}, - 'error supplying a directory path as the run_folder attribute value'; - - throws_ok { $class->new_object(run_folder => q[]) } - qr{Attribute \(run_folder\) does not pass the type constraint}, - 'error supplying an empty atring as the run_folder attribute value'; - - my $obj = $class->new_object(run_folder => q[my_folder], id_run => 1234); - is ($obj->run_folder, 'my_folder', 'the run_folder value is as set'); - is ($obj->id_run, 1234, 'id_run value is as set'); - - throws_ok { $class->new_object(run_folder => q[my_folder])->id_run } - qr{Unable to identify id_run with data provided}, - 'error building id_run'; -}; - -subtest 'object with a bulder method for run_folder' => sub { - plan tests => 1; - - is (test::short_info->new(id_run => 47995)->run_folder, $rfname, - 'value of run_folder attribute is set by the builder method'); -}; - -subtest 'object with access to tracking database' => sub { - plan tests => 2; - - throws_ok { test::db_short_info->new(run_folder => 'xxxxxx')->id_run } - qr{Unable to identify id_run with data provided}, - 'error building id_run when no db record for the run folder exists'; - is (test::db_short_info->new(run_folder => $rfname)->id_run, 47995, - 'id_run value retrieved from the database recprd'); -}; - -subtest 'Test id_run extraction from within experiment_name' => sub { - plan tests => 7; - my $short_info = test::nvx_short_info->new(experiment_name => '45678_NVX1_A', run_folder => 'not_a_folder'); - is($short_info->id_run, '45678', 'id_run parsed from experiment name'); - - $short_info = 
test::nvx_short_info->new(experiment_name => ' 45678_NVX1_A ', run_folder => 'not_a_folder'); - is($short_info->id_run, '45678', 'id_run parsed from loosely formatted experiment name'); - - $short_info = test::nvx_short_info->new(experiment_name => '45678_NVX1_A ', run_folder => 'not_a_folder'); - is($short_info->id_run, '45678', 'id_run parsed from experiment name with postfix spaces'); - - $short_info = test::nvx_short_info->new(experiment_name => ' 45678_NVX1_A', run_folder => 'not_a_folder'); - is($short_info->id_run, '45678', 'id_run parsed from experiment name with prefixed spaces'); - - $short_info = test::nvx_short_info->new(experiment_name => '45678', run_folder => 'not_a_folder'); - is($short_info->id_run, '45678', 'Bare id_run as experiment name is fine'); - - $short_info = test::nvx_short_info->new(experiment_name => 'NovaSeqX_WHGS_TruSeqPF_NA12878', run_folder => 'not_a_folder'); - throws_ok { $short_info->id_run } qr{Unable to identify id_run with data provided}, 'Custom run name cannot be parsed'; - - $short_info = test::nvx_short_info->new(id_run => '45678', experiment_name => '56789_NVX1_A', run_folder => 'not_a_folder'); - is($short_info->id_run, '45678', 'Set id_run wins over experiment_name'); -}; - -1;