diff --git a/MANIFEST b/MANIFEST index 2d93e029..4450e22e 100644 --- a/MANIFEST +++ b/MANIFEST @@ -38,7 +38,6 @@ lib/npg_pipeline/function/autoqc.pm lib/npg_pipeline/function/autoqc/generic.pm lib/npg_pipeline/function/autoqc_archiver.pm lib/npg_pipeline/function/bqsr_calc.pm -lib/npg_pipeline/function/cache_merge_component.pm lib/npg_pipeline/function/cluster_count.pm lib/npg_pipeline/function/current_analysis_link.pm lib/npg_pipeline/function/definition.pm @@ -104,7 +103,6 @@ t/20-function-autoqc.t t/20-function-autoqc-generic.t t/20-function-autoqc_archiver.t t/20-function-bqsr_calc.t -t/20-function-cache_merge_component.t t/20-function-cluster_count.t t/20-function-current_analysis_link.t t/20-function-definition.t diff --git a/data/config_files/function_list_post_qc_review.json b/data/config_files/function_list_post_qc_review.json index 8d31602c..c95821eb 100644 --- a/data/config_files/function_list_post_qc_review.json +++ b/data/config_files/function_list_post_qc_review.json @@ -26,11 +26,6 @@ "source": "update_ml_warehouse", "target": "archive_to_s3" }, - { - "relation": "dependsOn", - "source": "update_ml_warehouse", - "target": "cache_merge_component" - }, { "relation": "dependsOn", "source": "archive_run_data_to_irods", @@ -56,11 +51,6 @@ "source": "archive_irods_locations_to_ml_warehouse", "target": "run_run_archived" }, - { - "relation": "dependsOn", - "source": "cache_merge_component", - "target": "run_run_archived" - }, { "relation": "dependsOn", "source": "run_run_archived", @@ -204,14 +194,6 @@ "resources": {} } }, - { - "id": "cache_merge_component", - "label": "cache_merge_component", - "metadata": { - "description": "Cached products that are due for a top-up in a directory outside the run folder (if configured)", - "resources": {} - } - }, { "id": "upload_auto_qc_to_qc_database", "label": "upload_auto_qc_to_qc_database", diff --git a/lib/npg_pipeline/base/options.pm b/lib/npg_pipeline/base/options.pm index 5c6e4555..05aa75d8 100644 --- a/lib/npg_pipeline/base/options.pm +++ b/lib/npg_pipeline/base/options.pm @@ -62,20 +62,6 @@ sub _default_to_local { return $self->local; } -=head2 no_cache_merge_component - -Switches off caching of data products suitable for later merging - -=cut - -has q{no_cache_merge_component} => ( - isa => q{Bool}, - is => q{ro}, - lazy => 1, - builder => '_default_to_local', - documentation => q{Switches off caching of data products suitable for later merging.}, -); - =head2 no_s3_archival Switches off archival to s3. diff --git a/lib/npg_pipeline/function/cache_merge_component.pm b/lib/npg_pipeline/function/cache_merge_component.pm deleted file mode 100644 index b26afe6f..00000000 --- a/lib/npg_pipeline/function/cache_merge_component.pm +++ /dev/null @@ -1,197 +0,0 @@ -package npg_pipeline::function::cache_merge_component; - -use namespace::autoclean; - -use File::Basename; -use File::Spec::Functions qw{catdir catfile}; -use Moose; -use MooseX::StrictConstructor; -use Readonly; -use List::Util qw(all); - -extends 'npg_pipeline::base_resource'; - -with qw{npg_pipeline::product::cache_merge}; - -Readonly::Scalar my $LINK_EXECUTABLE => 'ln'; - -our $VERSION = '0'; - -=head2 create - - Arg [1] : None - - Example : my $defs = $obj->create - Description: Create per-product data file function definitions - for caching files eligible as top-up candidates. - - Returntype : ArrayRef[npg_pipeline::function::definition] - -=cut - -sub create { - my ($self) = @_; - - my $id_run = $self->id_run(); - my $job_name = sprintf q{%s_%d}, $LINK_EXECUTABLE, $id_run; - - my @products = $self->no_cache_merge_component ? () : - grep { $self->is_cacheable($_) } - @{$self->products->{data_products}}; - my @definitions = (); - - foreach my $product (@products) { - my $destdir = $self->merge_component_cache_dir($product); - - my @file_paths = $self->expected_files($product); - $self->_check_files(@file_paths); - - my @commands; - foreach my $file_path (@file_paths) { - my $filename = basename($file_path); - - push @commands, sprintf q{%s %s %s}, - $LINK_EXECUTABLE, $file_path, $destdir; - } - - my $command = join q{ && }, qq(mkdir -p $destdir), reverse @commands; - $self->debug("Adding command '$command'"); - - push @definitions, $self->create_definition({ - job_name => $job_name, - command => $command, - composition => $product->composition() - }) - } - - if (not @definitions) { - push @definitions, $self->create_excluded_definition(); - } - - return \@definitions; -} - -sub _check_files { - my ($self, @file_paths) = @_; - - my @missing = grep { not -e } @file_paths; - - if (@missing) { - $self->logcroak('Failed to cache files; the following files ', - 'are missing: ', join q{ }, @missing); - } - - return; -} - -=head2 is_cacheable - - Arg [1] : npg_pipeline::product - - Example : $obj->is_cacheable($product) - Description: Return true if the product should be cached for a later - top-up or merge - cache is configured, seq QC Pass, - lib QC undecided - - Returntype : Bool - -=cut - -sub is_cacheable { - my ($self, $product) = @_; - - my $rpt = $product->rpt_list(); - my $name = $product->file_name_root(); - - if ($self->is_release_data($product) and - $self->merge_component_study_cache_dir($product)) { - - my @seqqc = $product->final_seqqc_objs($self->qc_schema); - @seqqc or $self->logcroak("Product $name, $rpt are not all Final seq QC values"); - - if(not all { $_->is_accepted } @seqqc) { - $self->info("Product $name, $rpt are not all Final Accepted seq QC values"); - return 0; - } - - my $libqc_obj = $product->final_libqc_obj($self->qc_schema); - # Lib outcomes are not available for full lane libraries, so the code below - # might give an error when absence of QC outcome is legitimate. - $libqc_obj or $self->logcroak("Product $name, $rpt is not Final lib QC value"); - if (not $libqc_obj->is_undecided) { - $self->info("Product $name, $rpt has Final lib QC value which is not undecided and so is NOT eligible for caching"); - return 0; - } - - return 1; - } - - $self->info("Study for product $name, $rpt is NOT configured for caching"); - return 0; -} - -__PACKAGE__->meta->make_immutable; - -1; - -__END__ - -=head1 NAME - -npg_pipeline::function::cache_merge_component - -=head1 SYNOPSIS - - my $obj = npg_pipeline::function::cache_merge_component->new - (runfolder_path => $runfolder_path); - -=head1 DESCRIPTION - -Caches a data product ready for merging with top-up data. - -Caching is configured per-study using the configuration file -product_release.yml, see npg_pipeline::product::release. - - -=head1 SUBROUTINES/METHODS - -=head1 BUGS AND LIMITATIONS - -=head1 INCOMPATIBILITIES - -=head1 DIAGNOSTICS - -=head1 CONFIGURATION AND ENVIRONMENT - -=head1 DEPENDENCIES - -=over - -=item Moose - -=item MooseX::StrictConstructor - -=item Readonly - -=back - -=head1 AUTHOR - -David K. Jackson - -=head1 LICENSE AND COPYRIGHT - -Copyright (C) 2019 Genome Research Ltd. - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . diff --git a/lib/npg_pipeline/pluggable/registry.pm b/lib/npg_pipeline/pluggable/registry.pm index 20760681..8e6afff6 100644 --- a/lib/npg_pipeline/pluggable/registry.pm +++ b/lib/npg_pipeline/pluggable/registry.pm @@ -58,7 +58,6 @@ Readonly::Hash my %REGISTRY => ( 'bam_cluster_counter_check'=> {'cluster_count' => 'create'}, 'seqchksum_comparator' => {'seqchksum_comparator' => 'create'}, 'archive_to_s3' => {'s3_archiver' => 'create'}, - 'cache_merge_component' => {'cache_merge_component' => 'create'}, 'archive_to_irods_samplesheet' => {'seq_to_irods_archiver' => {method => 'create', lims_driver_type =>'samplesheet'}}, diff --git a/t/20-function-cache_merge_component.t b/t/20-function-cache_merge_component.t deleted file mode 100644 index c34e8d78..00000000 --- a/t/20-function-cache_merge_component.t +++ /dev/null @@ -1,267 +0,0 @@ -use strict; -use warnings; - -use File::Temp; -use Log::Log4perl qw[:levels]; -use File::Temp qw[tempdir]; -use Test::More tests => 8; -use Test::Exception; -use File::Copy::Recursive qw[dircopy]; -use t::util; - -my $temp_dir = tempdir(CLEANUP => 1); -Log::Log4perl->easy_init({level => $INFO, - layout => '%d %p %m %n', - file => join(q[/], $temp_dir, 'logfile')}); - -{ - package TestDB; - use Moose; - - with 'npg_testing::db'; -} - -# See README in fixtures for a description of the test data. -my $qc = TestDB->new - (sqlite_utf8_enabled => 1, - verbose => 0)->create_test_db('npg_qc::Schema', - 't/data/qc_outcomes/fixtures'); - -local $ENV{NPG_CACHED_SAMPLESHEET_FILE} = - 't/data/novaseq/180709_A00538_0010_BH3FCMDRXX/' . - 'Data/Intensities/BAM_basecalls_20180805-013153/' . - 'metadata_cache_26291/samplesheet_26291.csv'; - -my $pkg = 'npg_pipeline::function::cache_merge_component'; -use_ok($pkg); - -my $runfolder_path = 't/data/novaseq/180709_A00538_0010_BH3FCMDRXX'; -my $copy = join q[/], $temp_dir, '180709_A00538_0010_BH3FCMDRXX'; -dircopy $runfolder_path, $copy or die 'Failed to copy run folder'; -$runfolder_path = $copy; - -my $timestamp = '20180701-123456'; - -my $default = { - default => { - minimum_cpu => 1, - memory => 2 - } -}; - -subtest 'local and no_cache_merge_component' => sub { - plan tests => 7; - - my $cacher = $pkg->new - (conf_path => "t/data/release/config/archive_on", - runfolder_path => $runfolder_path, - id_run => 26291, - timestamp => $timestamp, - qc_schema => $qc, - local => 1, - resource => $default - ); - ok($cacher->no_cache_merge_component, 'no_cache_merge_component flag is set to true'); - my $ds = $cacher->create; - is(scalar @{$ds}, 1, 'one definition is returned'); - isa_ok($ds->[0], 'npg_pipeline::function::definition'); - is($ds->[0]->excluded, 1, 'function is excluded'); - - $cacher = $pkg->new - (conf_path => "t/data/release/config/archive_on", - runfolder_path => $runfolder_path, - id_run => 26291, - timestamp => $timestamp, - qc_schema => $qc, - no_cache_merge_component => 1, - resource => $default - ); - ok(!$cacher->local, 'local flag is false'); - $ds = $cacher->create; - is(scalar @{$ds}, 1, 'one definition is returned'); - is($ds->[0]->excluded, 1, 'function is excluded'); -}; - -subtest 'create' => sub { - plan tests => 4 + (1 + 13) * 4; - - #Tags 7, 8, 1, 11, 2, 5 - preliminary results - - my $cacher; - lives_ok { - $cacher = $pkg->new - (conf_path => "t/data/release/config/archive_on", - runfolder_path => $runfolder_path, - id_run => 26291, - timestamp => $timestamp, - qc_schema => $qc, - resource => $default - ); - } 'cacher created ok'; - - throws_ok {$cacher->create} - qr/Product 26291\#1, 26291:1:1;26291:2:1 is not Final lib QC value/, - 'error since some results are preliminary'; - - my $rs = $qc->resultset('MqcLibraryOutcomeEnt'); - # Make all outcomes final - while (my $row = $rs->next) { - if (!$row->has_final_outcome) { - my $shift = $row->is_undecided ? 1 : 2; - $row->update({id_mqc_outcome => $row->id_mqc_outcome + $shift}); - } - } - - my @defs = @{$cacher->create}; - my $num_defs_observed = scalar @defs; - my $num_defs_expected = 4; - cmp_ok($num_defs_observed, '==', $num_defs_expected, - "create returns $num_defs_expected definitions when caching"); - - my @archived_rpts; - foreach my $def (@defs) { - push @archived_rpts, - [map { [$_->id_run, $_->position, $_->tag_index] } - map {$_->components_list} grep {defined} $def->composition]; - } - - is_deeply(\@archived_rpts, - [ - [[26291, 1, 5], [26291, 2, 5]], - [[26291, 1, 6], [26291, 2, 6]], - [[26291, 1,11], [26291, 2,11]], - [[26291, 1,12], [26291, 2,12]] - ], - 'four undecided final cached') - or diag explain \@archived_rpts; - - my $cmd_patt = qr|^ln $runfolder_path/.*/archive/plex\d+/.* /tmp/npg_seq_pipeline/cache_merge_component_test/\w{2}/\w{2}/\w{64}$|; - - foreach my $def (@defs) { - is($def->created_by, $pkg, "created_by is $pkg"); - is($def->identifier, 26291, "identifier is set correctly"); - - my $cmd = $def->command; - my @parts = split / && /, $cmd; # Deconstruct the command - like(shift @parts, qr|^mkdir -p /tmp/npg_seq_pipeline/cache_merge_component_test/\w{2}/\w{2}/\w{64}$|); - foreach my $part (@parts) { - like($part, $cmd_patt, "$cmd matches $cmd_patt"); - } - } -}; - -subtest 'abort_on_missing_files' => sub { - plan tests => 2; - - my $cacher; - lives_ok { - $cacher = $pkg->new - (conf_path => "t/data/release/config/archive_on", - runfolder_path => $runfolder_path, - id_run => 26291, - timestamp => $timestamp, - qc_schema => $qc, - resource => $default - ); - } 'cacher created ok'; - - my $to_move = "$runfolder_path/Data/Intensities/BAM_basecalls_20180805-013153/no_cal/archive/plex12/26291#12.cram"; - my $moved = $to_move . '_moved'; - rename $to_move, $moved or die 'failed to move test file'; - - dies_ok { - $cacher->create; - } 'aborts okay'; - - rename $moved, $to_move or die 'failed to move test file'; -}; - -subtest 'abort_on_missing_lib_qc' => sub { - plan tests => 2; - - $qc->resultset(q(MqcLibraryOutcomeEnt))->search({})->first->delete; - my $cacher; - lives_ok { - $cacher = $pkg->new - (conf_path => "t/data/release/config/archive_on", - runfolder_path => $runfolder_path, - id_run => 26291, - timestamp => $timestamp, - qc_schema => $qc, - resource => $default - ); - } 'cacher created ok'; - - dies_ok { - $cacher->create; - } 'aborts okay'; -}; - -subtest 'no_cache_study' => sub { - plan tests => 2; - - my $cacher = $pkg->new - (conf_path => "t/data/release/config/archive_off", - runfolder_path => $runfolder_path, - id_run => 26291, - timestamp => $timestamp, - qc_schema => $qc, - resource => $default - ); - - my @defs = @{$cacher->create}; - my $num_defs_observed = scalar @defs; - my $num_defs_expected = 1; - cmp_ok($num_defs_observed, '==', $num_defs_expected, - "create returns $num_defs_expected definitions when not archiving") or - diag explain \@defs; - - is($defs[0]->composition, undef, 'definition has no composition') or - diag explain \@defs; -}; - -subtest 'create_with_failed_lane' => sub { - plan tests => 3; - - $qc->resultset(q(MqcOutcomeEnt))->search({id_run=>26291, position=>1})->first->toggle_final_outcome(q(fakeuser)); - my $cacher; - lives_ok { - $cacher = $pkg->new - (conf_path => "t/data/release/config/archive_on", - runfolder_path => $runfolder_path, - id_run => 26291, - timestamp => $timestamp, - qc_schema => $qc, - resource => $default - ); - } 'cacher created ok'; - - my @defs = @{$cacher->create}; - my $num_defs_observed = scalar @defs; - my $num_defs_expected = 1; # single "excluded" - cmp_ok($num_defs_observed, '==', $num_defs_expected, - "create returns $num_defs_expected definitions when caching"); - ok($defs[0] && $defs[0]->excluded, "excluded") -}; - -subtest 'abort_on_missing_seq_qc' => sub { - plan tests => 2; - - $qc->resultset(q(MqcOutcomeEnt))->search({id_run=>26291, position=>1})->first->delete; - my $cacher; - lives_ok { - $cacher = $pkg->new - (conf_path => "t/data/release/config/archive_on", - runfolder_path => $runfolder_path, - id_run => 26291, - timestamp => $timestamp, - qc_schema => $qc, - resource => $default - ); - } 'cacher created ok'; - - dies_ok { - $cacher->create; - } 'aborts okay'; -}; -