From 46d9e3fc5c40cd70fca9e634eb5566ebbd74a07d Mon Sep 17 00:00:00 2001 From: Marina Gourtovaia Date: Tue, 9 Apr 2024 15:34:27 +0100 Subject: [PATCH] Added an option to exclude lanes from a merge. --- lib/npg_pipeline/base.pm | 54 ++++++++++++++++++++++++++++++++++------ t/10-base.t | 53 +++++++++++++++++++++++++++++++++++++-- 2 files changed, 97 insertions(+), 10 deletions(-) diff --git a/lib/npg_pipeline/base.pm b/lib/npg_pipeline/base.pm index f4a3852b..cd2841a4 100644 --- a/lib/npg_pipeline/base.pm +++ b/lib/npg_pipeline/base.pm @@ -187,7 +187,10 @@ sub _build_general_values_conf { =head2 merge_lanes Tells p4 stage2 (seq_alignment) to merge all lanes (at their plex level -if plexed) and to run its downstream tasks using corresponding compositions. +if plexed, except spiked PhiX and tag_zero). + +If not set, this attribute is built lazily. It is set to true for NovaSeq runs, +which use a Standard flowcell. =cut @@ -208,6 +211,12 @@ sub _build_merge_lanes { =head2 merge_by_library +Tells p4 stage2 (seq_alignment) to merge all plexes that belong to the same +library, except spiked PhiX and tag_zero. + +If not set, this attribute is built lazily. It is set to true for indexed +NovaSeqX runs. + =cut has q{merge_by_library} => ( @@ -216,13 +225,32 @@ has q{merge_by_library} => ( lazy_build => 1, documentation => q{Tells p4 stage2 (seq_alignment) to merge all plexes } . q{that belong to the same library, except spiked PhiX and }. - q{tag zero)}, + q{tag zero}, ); sub _build_merge_by_library { my $self = shift; return $self->is_indexed && $self->platform_NovaSeqX(); } +=head2 process_separately_lanes + +An array of lane (position) numbers, which should not be merged with any other +lanes. To be used in conjunction with C<merge_lanes> or C<merge_by_library> +attributes. Does not have any impact if both of these attributes are false. + +Defaults to an empty array value, meaning that all possible entities will be +merged. 
+ +=cut + +has q{process_separately_lanes} => ( + isa => q{ArrayRef}, + is => q{ro}, + default => sub { return []; }, + documentation => q{Array of lane numbers, which have to be excluded from } . + q{a merge}, +); + =head2 lims st::api::lims run-level or product-specific object @@ -346,7 +374,8 @@ sub _build_products { if ($self->merge_lanes || $self->merge_by_library) { - my $all_lims = $self->lims->aggregate_libraries(\@lane_lims); + my $all_lims = $self->lims->aggregate_libraries( + \@lane_lims, $self->process_separately_lanes); @data_lims = @{$all_lims->{'singles'}}; # Might be empty. # merge_lanes option implies a merge across all lanes. @@ -425,18 +454,27 @@ sub _lims_object2product { sub _check_lane_merge_is_viable { my ($self, $lane_lims, $singles, $merges) = @_; + my %no_merge_lanes = map { $_ => 1 } @{$self->process_separately_lanes}; my @num_plexes = uniq - map { scalar @{$_} } - map { [grep { !$_->is_control } @{$_}] } - map { [$_->children()] } @{$lane_lims}; + map { scalar @{$_} } + map { [grep { !$_->is_control } @{$_}] } + map { [$_->children()] } + grep { ! exists $no_merge_lanes{$_->position} } + @{$lane_lims}; my $m = 'merge_lane option is not viable: '; if (@num_plexes > 1) { $self->logcroak($m . 'different number of samples in lanes'); } - if (any { !$_->is_control } @{$singles}) { - $self->logcroak($m . 'unmerged samples are present after aggregation'); + + my @unmerged_unexpected = grep { ! exists $no_merge_lanes{$_->position} } + grep { !$_->is_control } + @{$singles}; + if (@unmerged_unexpected) { + $self->logcroak( + $m . 'unexpected unmerged samples are present after aggregation'); } + if (@{$merges} != $num_plexes[0]) { $self->logcroak($m . 'number of merged samples after aggregation ' . 
'differs from the number of samples in a lane'); diff --git a/t/10-base.t b/t/10-base.t index 318d255e..5039459d 100644 --- a/t/10-base.t +++ b/t/10-base.t @@ -64,7 +64,7 @@ subtest 'repository preexec' => sub { }; subtest 'products - merging (or not) lanes' => sub { - plan tests => 19; + plan tests => 22; my $rf_path = q[t/data/novaseqx/20231017_LH00210_0012_B22FCNFLT3]; my $b = npg_pipeline::base->new(runfolder_path => $rf_path, id_run => 47995); @@ -79,12 +79,23 @@ subtest 'products - merging (or not) lanes' => sub { cp 't/data/novaseq/210111_A00513_0447_AHJ55JDSXY/RunInfo.xml', "$rf_path/RunInfo.xml"; $b = npg_pipeline::base->new(runfolder_path => $rf_path, id_run => 999); ok ($b->merge_lanes, 'merge_lanes flag is set'); + ok (!$b->_selected_lanes, 'selected_lanes flag is not set'); lives_ok {$products = $b->products} 'products hash created for NovaSeq run'; ok (exists $products->{'lanes'}, 'products lanes key exists'); is (scalar @{$products->{'lanes'}}, 4, 'four lane product'); ok (exists $products->{'data_products'}, 'products data_products key exists'); is (scalar @{$products->{'data_products'}}, 29, '29 data products'); + $b = npg_pipeline::base->new( + runfolder_path => $rf_path, + id_run => 999, + merge_lanes => 1, + process_separately_lanes => [2] + ); + # 8 products out of previous 29 are tag zero and spiked phiX + is (scalar @{$b->products->{'data_products'}}, 50, '50 data products'); + ok ($b->_selected_lanes, 'selected_lanes flag is set'); + local $ENV{NPG_CACHED_SAMPLESHEET_FILE} = 't/data/products/samplesheet_rapidrun_nopool.csv'; cp 't/data/run_params/runParameters.hiseq.rr.xml', "$rf_path/runParameters.xml"; cp 't/data/run_params/RunInfo.hiseq.rr.xml', "$rf_path/RunInfo.xml"; @@ -109,7 +120,7 @@ subtest 'products - merging (or not) lanes' => sub { }; subtest 'products - merging (or not) libraries' => sub { - plan tests => 418; + plan tests => 423; my $rf_info = $util->create_runfolder(); my $rf_path = $rf_info->{'runfolder_path'}; @@ -173,6 
+184,22 @@ subtest 'products - merging (or not) libraries' => sub { ok ($p->selected_lanes, 'selected_lanes flag is set to true'); } + $b = npg_pipeline::base->new( + runfolder_path => $rf_path, + id_run => $id_run, + process_separately_lanes => [1,2,5,6] + ); + @products = @{$b->products()->{'data_products'}}; + is (@products, 142, 'number of data products is 142'); + + $b = npg_pipeline::base->new( + runfolder_path => $rf_path, + id_run => $id_run, + process_separately_lanes => [1,6] + ); + @products = @{$b->products()->{'data_products'}}; + is (@products, 142, 'number of data products is 142'); + # Expect lanes 3 and 4 merged. $b = npg_pipeline::base->new( runfolder_path => $rf_path, id_run => $id_run, lanes => [4,8,3]); @@ -236,6 +263,28 @@ subtest 'products - merging (or not) libraries' => sub { } } is (@products, 0, 'no products are left'); + + # remove lane 3 from the merge - no merge will take place + $b = npg_pipeline::base->new( + runfolder_path => $rf_path, + id_run => $id_run, + lanes => [4,8,3], + merge_by_library => 1, + process_separately_lanes => [3] + ); + @products = @{$b->products()->{'data_products'}}; + is (@products, 64, 'number of data products is 64'); + + $b = npg_pipeline::base->new( + runfolder_path => $rf_path, + id_run => $id_run, + lanes => [4,8,3], + merge_by_library => 0, + process_separately_lanes => [3,8] + ); + lives_ok { @products = @{$b->products()->{'data_products'}} } + 'process_separately_lanes is compatible with suppressed merge'; + is (@products, 64, 'number of data products is 64'); }; sub _generate_rpt {