Skip to content

Commit

Permalink
Use one aggregation method for merges.
Browse files Browse the repository at this point in the history
Use st::api::lims->aggregate_libraries() method for
both 'merge_lanes' and 'merge_by_library' pipeline options.
  • Loading branch information
mgcam committed Jan 5, 2024
1 parent 0521f96 commit 1bde389
Show file tree
Hide file tree
Showing 76 changed files with 149 additions and 139 deletions.
114 changes: 44 additions & 70 deletions MANIFEST

Large diffs are not rendered by default.

97 changes: 61 additions & 36 deletions lib/npg_pipeline/base.pm
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use namespace::autoclean;
use MooseX::Getopt::Meta::Attribute::Trait::NoGetopt;
use POSIX qw(strftime);
use Math::Random::Secure qw{irand};
use List::MoreUtils qw{any};
use List::MoreUtils qw{any uniq};
use File::Basename;
use Readonly;

Expand Down Expand Up @@ -358,49 +358,51 @@ sub _build_products {
if ($self->has_product_rpt_list) {
@data_lims = ($self->lims);
} else {
my @positions = $self->positions;
@lane_lims = map { $self->lims4lane($_) } @positions;
@lane_lims = map { $self->lims4lane($_) } $self->positions;

if ($self->merge_lanes) {
@data_lims = $self->lims->aggregate_xlanes(@positions);
} else {
my %tag0_lims = ();
if ($self->is_indexed) {
%tag0_lims = map { $_->position => $_->create_tag_zero_object() }
grep { $_->is_pool } @lane_lims;
}

if ($self->merge_lanes || $self->merge_by_library) {

my $all_lims = $self->lims->aggregate_libraries(\@lane_lims);
@data_lims = @{$all_lims->{'singles'}}; # Might be empty.

my %tag0_lims = ();
if ($self->is_indexed) {
%tag0_lims = map { $_->position => $_->create_tag_zero_object() }
grep { $_->is_pool } @lane_lims;
# merge_lanes option implies a merge across all lanes.
if ($self->merge_lanes && (@lane_lims > 1)) {
$self->_check_lane_merge_is_viable(
\@lane_lims, $all_lims->{'singles'}, $all_lims->{'merges'});
}

if ($self->merge_by_library) {
my $all_lims = $self->lims->aggregate_libraries(\@lane_lims);
@data_lims = @{$all_lims->{'singles'}}; # Might be empty.
# Tag zero LIMS objects for all lanes, merged or unmerged.
push @data_lims, map { $tag0_lims{$_} } (sort keys %tag0_lims);

if ( @{$all_lims->{'merges'}} ) {
# If the libraries are merged across a subset of lanes under analysis,
# the 'selected_lanes' flag needs to be flipped to true.
if (!$self->_selected_lanes) {
my $rpt_list = $all_lims->{'merges'}->[0]->rpt_list;;
my $num_components =
npg_tracking::glossary::composition::factory::rpt_list
->new(rpt_list => $rpt_list)
->create_composition()->num_components();
if ($num_components != scalar @lane_lims) {
$self->_set_selected_lanes(1);
}
# Tag zero LIMS objects for all pooled lanes, merged or unmerged.
push @data_lims, map { $tag0_lims{$_} } (sort keys %tag0_lims);

if ( @{$all_lims->{'merges'}} ) {
# If the libraries are merged across a subset of lanes under analysis,
# the 'selected_lanes' flag needs to be flipped to true.
if (!$self->_selected_lanes) {
my $rpt_list = $all_lims->{'merges'}->[0]->rpt_list;;
my $num_components =
npg_tracking::glossary::composition::factory::rpt_list
->new(rpt_list => $rpt_list)
->create_composition()->num_components();
if ($num_components != scalar @lane_lims) {
$self->_set_selected_lanes(1);
}
push @data_lims, @{$all_lims->{'merges'}};
}
push @data_lims, @{$all_lims->{'merges'}};
}

} else {
# To keep backward-compatible order of pipeline invocations, add
# tag zero LIMS object at the end of other objects for the lane.
@data_lims = map {
exists $tag0_lims{$_->position} ?
} else {
# To keep backward-compatible order of pipeline invocations, add
# tag zero LIMS object at the end of other objects for the lane.
@data_lims = map {
exists $tag0_lims{$_->position} ?
($_->children, $tag0_lims{$_->position}) : $_
} @lane_lims;
}
} @lane_lims;
}
}

Expand Down Expand Up @@ -442,6 +444,29 @@ sub _lims_object2product {
);
}

sub _check_lane_merge_is_viable {
my ($self, $lane_lims, $singles, $merges) = @_;

my @num_plexes = uniq
map { scalar @{$_} }
map { [grep { !$_->is_control } @{$_}] }
map { [$_->children()] } @{$lane_lims};

my $m = 'merge_lane option is not viable: ';
if (@num_plexes > 1) {
$self->logcroak($m . 'different number of samples in lanes');
}
if (any { !$_->is_control } @{$singles}) {
$self->logcroak($m . 'unmerged samples are present after aggregation');
}
if (@{$merges} != $num_plexes[0]) {
$self->logcroak($m . 'number of merged samples after aggregation ' .
'differs from the number of samples in a lane');
}

return 1;
}

__PACKAGE__->meta->make_immutable;

1;
Expand Down
3 changes: 2 additions & 1 deletion t/10-base.t
Original file line number Diff line number Diff line change
Expand Up @@ -96,13 +96,14 @@ subtest 'products - merging (or not) lanes' => sub {

local $ENV{NPG_CACHED_SAMPLESHEET_FILE} = 't/data/products/samplesheet_novaseq4lanes.csv';
cp 't/data/run_params/runParameters.novaseq.xml', "$rf_path/runParameters.xml";
cp 't/data/novaseq/210111_A00513_0447_AHJ55JDSXY/RunInfo.xml', "$rf_path/RunInfo.xml";
$b = npg_pipeline::base->new(runfolder_path => $rf_path, id_run => 999);
ok ($b->merge_lanes, 'merge_lanes flag is set');
lives_ok {$products = $b->products} 'products hash created for NovaSeq run';
ok (exists $products->{'lanes'}, 'products lanes key exists');
is (scalar @{$products->{'lanes'}}, 4, 'four lane product');
ok (exists $products->{'data_products'}, 'products data_products key exists');
is (scalar @{$products->{'data_products'}}, 23, '23 data products');
is (scalar @{$products->{'data_products'}}, 29, '29 data products');

local $ENV{NPG_CACHED_SAMPLESHEET_FILE} = 't/data/products/samplesheet_rapidrun_nopool.csv';
cp 't/data/run_params/runParameters.hiseq.rr.xml', "$rf_path/runParameters.xml";
Expand Down
24 changes: 15 additions & 9 deletions t/10-runfolder_scaffold.t
Original file line number Diff line number Diff line change
Expand Up @@ -67,12 +67,13 @@ subtest 'top level scaffold' => sub {
};

subtest 'product level scaffold, NovaSeq all lanes' => sub {
plan tests => 99;
plan tests => 101;

my $util = t::util->new();
my $rfh = $util->create_runfolder();
my $rf_path = $rfh->{'runfolder_path'};
fcopy 't/data/run_params/runParameters.novaseq.xml', "$rf_path/runParameters.xml";
fcopy 't/data/run_params/runParameters.novaseq.xml', "$rf_path/runParameters.xml";
fcopy 't/data/novaseq/210111_A00513_0447_AHJ55JDSXY/RunInfo.xml', "$rf_path/RunInfo.xml";
local $ENV{NPG_CACHED_SAMPLESHEET_FILE} = 't/data/products/samplesheet_novaseq4lanes.csv';

my $rfs = Moose::Meta::Class->create_anon_class(
Expand All @@ -95,6 +96,8 @@ subtest 'product level scaffold, NovaSeq all lanes' => sub {
push @dirs, (map {join q[/], $_, 'qc'} @original);
push @dirs, (map {join q[/], $_, 'tileviz_'.$_} @original);
push @dirs, (map {join q[/], $_, '.npg_cache_10000'} @original);
push @dirs, (map {join q[/], $_, 'plex0/qc'} @original);
push @dirs, (map {join q[/], $_, 'plex888/qc'} @original);
map { ok (-d $_, "$_ created") } map {join q[/], $apath, $_} @dirs;

for my $lane (@original) {
Expand All @@ -106,7 +109,7 @@ subtest 'product level scaffold, NovaSeq all lanes' => sub {
like ($content, qr/No tileviz data available for this lane/, 'info exists');
}

@original = map {'plex' . $_} (0 .. 21, 888);
@original = map {'plex' . $_} (1 .. 21);
@dirs = @original;
push @dirs, (map {join q[/], $_, 'qc'} @original);
push @dirs, (map {join q[/], $_, '.npg_cache_10000'} @original);
Expand All @@ -115,16 +118,17 @@ subtest 'product level scaffold, NovaSeq all lanes' => sub {
my $tileviz_index = join q[/], $apath, 'tileviz', 'index.html';
ok (-e $tileviz_index, 'tileviz index created');
my @lines = read_file($tileviz_index);
is (scalar @lines, 7, 'tileviz index contains sevel lines');
is (scalar @lines, 7, 'tileviz index contains seven lines');
};

subtest 'product level scaffold, NovaSeq selected lanes' => sub {
plan tests => 175;
plan tests => 165;

my $util = t::util->new();
my $rfh = $util->create_runfolder();
my $rf_path = $rfh->{'runfolder_path'};
fcopy 't/data/run_params/runParameters.novaseq.xml', "$rf_path/runParameters.xml";
fcopy 't/data/run_params/runParameters.novaseq.xml', "$rf_path/runParameters.xml";
fcopy 't/data/novaseq/210111_A00513_0447_AHJ55JDSXY/RunInfo.xml', "$rf_path/RunInfo.xml";
local $ENV{NPG_CACHED_SAMPLESHEET_FILE} = 't/data/products/samplesheet_novaseq4lanes.csv';

my $rfs = Moose::Meta::Class->create_anon_class(
Expand All @@ -149,12 +153,14 @@ subtest 'product level scaffold, NovaSeq selected lanes' => sub {
push @dirs, (map {join q[/], $_, 'qc'} @original);
push @dirs, (map {join q[/], $_, 'tileviz_'.$_} @original);
push @dirs, (map {join q[/], $_, '.npg_cache_10000'} @original);
push @dirs, (map {join q[/], $_, 'plex0/qc'} @original);
push @dirs, (map {join q[/], $_, 'plex888/qc'} @original);
map { ok (-d $_, "$_ created") } map {join q[/], $apath, $_} @dirs;

@dirs = qw/lane1 lane4/;
map { ok (!-e $_, "$_ not created") } map {join q[/], $apath, $_} @dirs;

@original = map {'lane2-3/plex' . $_} (0 .. 21, 888);
@original = map {'lane2-3/plex' . $_} (1 .. 21);
@dirs = @original;
push @dirs, (map {join q[/], $_, 'qc'} @original);
push @dirs, (map {join q[/], $_, '.npg_cache_10000'} @original);
Expand All @@ -172,7 +178,7 @@ subtest 'product level scaffold, NovaSeq selected lanes' => sub {
"link for lane $l file is not created");
}

for my $t ( (0 .. 21, 888) ) {
for my $t ( (1 .. 21) ) {
my $name = "999_2-3#${t}.cram";
my $file = "$napath/lane2-3/plex${t}/stage1/$name";
ok ((-l $file), "link for plex $t is created");
Expand Down Expand Up @@ -252,4 +258,4 @@ subtest 'product level scaffold, library merge for NovaSeqX' => sub {
}
};

1;
1;
Loading

0 comments on commit 1bde389

Please sign in to comment.