Skip to content

Commit

Permalink
Merge pull request #819 from mgcam/merge_exclude
Browse files Browse the repository at this point in the history
Added an option to exclude lanes from a merge.
  • Loading branch information
dkj authored Apr 19, 2024
2 parents ec78f7d + 0b7eb97 commit 0a8d6f3
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 11 deletions.
41 changes: 35 additions & 6 deletions lib/st/api/lims.pm
Original file line number Diff line number Diff line change
Expand Up @@ -824,14 +824,15 @@ sub is_composition {
Given a list of lane-level C<st::api::lims> objects, finds their children,
which can be merged and analysed together across all or some of the lanes.
If children are not present, considers lane-level object as a single
library.
library. The second optional argument is a list of lanes (positions) that
should be excluded from the merge.
All argument lane objects should belong to the same run. It is assumed that
they all use the same C<st::api::lims> driver type.
Returns two lists of objects, one for merged entities and one for singletons,
which are wrapped into a dictionary. Either of these lists can be empty. Both
of the lists are guaranteed not to be empty at the same time.
which are wrapped into a dictionary. Either of these lists can be empty. The
two lists are guaranteed not to be empty at the same time.
Tag zero objects are neither added nor explicitly removed. Objects for
spiked-in controls (if present) are always added to the list of singletons.
Expand All @@ -841,11 +842,13 @@ set of input lane-level C<st::api::lims> objects the same lists are always
returned.
Criteria for entities to be eligible for a merge:
they are not controls,
they belong to the same library,
they share the same tag index,
they belong to different lanes, one per lane,
they belong to the same study.
they belong to the same study,
they do not belong to a list of excluded lanes.
This method can be used both as instance and as a class method.
Expand All @@ -857,20 +860,31 @@ This method can be used both as instance and as a class method.
print 'Merged entity ' . $l->to_string;
}
# Exclude lanes 2 and 3 from the merge. Entities belonging to this lane
# will appear under the 'singles' key.
$all_lims = st::api::lims->aggregate_libraries($run_lims->children(), [2,3]);
for my $l (@{$all_lims->{'singles'}}) {
print 'No merge for ' . $l->to_string;
}
=cut

sub aggregate_libraries {
my ($self, $lane_lims_array) = @_;
my ($self, $lane_lims_array, $do_not_merge_lanes) = @_;

# This restriction might be lifted in future.
_check_value_is_unique('id_run', 'run IDs', $lane_lims_array);
$do_not_merge_lanes ||= [];
my @lanes_to_exclude_from_merge = @{$do_not_merge_lanes};
_validate_lane_numbers(@lanes_to_exclude_from_merge);

my $lims_objects_by_library = {};
my @singles = ();
my @all_single_lims_objs = map { $_->is_pool ? $_->children() : $_ }
@{$lane_lims_array};
foreach my $obj (@all_single_lims_objs) {
if ($obj->is_control()) {
if ($obj->is_control() ||
any { $obj->position == $_ } @lanes_to_exclude_from_merge) {
push @singles, $obj;
} else {
push @{$lims_objects_by_library->{_hash_key4lib_aggregation($obj)}}, $obj;
Expand Down Expand Up @@ -995,6 +1009,21 @@ sub _check_value_is_unique {
return;
}

sub _validate_lane_numbers {
my @lanes_to_exclude_from_merge = @_;

if (@lanes_to_exclude_from_merge) {
my @temp = grep { $_ > 0 } map { int } @lanes_to_exclude_from_merge;
my $exclude_string = join q[, ], @lanes_to_exclude_from_merge;
if ( (@temp < @lanes_to_exclude_from_merge) ||
($exclude_string ne join q[, ], @temp) ) {
croak "Invalid lane numbers in list of lanes to exclude from the merge:\n" .
$exclude_string;
}
}
return;
}

=head2 create_tag_zero_object
Using run ID and position values of this object, creates and returns
Expand Down
40 changes: 35 additions & 5 deletions t/40-st-lims-merge.t
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use strict;
use warnings;
use Test::More tests => 10;
use Test::Exception;
use List::MoreUtils qw/all none/;
use List::MoreUtils qw/all none uniq/;
use File::Slurp;
use File::Temp qw/tempdir/;
use Moose::Meta::Class;
Expand Down Expand Up @@ -130,7 +130,7 @@ subtest 'Create tag zero object' => sub {
};

subtest 'Error conditions in aggregation by library' => sub {
plan tests => 3;
plan tests => 7;

local $ENV{NPG_CACHED_SAMPLESHEET_FILE} =
't/data/test40_lims/samplesheet_novaseq4lanes.csv';
Expand Down Expand Up @@ -160,6 +160,17 @@ subtest 'Error conditions in aggregation by library' => sub {
throws_ok { st::api::lims->aggregate_libraries(\@lane_lims) }
qr/Multiple studies in a potential merge by library/,
'can only merge libraries that belong to the same study';

my $emassage =
"Invalid lane numbers in list of lanes to exclude from the merge:";
throws_ok { st::api::lims->aggregate_libraries(\@lane_lims, [qw/foo 3/]) }
qr/$emassage\sfoo, 3/, 'lane number cannot be a string';
throws_ok { st::api::lims->aggregate_libraries(\@lane_lims, [1.1, 2]) }
qr/$emassage\s1.1, 2/, 'lane number cannot be a float';
throws_ok { st::api::lims->aggregate_libraries(\@lane_lims, [-3]) }
qr/$emassage\s-3/, 'lane number cannot be a negative integer';
throws_ok { st::api::lims->aggregate_libraries(\@lane_lims, [0]) }
qr/$emassage\s0/, 'lane number cannot be zero';
};

subtest 'Allow duplicate libraries with different tag indexes' => sub {
Expand Down Expand Up @@ -200,7 +211,7 @@ subtest 'Allow duplicate libraries with different tag indexes' => sub {
};

subtest 'Aggregation by library for a NovaSeq standard flowcell' => sub {
plan tests => 101;
plan tests => 107;

local $ENV{NPG_CACHED_SAMPLESHEET_FILE} =
't/data/test40_lims/samplesheet_novaseq4lanes.csv';
Expand All @@ -217,7 +228,7 @@ subtest 'Aggregation by library for a NovaSeq standard flowcell' => sub {
}
is (keys %{$lims}, 2, 'no unexpected keys');
is (@{$lims->{'singles'}}, 4, 'list of singles contains 4 objects');
is (@{$lims->{'merges'}}, 21, 'list of merges contains 21 objects');
is (@{$lims->{'merges'}}, 21, 'list of merges contains 21 object');

ok ( (all { $_->is_control } @{$lims->{'singles'}}),
'all singles are spiked-in controls');
Expand All @@ -244,8 +255,27 @@ subtest 'Aggregation by library for a NovaSeq standard flowcell' => sub {

_compare_properties([$lims->{'merges'}->[0], $lims->{'merges'}->[20]]);

# Exclude lanes 1 and 3 from the merge
$lims = st::api::lims->aggregate_libraries(\@lane_lims, [1,3]);
is (@{$lims->{'singles'}}, 46, 'list of singles contains 46 objects');
is (@{$lims->{'merges'}}, 21, 'list of merges contains 21 object');
my @positions = uniq sort map { $_->position }
grep { ! $_->is_control }
@{$lims->{'singles'}};
is (join(q[,], @positions), '1,3', 'lanes 1 and 3 are not merged');
@expected_rpt_lists = _generate_rpt_lists($id_run, [2, 4], [(1 .. 21)]);
@rpt_lists = map { $_->rpt_list } @{$lims->{'merges'}};
is_deeply (\@rpt_lists, \@expected_rpt_lists, 'merges list - correct object');

# Exclude lanes 1 and 5 from the merge
lives_ok { $lims = st::api::lims->aggregate_libraries(\@lane_lims, [1,5]) }
'asking to exclude a lane for which there is no data is not an error';
@rpt_lists = map { $_->rpt_list } @{$lims->{'merges'}};
@expected_rpt_lists = _generate_rpt_lists($id_run, [2, 3, 4], [(1 .. 21)]);
is_deeply (\@rpt_lists, \@expected_rpt_lists, 'merges list - correct object');

# Select two lanes out of four.
$lims = st::api::lims->aggregate_libraries([$lane_lims[0], $lane_lims[2]]);
$lims = st::api::lims->aggregate_libraries([$lane_lims[0], $lane_lims[2]], []);
is (@{$lims->{'singles'}}, 2, 'list of singles contains 2 objects');
is (@{$lims->{'merges'}}, 21, 'list of merges contains 21 objects');
@rpt_lists = map { $_->rpt_list } @{$lims->{'merges'}};
Expand Down

0 comments on commit 0a8d6f3

Please sign in to comment.