From 60c49e89e1ccf4d6938142de9c1f1bbcc0e70c2b Mon Sep 17 00:00:00 2001 From: Marina Gourtovaia Date: Tue, 9 Apr 2024 14:18:05 +0100 Subject: [PATCH 1/2] Added an option to exclude lanes from a merge. --- lib/st/api/lims.pm | 41 +++++++++++++++++++++++++++++++++++------ t/40-st-lims-merge.t | 40 +++++++++++++++++++++++++++++++++++----- 2 files changed, 70 insertions(+), 11 deletions(-) diff --git a/lib/st/api/lims.pm b/lib/st/api/lims.pm index 12c77247..1c0f7084 100644 --- a/lib/st/api/lims.pm +++ b/lib/st/api/lims.pm @@ -824,14 +824,15 @@ sub is_composition { Given a list of lane-level C objects, finds their children, which can be merged and analysed together across all or some of the lanes. If children are not present, considers lane-level object as a single -library. +library. The second optional argument is a list of lanes (positions) that +should be excluded from the merge. All argument lane objects should belong to the same run. It is assumed that they all use the same C driver type. Returns two lists of objects, one for merged entities and one for singletons, -which are wrapped into a dictionary. Either of these lists can be empty. Both -of the lists are guaranteed not to be empty at the same time. +which are wrapped into a dictionary. Either of these lists can be empty. The +two lists are guaranteed not to be empty at the same time. Tag zero objects are neither added nor explicitly removed. Objects for spiked-in controls (if present) are always added to the list of singletons. @@ -841,11 +842,13 @@ set of input lane-level C objects the same lists are always returned. Criteria for entities to be eligible for a merge: + they are not controls, they belong to the same library, they share the same tag index, they belong to different lanes, one per lane, - they belong to the same study. + they belong to the same study, + they do not belong to a list of excluded lanes. This method can be used both as instance and as a class method. @@ -857,20 +860,31 @@ This method can be used both as instance and as a class method. print 'Merged entity ' . $l->to_string; } + # Exclude lanes 2 and 3 from the merge. Entities belonging to this lane + # wil appear unther the 'singles' key. + $all_lims = st::api::lims->aggregate_libraries($run_lims->children(), [2,3]); + for my $l (@{$all_lims->{'singles'}}) { + print 'No merge for ' . $l->to_string; + } + =cut sub aggregate_libraries { - my ($self, $lane_lims_array) = @_; + my ($self, $lane_lims_array, $do_not_merge_lanes) = @_; # This restriction might be lifted in future. _check_value_is_unique('id_run', 'run IDs', $lane_lims_array); + $do_not_merge_lanes ||= []; + my @lanes_to_exclude_from_merge = @{$do_not_merge_lanes}; + _validate_lane_numbers(@lanes_to_exclude_from_merge); my $lims_objects_by_library = {}; my @singles = (); my @all_single_lims_objs = map { $_->is_pool ? $_->children() : $_ } @{$lane_lims_array}; foreach my $obj (@all_single_lims_objs) { - if ($obj->is_control()) { + if ($obj->is_control() || + any { $obj->position == $_ } @lanes_to_exclude_from_merge) { push @singles, $obj; } else { push @{$lims_objects_by_library->{_hash_key4lib_aggregation($obj)}}, $obj; @@ -995,6 +1009,21 @@ sub _check_value_is_unique { return; } +sub _validate_lane_numbers { + my @lanes_to_exclude_from_merge = @_; + + if (@lanes_to_exclude_from_merge) { + my @temp = grep { $_ > 0 } map { int } @lanes_to_exclude_from_merge; + my $exclude_string = join q[, ], @lanes_to_exclude_from_merge; + if ( (@temp < @lanes_to_exclude_from_merge) || + ($exclude_string ne join q[, ], @temp) ) { + croak "Invalid lane numbers in list of lanes to exclude from the merge:\n" . + $exclude_string; + } + } + return; +} + =head2 create_tag_zero_object Using run ID and position values of this object, creates and returns diff --git a/t/40-st-lims-merge.t b/t/40-st-lims-merge.t index 085c0efc..8193a499 100644 --- a/t/40-st-lims-merge.t +++ b/t/40-st-lims-merge.t @@ -2,7 +2,7 @@ use strict; use warnings; use Test::More tests => 10; use Test::Exception; -use List::MoreUtils qw/all none/; +use List::MoreUtils qw/all none uniq/; use File::Slurp; use File::Temp qw/tempdir/; use Moose::Meta::Class; @@ -130,7 +130,7 @@ subtest 'Create tag zero object' => sub { }; subtest 'Error conditions in aggregation by library' => sub { - plan tests => 3; + plan tests => 7; local $ENV{NPG_CACHED_SAMPLESHEET_FILE} = 't/data/test40_lims/samplesheet_novaseq4lanes.csv'; @@ -160,6 +160,17 @@ subtest 'Error conditions in aggregation by library' => sub { throws_ok { st::api::lims->aggregate_libraries(\@lane_lims) } qr/Multiple studies in a potential merge by library/, 'can only merge libraries that belong to the same study'; + + my $emassage = + "Invalid lane numbers in list of lanes to exclude from the merge:"; + throws_ok { st::api::lims->aggregate_libraries(\@lane_lims, [qw/foo 3/]) } + qr/$emassage\sfoo, 3/, 'lane number cannot be a string'; + throws_ok { st::api::lims->aggregate_libraries(\@lane_lims, [1.1, 2]) } + qr/$emassage\s1.1, 2/, 'lane number cannot be a float'; + throws_ok { st::api::lims->aggregate_libraries(\@lane_lims, [-3]) } + qr/$emassage\s-3/, 'lane number cannot be a negative integer'; + throws_ok { st::api::lims->aggregate_libraries(\@lane_lims, [0]) } + qr/$emassage\s0/, 'lane number cannot be zero'; }; subtest 'Allow duplicate libraries with different tag indexes' => sub { @@ -200,7 +211,7 @@ subtest 'Allow duplicate libraries with different tag indexes' => sub { }; subtest 'Aggregation by library for a NovaSeq standard flowcell' => sub { - plan tests => 101; + plan tests => 107; local $ENV{NPG_CACHED_SAMPLESHEET_FILE} = 't/data/test40_lims/samplesheet_novaseq4lanes.csv'; @@ -217,7 +228,7 @@ subtest 'Aggregation by library for a NovaSeq standard flowcell' => sub { } is (keys %{$lims}, 2, 'no unexpected keys'); is (@{$lims->{'singles'}}, 4, 'list of singles contains 4 objects'); - is (@{$lims->{'merges'}}, 21, 'list of merges contains 21 objects'); + is (@{$lims->{'merges'}}, 21, 'list of merges contains 21 object'); ok ( (all { $_->is_control } @{$lims->{'singles'}}), 'all singles are spiked-in controls'); @@ -244,8 +255,27 @@ subtest 'Aggregation by library for a NovaSeq standard flowcell' => sub { _compare_properties([$lims->{'merges'}->[0], $lims->{'merges'}->[20]]); + # Exclude lanes 1 and 3 from the merge + $lims = st::api::lims->aggregate_libraries(\@lane_lims, [1,3]); + is (@{$lims->{'singles'}}, 46, 'list of singles contains 46 objects'); + is (@{$lims->{'merges'}}, 21, 'list of merges contains 21 object'); + my @positions = uniq sort map { $_->position } + grep { ! $_->is_control } + @{$lims->{'singles'}}; + is (join(q[,], @positions), '1,3', 'lanes 1 and 3 are not merged'); + @expected_rpt_lists = _generate_rpt_lists($id_run, [2, 4], [(1 .. 21)]); + @rpt_lists = map { $_->rpt_list } @{$lims->{'merges'}}; + is_deeply (\@rpt_lists, \@expected_rpt_lists, 'merges list - correct object'); + + # Exclude lanes 1 and 5 from the merge + lives_ok { $lims = st::api::lims->aggregate_libraries(\@lane_lims, [1,5]) } + 'asking to exclude a lane for which there is no data is not an error'; + @rpt_lists = map { $_->rpt_list } @{$lims->{'merges'}}; + @expected_rpt_lists = _generate_rpt_lists($id_run, [2, 3, 4], [(1 .. 21)]); + is_deeply (\@rpt_lists, \@expected_rpt_lists, 'merges list - correct object'); + # Select two lanes out of four. - $lims = st::api::lims->aggregate_libraries([$lane_lims[0], $lane_lims[2]]); + $lims = st::api::lims->aggregate_libraries([$lane_lims[0], $lane_lims[2]], []); is (@{$lims->{'singles'}}, 2, 'list of singles contains 2 objects'); is (@{$lims->{'merges'}}, 21, 'list of merges contains 21 objects'); @rpt_lists = map { $_->rpt_list } @{$lims->{'merges'}}; From 0b7eb9705d0f8f3a176e65db09474418265c6563 Mon Sep 17 00:00:00 2001 From: Marina Gourtovaia Date: Fri, 19 Apr 2024 11:27:07 +0100 Subject: [PATCH 2/2] Fixed typos --- lib/st/api/lims.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/st/api/lims.pm b/lib/st/api/lims.pm index 1c0f7084..b169e783 100644 --- a/lib/st/api/lims.pm +++ b/lib/st/api/lims.pm @@ -861,7 +861,7 @@ This method can be used both as instance and as a class method. } # Exclude lanes 2 and 3 from the merge. Entities belonging to this lane - # wil appear unther the 'singles' key. + # will appear under the 'singles' key. $all_lims = st::api::lims->aggregate_libraries($run_lims->children(), [2,3]); for my $l (@{$all_lims->{'singles'}}) { print 'No merge for ' . $l->to_string;