Skip to content

Commit

Permalink
Merge pull request #799 from mgcam/drop_aggregation_by_lane
Browse files Browse the repository at this point in the history
Deleted aggregate_xlanes method from st::api::lims
  • Loading branch information
dkj authored Feb 7, 2024
2 parents 8e26af0 + 8282916 commit d2d4fad
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 315 deletions.
137 changes: 2 additions & 135 deletions lib/st/api/lims.pm
Original file line number Diff line number Diff line change
Expand Up @@ -819,140 +819,6 @@ sub is_composition {
return $self->rpt_list ? 1 : 0;
}

=head2 aggregate_xlanes
For a run-level st::api::lims object returns a list of st::api::lims
objects representing aggregated entities.
Aggregation is performed across all lanes of the lims object, unless
an explicit list of positions is given as an argument.
If all lanes are pools, aggregation is performed per tag index (plex)
and the list contains one or more objects, each one representing a tag
index (plex). An entry for tag index zero is added as well.
If lanes are libraries, aggregation of these libraries
is performed and the list contains one object.
It is possible to aggregate one lane, though practically it does not
make much sense.
List members represent compositions and have rpt_list attribute set.
my $l = st::api::lims->new(id_run => 44);
my $a = $l->aggregate_xlanes();
my $a = $l->aggregate_xlanes(qw/2 3/);
Assuming run id 44, for two lanes representing the same pool of four tag
indexes (1, 2, 3, 4), the list members will have the following values
of the rpt_list attribute:
44:1:0;44:2:0
44:1:1;44:2:1
44:1:2;44:2:2
44:1:3;44:2:3
44:1:4;44:2:4
The new objects have the same driver settings as the original object.
=cut

sub aggregate_xlanes {
  my ($self, @positions) = @_;

  # Only a run-level object (neither a composition nor bound to a
  # position) can be aggregated across lanes.
  if ($self->is_composition || $self->position) {
    croak 'Not run-level object';
  }

  my $lanes_ia = $self->children_ia;

  #####
  # If a list of positions is given, restrict the operation to
  # this set of positions.
  #
  if (@positions) {
    my $reduced = {};
    foreach my $p (@positions) {
      if (!exists $lanes_ia->{$p}) {
        croak sprintf 'Requested position %i does not exists in %s',
          $p,
          $self->to_string();
      }
      $reduced->{$p} = $lanes_ia->{$p};
    }
    $lanes_ia = $reduced;
  }

  my @lanes = sort { $a->position <=> $b->position } values %{$lanes_ia};
  # Hash keys come back in arbitrary order; sort them so that the
  # error message below is reproducible between runs.
  @positions = sort { $a <=> $b } keys %{$lanes_ia};

  #####
  # We cannot have a mixture of pools and libraries.
  #
  my @pools = grep { $_->is_pool } @lanes;
  if (@pools != 0 && @pools != @lanes) {
    croak sprintf 'Both pools and libraries in lanes %s in %s',
      join(q[, ], @positions),
      $self->to_string();
  }

  #####
  # Test function. Certain attributes should be the same
  # across all objects of the lims array (first arg.).
  # Croaks if an attribute is undefined for any of the objects
  # or has more than one distinct value; returns nothing otherwise.
  #
  my $can_merge = sub {
    my ($lims, @attrs) = @_;
    for my $attr_name (@attrs) {
      my @values = grep { defined $_ } map { $_->$attr_name } @{$lims};
      if (@values != @{$lims}) {
        croak qq[$attr_name is not defined for one of lims objects];
      }
      @values = uniq @values;
      if (@values != 1) {
        croak qq[$attr_name is not the same across lims objects list];
      }
    }
    return;
  }; # End of test function

  # The aggregated objects are defined by their rpt_list, so id_run is
  # dropped from the constructor arguments copied from this object.
  my $init = $self->copy_init_args();
  delete $init->{'id_run'};

  my @test_attrs = qw/sample_id library_id/;
  my $lanes_rpt_list = npg_tracking::glossary::rpt->deflate_rpts(\@lanes);
  my @aggregated = ();

  if (!@pools) {
    # All lanes are libraries: a single object represents the merge.
    $can_merge->(\@lanes, @test_attrs); # Test consistency
    push @aggregated, __PACKAGE__->new(%{$init}, rpt_list => $lanes_rpt_list);
  } else {
    my @sizes = uniq (map { $_->num_children } @lanes);
    if (@sizes != 1) { # Test consistency
      croak 'Different number of plexes in lanes';
    }

    #####
    # The each_arrayref function is given a list of arrays of plex-level st::api::lims
    # objects, each array represent all plexes in a lane. The arrays of plexes are ordered
    # by tag index. The each_arrayref function returns an iterator, which on each invocation
    # collates and returns a list of first, second, etc, array members in the first, second,
    # etc, invocation respectively.
    #
    my $ea = each_arrayref map { [$_->children()] } @lanes;
    while ( my @plexes = $ea->() ) {
      $can_merge->(\@plexes, @test_attrs, 'tag_index'); # Test consistency
      push @aggregated, __PACKAGE__->new(%{$init},
        rpt_list => npg_tracking::glossary::rpt->deflate_rpts(\@plexes));
    }
    # Add object for tag zero
    push @aggregated, __PACKAGE__->new(%{$init},
      rpt_list => npg_tracking::glossary::rpt->tag_zero_rpt_list($lanes_rpt_list));
  }

  return @aggregated;
}

=head2 aggregate_libraries
Given a list of lane-level C<st::api::lims> objects, finds their children,
Expand Down Expand Up @@ -1409,7 +1275,8 @@ Marina Gourtovaia E<lt>[email protected]<gt>
=head1 LICENSE AND COPYRIGHT
Copyright (C) 2013,2014,2015,2016,2017,2018,2019,2020,2021,2023 Genome Research Ltd.
Copyright (C) 2013,2014,2015,2016,2017,2018,2019,2020,2021,2023,2024
Genome Research Ltd.
This file is part of NPG.
Expand Down
181 changes: 1 addition & 180 deletions t/40-st-lims-merge.t
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use strict;
use warnings;
use Test::More tests => 11;
use Test::More tests => 9;
use Test::Exception;
use List::MoreUtils qw/all none/;
use File::Slurp;
Expand Down Expand Up @@ -129,157 +129,6 @@ subtest 'Create tag zero object' => sub {
}
};

subtest 'Aggregation across lanes for pools' => sub {
  plan tests => 89;

  local $ENV{NPG_CACHED_SAMPLESHEET_FILE} =
    't/data/test40_lims/samplesheet_novaseq4lanes.csv';

  # The method is only valid for run-level objects.
  my $l = st::api::lims->new(rpt_list => '25846:1:3');
  throws_ok { $l->aggregate_xlanes() } qr/Not run-level object/,
    'method cannot be run for a composition';
  $l = st::api::lims->new(id_run => 25846, position => 1);
  throws_ok { $l->aggregate_xlanes() } qr/Not run-level object/,
    'method cannot be run for a lane-level object';
  $l = st::api::lims->new(id_run => 25846, position => 1, tag_index => 4);
  throws_ok { $l->aggregate_xlanes() } qr/Not run-level object/,
    'method cannot be run for a plex-level object';

  $l = st::api::lims->new(id_run => 25846);

  throws_ok { $l->aggregate_xlanes(qw/2 10/) }
    qr/Requested position 10 does not exists in /,
    'error if requested position does not exist';

  # Aggregation across all four lanes. The last two list members are
  # the spiked-in control (tag 888) and tag zero, in this order.
  my @merged = $l->aggregate_xlanes();
  is (scalar @merged, 23, 'number of aggregates is number of tags plus two');
  my $tag_zero   = pop @merged;
  my $tag_spiked = pop @merged;
  my $tag_last   = pop @merged;
  my $tag_first  = shift @merged;
  is ($tag_zero->rpt_list, '25846:1:0;25846:2:0;25846:3:0;25846:4:0',
    'rpt list for tag zero object');
  is ($tag_spiked->rpt_list, '25846:1:888;25846:2:888;25846:3:888;25846:4:888',
    'rpt list for spiked in tag object');
  is ($tag_last->rpt_list, '25846:1:21;25846:2:21;25846:3:21;25846:4:21',
    'rpt list for tag 21 object');
  is ($tag_first->rpt_list, '25846:1:1;25846:2:1;25846:3:1;25846:4:1',
    'rpt list for tag 1 object');

  # Aggregation restricted to two of the lanes.
  @merged = $l->aggregate_xlanes(qw/1 4/);
  is (scalar @merged, 23, 'number of aggregates is number of tags plus two');
  $tag_zero   = pop @merged;
  $tag_spiked = pop @merged;
  $tag_last   = pop @merged;
  $tag_first  = shift @merged;
  is ($tag_zero->rpt_list, '25846:1:0;25846:4:0',
    'rpt list for tag zero object');
  is ($tag_spiked->rpt_list, '25846:1:888;25846:4:888',
    'rpt list for spiked in tag object');
  is ($tag_last->rpt_list, '25846:1:21;25846:4:21',
    'rpt list for tag 21 object');
  is ($tag_first->rpt_list, '25846:1:1;25846:4:1',
    'rpt list for tag 1 object');

  # Aggregation of a single lane is allowed.
  @merged = $l->aggregate_xlanes(qw/1/);
  is (scalar @merged, 23, 'number of aggregates is number of tags plus two');
  $tag_zero   = pop @merged;
  $tag_spiked = pop @merged;
  $tag_last   = pop @merged;
  $tag_first  = shift @merged;
  is ($tag_zero->rpt_list, '25846:1:0', 'rpt list for tag zero object');
  is ($tag_spiked->rpt_list, '25846:1:888', 'rpt list for spiked in tag object');
  is ($tag_last->rpt_list, '25846:1:21', 'rpt list for tag 21 object');
  is ($tag_first->rpt_list, '25846:1:1', 'rpt list for tag 1 object');

  # Repeat the all-lanes aggregation; the resulting objects are used
  # for the property checks that follow.
  @merged = $l->aggregate_xlanes();
  is (scalar @merged, 23, 'number of aggregates is number of tags plus two');
  $tag_zero   = pop @merged;
  $tag_spiked = pop @merged;
  $tag_last   = pop @merged;
  $tag_first  = shift @merged;
  is ($tag_zero->rpt_list, '25846:1:0;25846:2:0;25846:3:0;25846:4:0',
    'rpt list for tag zero object');
  is ($tag_spiked->rpt_list, '25846:1:888;25846:2:888;25846:3:888;25846:4:888',
    'rpt list for spiked in tag object');
  is ($tag_last->rpt_list, '25846:1:21;25846:2:21;25846:3:21;25846:4:21',
    'rpt list for tag 21 object');
  is ($tag_first->rpt_list, '25846:1:1;25846:2:1;25846:3:1;25846:4:1',
    'rpt list for tag 1 object');
  ok ((none {defined $_->id_run} ($tag_zero, $tag_spiked, $tag_first, $tag_last)),
    "id_run not defined");

  _compare_properties([$tag_first, $tag_last, $tag_zero, $tag_spiked]);

  ok ($tag_spiked->is_phix_spike, 'is phix spike');
  ok (!$tag_first->is_phix_spike, 'is not phix spike');
  ok (!$tag_zero->is_phix_spike, 'is not phix spike');

  is (join(q[:], $tag_zero->study_names), 'Illumina Controls:NovaSeq testing',
    'study names including spiked phix');
  is (join(q[:], $tag_zero->study_names(1)), 'Illumina Controls:NovaSeq testing',
    'study names including spiked phix');
  is (join(q[:], $tag_zero->study_names(0)), 'NovaSeq testing',
    'study names excluding spiked phix');

  my @sample_names = qw/
    5318STDY7462457 5318STDY7462458 5318STDY7462459 5318STDY7462460 5318STDY7462461
    5318STDY7462462 5318STDY7462463 5318STDY7462464 5318STDY7462465 5318STDY7462466
    5318STDY7462467 5318STDY7462468 5318STDY7462469 5318STDY7462470 5318STDY7462471
    5318STDY7462472 5318STDY7462473 5318STDY7462474 5318STDY7462475 5318STDY7462476
    5318STDY7462477 /;

  is (join(q[:], $tag_zero->sample_names(0)), join(q[:], @sample_names),
    'sample names excluding spiked phix');
  push @sample_names, 'phiX_for_spiked_buffers';
  is (join(q[:], $tag_zero->sample_names()), join(q[:], @sample_names),
    'sample names including spiked phix');
  is (join(q[:], $tag_zero->sample_names(1)), join(q[:], @sample_names),
    'sample names including spiked phix');

  # Reset the samplesheet path before switching to the ml_warehouse
  # driver - presumably so that the cached samplesheet is not picked up.
  local $ENV{NPG_CACHED_SAMPLESHEET_FILE} = q[];

  my $id_run = 47995;
  $l = st::api::lims->new(
    id_run           => $id_run,
    id_flowcell_lims => 98292,
    driver_type      => 'ml_warehouse',
    mlwh_schema      => $schema_wh,
  );

  @merged = $l->aggregate_xlanes(qw/1 2/);
  is (scalar @merged, 19, 'number of aggregates is number of tags plus two');
  $tag_zero   = pop @merged;
  $tag_spiked = pop @merged;
  $tag_last   = pop @merged;
  $tag_first  = shift @merged;
  is ($tag_zero->rpt_list, "$id_run:1:0;$id_run:2:0",
    'rpt list for tag zero object');
  my @tag_zero_sample_names = $tag_zero->sample_names();
  is (scalar @tag_zero_sample_names, 18, '18 sample names are retrieved');
  is ($tag_zero_sample_names[0], '6751STDY13219539',
    'first sample name is correct');
  is ($tag_spiked->rpt_list, "$id_run:1:888;$id_run:2:888",
    'rpt list for spiked in tag object');
  # The last ordinary tag in this run is 17.
  is ($tag_last->rpt_list, "$id_run:1:17;$id_run:2:17",
    'rpt list for tag 17 object');
  is ($tag_first->rpt_list, "$id_run:1:1;$id_run:2:1",
    'rpt list for tag 1 object');
};

subtest 'Aggregation across lanes for non-pools' => sub {
  plan tests => 14;

  # A run whose lanes are libraries rather than pools should yield
  # exactly one aggregated object spanning the lanes.
  local $ENV{NPG_CACHED_SAMPLESHEET_FILE} =
    't/data/test40_lims/samplesheet_rapidrun_nopool.csv';

  my $run_lims   = st::api::lims->new(id_run => 22672);
  my @aggregates = $run_lims->aggregate_xlanes();
  is (scalar @aggregates, 1, 'one object returned');

  my ($aggregate) = @aggregates;
  is ($aggregate->rpt_list, '22672:1;22672:2', 'correct rpt_list');
  ok (!defined $aggregate->id_run, "id_run not defined");
  ok (!$aggregate->is_phix_spike, 'is not phix spike');

  # The remaining planned assertions are made inside the shared helper.
  _compare_properties_2($aggregate);
};

subtest 'Error conditions in aggregation by library' => sub {
plan tests => 4;

Expand Down Expand Up @@ -562,34 +411,6 @@ sub _compare_properties {
'default_tag_sequence' => 'TCGAGCGT',
'study_alignments_in_bam' => 1,
'study_contains_nonconsented_human' => 0
},
{
'sample_id' => undef,
'sample_name' => undef,
'sample_common_name' => 'Homo sapiens',
'study_id' => 5318,
'study_name' => 'NovaSeq testing',
'reference_genome' => 'Homo_sapiens (1000Genomes_hs37d5 + ensembl_75_transcriptome)',
'library_id' => undef,
'library_name' => undef,
'library_type' => 'Standard',
'default_tag_sequence' => undef,
'study_alignments_in_bam' => 1,
'study_contains_nonconsented_human' => 0
},
{
'sample_id' => '1255141',
'sample_name' => 'phiX_for_spiked_buffers',
'sample_common_name' => undef,
'study_id' => 198,
'study_name' => 'Illumina Controls',
'reference_genome' => undef,
'library_id' => '17883061',
'library_name' => '17883061',
'library_type' => undef,
'default_tag_sequence' => 'ACAACGCAATC',
'study_alignments_in_bam' => 1,
'study_contains_nonconsented_human' => 0
}
];

Expand Down

0 comments on commit d2d4fad

Please sign in to comment.