Skip to content

Commit

Permalink
Merge pull request wtsi-npg#814 from wtsi-npg/devel
Browse files Browse the repository at this point in the history
pull from devel to master to create release 100.0.1
  • Loading branch information
jmtcsngr authored Feb 27, 2024
2 parents abcbd7b + 21bd423 commit f6ca2f7
Show file tree
Hide file tree
Showing 5 changed files with 129 additions and 15 deletions.
7 changes: 7 additions & 0 deletions Changes
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
LIST OF CHANGES


release 100.0.1
- For aggregation of lims objects by library in st::api::lims, do not
consider the same library with different tag index in the same lane
as an error. Chromium single cell ATAC libraries have four copies of
each sample, each with a different tag.

release 100.0.0
- To help monitor shadow run folders, include the run folder path into the
logging messages in the staging monitor classes code.
Expand Down
1 change: 1 addition & 0 deletions MANIFEST
Original file line number Diff line number Diff line change
Expand Up @@ -669,6 +669,7 @@ t/data/samplesheet/samplesheet_7753.csv
t/data/samplesheet/samplesheet_27483.csv
t/data/samplesheet/samplesheet_33990.csv
t/data/samplesheet/samplesheet_47995.csv
t/data/samplesheet/samplesheet_singlecell_48460.csv
t/data/samplesheet/4pool4libs_extended.csv
t/data/samplesheet/6946_extended.csv
t/data/samplesheet/7007_extended.csv
Expand Down
38 changes: 30 additions & 8 deletions lib/st/api/lims.pm
Original file line number Diff line number Diff line change
Expand Up @@ -873,7 +873,7 @@ sub aggregate_libraries {
if ($obj->is_control()) {
push @singles, $obj;
} else {
push @{$lims_objects_by_library->{$obj->library_id}}, $obj;
push @{$lims_objects_by_library->{_hash_key4lib_aggregation($obj)}}, $obj;
}
}

Expand All @@ -885,13 +885,27 @@ sub aggregate_libraries {
delete $init->{position};
delete $init->{id_run};

my @non_control_singles = map { $_->[0] }
grep { scalar @{$_} == 1 }
values %{$lims_objects_by_library};
push @singles, @non_control_singles;

my %lanes_with_singles = map { $_->position => 1 }
@non_control_singles;

my $merges = {};
my $lane_set_delim = q[,];
foreach my $library_id (keys %{$lims_objects_by_library}) {
my @lib_lims = @{$lims_objects_by_library->{$library_id}};
if (@lib_lims == 1) {
push @singles, @lib_lims;
} else {
foreach my $hashing_key (keys %{$lims_objects_by_library}) {
my @lib_lims = @{$lims_objects_by_library->{$hashing_key}};
if (@lib_lims > 1) {

# If some libraries from the lane cannot be merged, other libraries
# will not be merged either. This might change in the future.
if (any { exists $lanes_with_singles{$_->position} } @lib_lims) {
push @singles, @lib_lims;
next;
}

_check_merge_correctness(\@lib_lims);
my $lane_set = join $lane_set_delim,
sort { $a <=> $b } map { $_->position } @lib_lims;
Expand Down Expand Up @@ -951,14 +965,22 @@ sub aggregate_libraries {
return $all_lims_objects;
}

# Builds the hash key used to group lims objects when aggregating by library.
# The key is the object's library ID; when the object has a defined tag index,
# a colon and the tag index are appended, so that entries sharing a library ID
# but carrying different tag indexes land in different groups.
sub _hash_key4lib_aggregation {
  my ($lims) = @_;

  my $aggregation_key = $lims->library_id;
  my $tag_index       = $lims->tag_index;

  # No tag index: the library ID alone is the key.
  return $aggregation_key if !defined $tag_index;

  $aggregation_key .= q[:] . $tag_index;
  return $aggregation_key;
}

sub _check_merge_correctness {
my $lib_lims = shift;
my @lanes = uniq map {$_->position} @{$lib_lims};
if (@lanes != @{$lib_lims}) {
if (@lanes != @{$lib_lims}) { # An unlikely mistake somewhere upstream.
croak 'Intra-lane merge is detected';
}
_check_value_is_unique('study_id', 'studies', $lib_lims);
_check_value_is_unique('tag_index', 'tag indexes', $lib_lims);
return;
}

Expand Down
38 changes: 31 additions & 7 deletions t/40-st-lims-merge.t
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use strict;
use warnings;
use Test::More tests => 9;
use Test::More tests => 10;
use Test::Exception;
use List::MoreUtils qw/all none/;
use File::Slurp;
Expand Down Expand Up @@ -130,7 +130,7 @@ subtest 'Create tag zero object' => sub {
};

subtest 'Error conditions in aggregation by library' => sub {
plan tests => 4;
plan tests => 3;

local $ENV{NPG_CACHED_SAMPLESHEET_FILE} =
't/data/test40_lims/samplesheet_novaseq4lanes.csv';
Expand Down Expand Up @@ -160,19 +160,43 @@ subtest 'Error conditions in aggregation by library' => sub {
throws_ok { st::api::lims->aggregate_libraries(\@lane_lims) }
qr/Multiple studies in a potential merge by library/,
'can only merge libraries that belong to the same study';
};

subtest 'Allow duplicate libraries with different tag indexes' => sub {
plan tests => 6;

$content = read_file($ss_47995_path);
# Real life example: Chromium single cell ATAC libraries have 4 copies
# of each sample in a lane, each with a different tag.
# This should not cause an error.
local $ENV{NPG_CACHED_SAMPLESHEET_FILE} =
't/data/samplesheet/samplesheet_singlecell_48460.csv';
my @lane_lims = st::api::lims->new(id_run => 48460)->children;
my $lims;
lives_ok { $lims = st::api::lims->aggregate_libraries(\@lane_lims) }
'no error since grouping by library ID and tag index';
is (scalar @{$lims->{merges}}, 28, '28 merged libraries');
is (scalar @{$lims->{singles}}, 2, '2 single libraries');

# Testing below that if one library in a potentially mergeable lane
# is a singleton, the whole lane is excluded from the merge.

my $content = read_file('t/data/samplesheet/samplesheet_47995.csv');
# Make library id of tag 1 lane 1 the same as for tag 2 lane 3.
$content =~ s/1,65934716,/1,69723083,/;
# Change study id for all tags of lane 3 to be the same as in lane 1.
$content =~ s/,6050,/,6751,/g;
$file_path = join q[/], $tmp_dir, 'samplesheet_multi_tag.csv';
my $file_path = join q[/], $tmp_dir, 'samplesheet_multi_tag.csv';
write_file($file_path, $content);

local $ENV{NPG_CACHED_SAMPLESHEET_FILE} = $file_path;
@lane_lims = st::api::lims->new(id_run => 47995)->children;
throws_ok { st::api::lims->aggregate_libraries(\@lane_lims) }
qr/Multiple tag indexes in a potential merge by library/,
'can only merge libraries with teh same tag index';
lives_ok { $lims = st::api::lims->aggregate_libraries(\@lane_lims) }
'no error since grouping by library ID and tag index';
my @unexpected = grep { $_ =~ / ^1: / }
map { $_->rpt_list } @{$lims->{merges}};
is (scalar @unexpected, 0, 'lane 1 is not in merged entities');
# 8 controls + 17 in lanes 1 and 2 each
is (scalar @{$lims->{singles}}, 42, '42 single libraries');
};

subtest 'Aggregation by library for a NovaSeq standard flowcell' => sub {
Expand Down
Loading

0 comments on commit f6ca2f7

Please sign in to comment.