Skip to content

Commit

Permalink
Merge pull request wtsi-npg#814 from wtsi-npg/devel
Browse files Browse the repository at this point in the history
pull from devel to master to create release 67.1.0
  • Loading branch information
jmtcsngr authored Nov 28, 2023
2 parents 9588ebb + 77c70e2 commit 0521f96
Show file tree
Hide file tree
Showing 21 changed files with 7,273 additions and 141 deletions.
1 change: 0 additions & 1 deletion Build.PL
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,6 @@ my $builder = $class->new(
'npg_qc::illumina::interop::parser' => 0,
'st::api::lims' => 0,
'st::api::lims::ml_warehouse' => 0,
'npg::api::request' => 0,
'npg::samplesheet' => 0,
'WTSI::DNAP::Utilities::Loggable' => 0,
'WTSI::DNAP::Warehouse::Schema' => 0,
Expand Down
22 changes: 22 additions & 0 deletions Changes
Original file line number Diff line number Diff line change
@@ -1,6 +1,28 @@
LIST OF CHANGES
---------------

release 67.1.0
- Fix typo in analysis specific overrides for bwa_als_se mapping to bwa0_6
- Add in use of autosome target regions for BGE libraries in seq_alignment
- Add 'merge_by_library' pipeline boolean option. This option is automatically
activated for NovaSeqX platform. It triggers a discovery of sets of data
that belong to the same libraries. If cases like this are found, the pipeline
is instructed, at the secondary analysis stage, to process this data as a
single entity. In practice, if the same pool is sequenced in more than one
lane of the run, sample data for the pool are merged across these lanes.
The 'discovery' part of the algorithm is implemented in
https://github.com/wtsi-npg/npg_tracking/pull/772
- Removed provisions for inline indexes
- Removed a check for rapid runs when deciding whether to merge
- Stop warnings about an undefined value when writing to the log from
npg_pipeline::function::seq_alignment
- Some tests were creating test data in the package's source tree. These
activities are redirected to temporary files and directories in /tmp
- Removed listing of non-existing files from MANIFEST
- Removed superfluous dependency on now removed npg::api::request
- Added a test to expose a problem with ref cache, which is resolved by
https://github.com/wtsi-npg/npg_tracking/pull/761

release 67.0.0
- Turn off spatial filter QC check for NovaSeqX
- Switch to Perlbrew to obtain multiple Perl versions
Expand Down
9 changes: 7 additions & 2 deletions MANIFEST
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
.github/workflows/testing_and_building_repo.yml
bin/npg_pipeline_analysis_runner
bin/npg_pipeline_archival_runner
bin/npg_pipeline_central
Expand Down Expand Up @@ -78,7 +77,6 @@ lib/npg_pipeline/validation/s3.pm
MANIFEST This list of files
README
README.md
scripts/install_npg_perl_dependencies.sh
scripts/jgf2gml
t/00-critic.t
t/00-distribution.t
Expand Down Expand Up @@ -1042,6 +1040,13 @@ t/data/novaseq/210415_A00971_0162_AHNNTMDSXY/Data/Intensities/BAM_basecalls_2021
t/data/novaseq/210415_A00971_0162_AHNNTMDSXY/Data/Intensities/BAM_basecalls_20210417-080715/metadata_cache_37416/lane_3.taglist
t/data/novaseq/210415_A00971_0162_AHNNTMDSXY/Data/Intensities/BAM_basecalls_20210417-080715/metadata_cache_37416/lane_4.taglist
t/data/novaseq/210415_A00971_0162_AHNNTMDSXY/Data/Intensities/BAM_basecalls_20210417-080715/metadata_cache_37416/samplesheet_37416.csv
t/data/novaseqx/47539/README.md
t/data/novaseqx/47539/RunInfo.xml
t/data/novaseqx/47539/RunParameters.xml
t/data/novaseqx/47539/samplesheet_47539.csv
t/data/novaseqx/20231017_LH00210_0012_B22FCNFLT3/RunInfo.xml
t/data/novaseqx/20231017_LH00210_0012_B22FCNFLT3/RunParameters.xml
t/data/novaseqx/20231017_LH00210_0012_B22FCNFLT3/samplesheet_47995.csv
t/data/p4_stage1_analysis/1234_samplesheet.csv
t/data/p4_stage1_analysis/TileMetricsOut.bin
t/data/portable_pipelines/samplesheet4archival_all_controls.csv
Expand Down
155 changes: 114 additions & 41 deletions lib/npg_pipeline/base.pm
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ use File::Basename;
use Readonly;

use npg_tracking::glossary::rpt;
use npg_tracking::glossary::composition::factory::rpt_list;
use st::api::lims;
use npg_pipeline::product;

Expand Down Expand Up @@ -174,8 +175,9 @@ sub random_string {

=head2 positions
A sorted array of lanes (positions) this pipeline will be run on.
Defaults to positions specified in LIMs.
A sorted list of lanes (positions) this pipeline will analyse.
This list is set from the values supplied by the C<lanes> attribute. If
lanes are not set explicitly, defaults to positions specified in LIMS.
=cut

Expand Down Expand Up @@ -206,8 +208,8 @@ sub _build_general_values_conf {

=head2 merge_lanes
Tells p4 stage2 (seq_alignment) to merge lanes (at their plex level if plexed)
and to run its downstream tasks as corresponding compositions.
Tells p4 stage2 (seq_alignment) to merge all lanes (at their plex level
if plexed) and to run its downstream tasks using corresponding compositions.
=cut

Expand All @@ -217,13 +219,30 @@ has q{merge_lanes} => (
lazy => 1,
predicate => q{has_merge_lanes},
builder => q{_build_merge_lanes},
documentation => q{Tells p4 stage2 (seq_alignment) to merge lanes } .
documentation => q{Tells p4 stage2 (seq_alignment) to merge all lanes } .
q{(at their plex level if plexed) and to run its } .
q{downstream tasks as corresponding compositions},
q{downstream tasks using corresponding compositions},
);
sub _build_merge_lanes {
my $self = shift;
return $self->all_lanes_mergeable && !$self->is_rapid_run();
return $self->all_lanes_mergeable;
}

=head2 merge_by_library

Boolean flag. Tells p4 stage2 (seq_alignment) to merge all plexes that
belong to the same library, except spiked PhiX and tag zero. If the value
is not set explicitly, it is built lazily and is true for an indexed run
on the NovaSeqX platform, false otherwise.

=cut

has q{merge_by_library} => (
  isa           => q{Bool},
  is            => q{ro},
  lazy_build    => 1,
  documentation => q{Tells p4 stage2 (seq_alignment) to merge all plexes } .
                   q{that belong to the same library, except spiked PhiX } .
                   q{and tag zero},
);
sub _build_merge_by_library {
  my $self = shift;
  # Default: on for an indexed run on the NovaSeqX platform, off otherwise.
  return $self->is_indexed && $self->platform_NovaSeqX();
}

=head2 lims
Expand Down Expand Up @@ -316,6 +335,13 @@ sub get_tag_index_list {

=head2 products
Two arrays of npg_pipeline::product objects, one for lanes, hashed under
the 'lanes' key, another for end products, including, where relevant, tag
zero products, hashed under the 'data_products' key.
If product_rpt_list attribute is set, the 'lanes' key maps to an empty
array.
=cut

has q{products} => (
Expand All @@ -327,48 +353,93 @@ has q{products} => (
sub _build_products {
my $self = shift;

my $selected_lanes = $self->has_product_rpt_list ? 0 :
((join q[], $self->positions) ne
(join q[], map {$_->position} $self->lims->children()));
my (@lane_lims, @data_lims);

my $lims2product = sub {
my $lims = shift;
return npg_pipeline::product->new(
rpt_list => npg_tracking::glossary::rpt->deflate_rpt($lims),
lims => $lims,
selected_lanes => $selected_lanes);
};
if ($self->has_product_rpt_list) {
@data_lims = ($self->lims);
} else {
my @positions = $self->positions;
@lane_lims = map { $self->lims4lane($_) } @positions;

my @lane_lims = ();
if (!$self->has_product_rpt_list) {
@lane_lims = map { $self->lims4lane($_) } $self->positions;
}
if ($self->merge_lanes) {
@data_lims = $self->lims->aggregate_xlanes(@positions);
} else {

my %tag0_lims = ();
if ($self->is_indexed) {
%tag0_lims = map { $_->position => $_->create_tag_zero_object() }
grep { $_->is_pool } @lane_lims;
}

my @data_products;
if ($self->has_product_rpt_list || $self->merge_lanes) {
@data_products =
map {
npg_pipeline::product->new(lims => $_,
rpt_list => $_->rpt_list,
selected_lanes => $selected_lanes)
if ($self->merge_by_library) {
my $all_lims = $self->lims->aggregate_libraries(\@lane_lims);
@data_lims = @{$all_lims->{'singles'}}; # Might be empty.
# Tag zero LIMS objects for all lanes, merged or unmerged.
push @data_lims, map { $tag0_lims{$_} } (sort keys %tag0_lims);

if ( @{$all_lims->{'merges'}} ) {
# If the libraries are merged across a subset of lanes under analysis,
# the 'selected_lanes' flag needs to be flipped to true.
if (!$self->_selected_lanes) {
my $rpt_list = $all_lims->{'merges'}->[0]->rpt_list;;
my $num_components =
npg_tracking::glossary::composition::factory::rpt_list
->new(rpt_list => $rpt_list)
->create_composition()->num_components();
if ($num_components != scalar @lane_lims) {
$self->_set_selected_lanes(1);
}
}
$self->has_product_rpt_list ? ($self->lims) :
$self->lims->aggregate_xlanes($self->positions);
} else {
my @lims = ();
foreach my $lane (@lane_lims) {
if ($self->is_indexed && $lane->is_pool) {
push @lims, $lane->children;
push @lims, $lane->create_tag_zero_object();
push @data_lims, @{$all_lims->{'merges'}};
}

} else {
push @lims, $lane;
# To keep backward-compatible order of pipeline invocations, add
# tag zero LIMS object at the end of other objects for the lane.
@data_lims = map {
exists $tag0_lims{$_->position} ?
($_->children, $tag0_lims{$_->position}) : $_
} @lane_lims;
}
}
@data_products = map { $lims2product->($_) } @lims;
}

return { 'data_products' => \@data_products,
'lanes' => [map { $lims2product->($_) } @lane_lims] };
return {
'data_products' => [map { $self->_lims_object2product($_) } @data_lims],
'lanes' => [map { $self->_lims_object2product($_) } @lane_lims]
};
}

#####
# The boolean flag below defines whether lane numbers are explicitly
# listed in directory and file names for merged products. It is set
# to true whenever a subset of all available lanes is analysed.
# If it is set to false by the builder method, it can be reset to true
# via the private writer when a full collection of products is constructed.
has q{_selected_lanes} => (
  isa        => q{Bool},
  is         => q{ro},
  writer     => q{_set_selected_lanes},
  lazy_build => 1,
);
sub _build__selected_lanes {
  my $self = shift;

  # When the products are defined by product_rpt_list, no value is
  # computed; an empty return leaves the flag false.
  if ($self->has_product_rpt_list) {
    return;
  }

  # Compare the lanes under analysis with all lanes known to LIMS.
  # An explicit separator is used so that different lists of positions
  # can never produce the same string (1,12 versus 11,2).
  my $requested = join q[,], $self->positions;
  my $available = join q[,], map { $_->position } $self->lims->children();

  return $requested ne $available;
}

#####
# Wraps a LIMS object into an npg_pipeline::product object.
# The rpt list of the LIMS object is used when it is set, otherwise
# the rpt list is derived from the LIMS object itself. The current
# value of the _selected_lanes flag is passed to the product.
sub _lims_object2product {
  my ($self, $lims_obj) = @_;

  my $rpt_list = $lims_obj->rpt_list;
  if (!$rpt_list) {
    $rpt_list = npg_tracking::glossary::rpt->deflate_rpt($lims_obj);
  }

  my %init_args = (
    rpt_list       => $rpt_list,
    lims           => $lims_obj,
    selected_lanes => $self->_selected_lanes,
  );

  return npg_pipeline::product->new(%init_args);
}

__PACKAGE__->meta->make_immutable;
Expand Down Expand Up @@ -403,6 +474,8 @@ __END__
=item npg_tracking::glossary::rpt
=item npg_tracking::glossary::composition::factory::rpt_list
=item st::api::lims
=item WTSI::DNAP::Utilities::Loggable
Expand All @@ -424,7 +497,7 @@ Marina Gourtovaia
=head1 LICENSE AND COPYRIGHT
Copyright (C) 2014,2015,2016,2017,2018,2019,2020 Genome Research Ltd.
Copyright (C) 2014,2015,2016,2017,2018,2019,2020,2023 Genome Research Ltd.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
Expand Down
72 changes: 8 additions & 64 deletions lib/npg_pipeline/function/p4_stage1_analysis.pm
Original file line number Diff line number Diff line change
Expand Up @@ -197,22 +197,13 @@ sub _get_index_lengths {
my ( $self, $lane_lims ) = @_;

my @index_length_array;

if ($lane_lims->inline_index_exists) {
# Tradis run - treat as a special case
my $index_start = $lane_lims->inline_index_start;
my $index_end = $lane_lims->inline_index_end;
if ($index_start && $index_end) {
push @index_length_array, $index_end - $index_start + 1;
}
} else {
my $n = 0;
my @cycle_counts = $self->read_cycle_counts();
my @reads_indexed = $self->reads_indexed();
foreach my $n (0..$#cycle_counts) {
if ($reads_indexed[$n]) { push @index_length_array, $cycle_counts[$n]; }
}
my $n = 0;
my @cycle_counts = $self->read_cycle_counts();
my @reads_indexed = $self->reads_indexed();
foreach my $n (0..$#cycle_counts) {
if ($reads_indexed[$n]) { push @index_length_array, $cycle_counts[$n]; }
}

return \@index_length_array;
}

Expand All @@ -221,7 +212,7 @@ sub _get_index_lengths {
# Determine parameters for the lane from LIMS information and create the hash from which the p4 stage1
# analysis param_vals file will be generated. Generate the vtfp/viv commands using this param_vals file.
#########################################################################################################
sub _generate_command_params { ## no critic (Subroutines::ProhibitExcessComplexity)
sub _generate_command_params {
my ($self, $lane_lims, $tag_list_file, $lane_product) = @_;
my %p4_params = (
samtools_executable => q{samtools},
Expand Down Expand Up @@ -302,52 +293,6 @@ sub _generate_command_params { ## no critic (Subroutines::ProhibitExcessComplexi
$p4_params{i2b_bc_qual_val} = q[tq];
}

if($lane_lims->inline_index_exists) {
my $index_start = $lane_lims->inline_index_start;
my $index_end = $lane_lims->inline_index_end;
my $index_read = $lane_lims->inline_index_read;

if ($index_start && $index_end && $index_read) {
$self->info(q{P4 stage1 analysis of a lane with inline indexes});

my($first, $final) = $self->read1_cycle_range();
if ($index_read == 1) {
$p4_params{i2b_bc_read} = 1;
$index_start += ($first-1);
$index_end += ($first-1);
$p4_params{i2b_first_index_0} = $index_start;
$p4_params{i2b_final_index_0} = $index_end;
$p4_params{i2b_first_index_1} = $first;
$p4_params{i2b_final_index_1} = $index_start-1;
$p4_params{i2b_first_0} = $index_end+1;
$p4_params{i2b_final_0} = $final;
if ($self->is_paired_read()) {
($first, $final) = $self->read2_cycle_range();
$p4_params{i2b_first_1} = $first;
$p4_params{i2b_final_1} = $final;
}
} elsif ($index_read == 2) {
$p4_params{i2b_bc_read} = 2;
$self->is_paired_read() or $self->logcroak(q{Inline index read (2) does not exist});
$p4_params{i2b_first_0} = $first;
$p4_params{i2b_final_0} = $final;
($first, $final) = $self->read2_cycle_range();
$index_start += ($first-1);
$index_end += ($first-1);
$p4_params{i2b_first_index_0} = $index_start;
$p4_params{i2b_final_index_0} = $index_end;
$p4_params{i2b_first_index_1} = $first;
$p4_params{i2b_final_index_1} = $index_start-1;
$p4_params{i2b_first_1} = $index_end+1;
$p4_params{i2b_final_1} = $final;
} else {
$self->logcroak("Invalid inline index read ($index_read)");
}
$p4_params{i2b_sec_bc_seq_val} = q{br};
$p4_params{i2b_sec_bc_qual_val} = q{qr};
}
}

if($self->_is_duplexseq($lane_lims)) {
$self->info(q{P4 stage1 analysis of a Duplex-Seq lane});

Expand Down Expand Up @@ -417,8 +362,7 @@ sub _generate_command_params { ## no critic (Subroutines::ProhibitExcessComplexi
}

### TODO: remove this read length comparison if biobambam will handle this case. Check clip reinsertion.
if($self->is_paired_read() && !$lane_lims->inline_index_exists) {
# omit BamAdapterFinder for inline index
if($self->is_paired_read()) {
my @range1 = $self->read1_cycle_range();
my $read1_length = $range1[1] - $range1[0] + 1;
my @range2 = $self->read2_cycle_range();
Expand Down
Loading

0 comments on commit 0521f96

Please sign in to comment.