Skip to content

Commit

Permalink
Merge pull request #745 from nerdstrike/feature/experiment_name
Browse files Browse the repository at this point in the history
Allow NovaSeqX samplesheets to embed instrument and slot
  • Loading branch information
mgcam authored Jul 21, 2023
2 parents 5b50f50 + db0b91b commit e81164d
Show file tree
Hide file tree
Showing 5 changed files with 94 additions and 37 deletions.
43 changes: 28 additions & 15 deletions lib/npg/samplesheet/novaseq_xseries.pm
Original file line number Diff line number Diff line change
Expand Up @@ -66,18 +66,18 @@ on this instrument model.
The DRAGEN analysis can process a limited number of distinct analysis
configurations. The germline and RNA alignment sections of the generated
samplesheet will contain as many samples as possible within the limit set by
samplesheet will contain as many samples as possible within the limit set by
the C<dragen_max_number_of_configs> attribute. The default value for this
attribute is 4, which is the number of distinct configurations that the
on-board DRAGEN analysis can handle.
In the BCLConvert section, each combination of index lengths counts as a
unique configuration. If the number of these configurations exceeds the value
of the C<dragen_max_number_of_configs> attribute, no DRAGEN analysis
sections are added to the samplesheet.
sections are added to the samplesheet.
See specification in
L<https://support-docs.illumina.com/SHARE/SampleSheetv2/Content/SHARE/SampleSheetv2/Settings_fNV_mX.htm>
L<https://support-docs.illumina.com/SHARE/SampleSheetv2/Content/SHARE/SampleSheetv2/Settings_fNV_mX.htm>
A full listing of analysis options is available in
L<https://support-docs.illumina.com/SW/DRAGEN_v41/Content/SW/DRAGEN/OptionReference.htm>
Expand Down Expand Up @@ -171,7 +171,7 @@ has 'keep_fastq' => (
Variant calling mode, defaults to C<None>, other valid options are
C<SmallVariantCaller> and C<AllVariantCallers>
=cut

has 'varcall' => (
Expand Down Expand Up @@ -306,8 +306,10 @@ sub _build_run_name {

my $run_name;
if ($self->has_id_run()) {
$run_name = $self->id_run;
# Embed instrument's Sanger network name and slot
$run_name = sprintf '%s_%s_%s', $self->id_run, $self->run->instrument->name, $self->get_instrument_side;
} else {
# Run is not tracked, generate a placeholder ID
my $ug = Data::UUID->new();
my @a = split /-/xms, $ug->to_string($ug->create());
# Add a random string at the end so that the batch can be reused.
Expand All @@ -333,21 +335,14 @@ sub _build_file_name {

my $file_name;
if ($self->has_id_run) {
my $side = $self->run->is_tag_set('fc_slotA') ? 'A' :
($self->run->is_tag_set('fc_slotB') ? 'B' : q[]);
if (!$side) {
croak 'Slot is not set for run ' . $self->id_run;
}
$file_name = join q[_],
$self->run->instrument->name,
$self->id_run,
$side,
$self->run_name,
q[ssbatch] . $self->batch_id;
} else {
$file_name = $self->run_name;
}

my $date = DateTime->now()->strftime('%y%m%d'); # 230602 for 2 June 2023
my $date = DateTime->now()->strftime('%y%m%d'); # 230602 for 2 June 2023
$file_name = sprintf '%s_%s.csv', $date, $file_name;

return $file_name;
Expand Down Expand Up @@ -642,7 +637,7 @@ sub add_common_headers {
$self->_add_line(qw(InstrumentType NovaSeqXPlus));
$self->_add_line();

# Reads section
# Reads section
$self->_add_line('[Reads]');
$self->_add_line(q[Read1Cycles], $READ1_LENGTH);
$self->_add_line(q[Read2Cycles], $READ2_LENGTH);
Expand Down Expand Up @@ -695,6 +690,24 @@ sub do_libtype_tenx_test {
return any { $lt =~ /$_/xmsi } @TENX_ANALYSES_LIB_TYPES;
}

=head2 get_instrument_side

Inspects the tags of the run to find out which slot (side) of the instrument
the run is assigned to. Returns C<A> if the C<fc_slotA> tag is set, otherwise
C<B> if the C<fc_slotB> tag is set. Croaks, quoting the run ID, if neither
tag is set.

=cut

sub get_instrument_side {
  my ($self) = @_;

  # Tags are examined in this order, so slot A takes precedence
  # should both tags happen to be set.
  my @slot_tags = (
    ['fc_slotA', 'A'],
    ['fc_slotB', 'B'],
  );

  foreach my $candidate (@slot_tags) {
    my ($tag, $slot) = @{$candidate};
    if ($self->run->is_tag_set($tag)) {
      return $slot;
    }
  }

  croak 'Slot is not set for run ' . $self->id_run;
}


sub _add_samples {
my ($self, @samples) = @_;

Expand Down
14 changes: 13 additions & 1 deletion lib/npg_tracking/illumina/run/short_info.pm
Original file line number Diff line number Diff line change
Expand Up @@ -95,8 +95,10 @@ sub _build_id_run {
$self->run_folder();
}

# Try to read id_run from the folder name
($inst_t, $inst_i, $id_run) = $self->run_folder() =~ /$NAME_PATTERN/gmsx;

# Failing that try the tracking DB.
if ( !$id_run ) {
if ($self->can(q(npg_tracking_schema)) and $self->npg_tracking_schema()) {
my $rs = $self->npg_tracking_schema()->resultset('Run')
Expand All @@ -107,8 +109,18 @@ sub _build_id_run {
}
}

# When no id_run is set (as in pick-up runs) attempt to parse an id_run from
# the experiment name recorded in the Illumina XML file.
# We embed additional information in NovaSeqX samplesheets which have no
# meaning here. See L<Samplesheet generator|npg::samplesheet::novaseq_xseries>
if ( !$id_run && $self->can('experiment_name') && $self->experiment_name() ) {
($id_run) = $self->experiment_name() =~ /\A[\s]*([\d]+)[\s]*\Z/xms;
($id_run, undef) = $self->experiment_name() =~ m{
\A
[\s]*
([\d]+) # id_run
([\w\d\s]*) # instrument name or other embedded info
\Z
}xms;
}

if( !$id_run ) {
Expand Down
32 changes: 17 additions & 15 deletions t/47-npg_samplesheet_novaseq_xseries.t
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ use_ok('npg::samplesheet::novaseq_xseries');
my $class = Moose::Meta::Class->create_anon_class(roles=>[qw/npg_testing::db/]);

my $schema_tracking = $class->new_object({})->create_test_db(
q[npg_tracking::Schema], q[t/data/dbic_fixtures]
q[npg_tracking::Schema], q[t/data/dbic_fixtures]
);

my $schema_wh = $class->new_object({})->create_test_db(
Expand All @@ -22,7 +22,7 @@ my $schema_wh = $class->new_object({})->create_test_db(
my $date = DateTime->now()->strftime('%y%m%d');

subtest 'create the generator object, test simple attributes' => sub {
plan tests => 13;
plan tests => 14;

my $g = npg::samplesheet::novaseq_xseries->new(
npg_tracking_schema => $schema_tracking,
Expand Down Expand Up @@ -51,21 +51,21 @@ subtest 'create the generator object, test simple attributes' => sub {
like($g->run_name, qr/\Assbatch97071_[\w]+\Z/,
'run name when id_run is not given');
like($g->file_name, qr/\A${date}_ssbatch97071_[\w]+\.csv\Z/,
'samplesheet file name when id_run is not given');

$g = npg::samplesheet::novaseq_xseries->new(
npg_tracking_schema => $schema_tracking,
mlwh_schema => $schema_wh,
id_run => 47446,
batch_id => 97071,
);
is($g->run_name, '47446', 'run name is run ID when the latter is known');
'samplesheet file name when id_run is not given');

my $run_row = $schema_tracking->resultset('Run')->create({
id_run => 47446,
id_instrument_format => 12,
id_instrument => 69,
team => 'A'
});

$g = npg::samplesheet::novaseq_xseries->new(
npg_tracking_schema => $schema_tracking,
mlwh_schema => $schema_wh,
id_run => 47446,
batch_id => 97071,
);
throws_ok { $g->file_name } qr/Slot is not set for run 47446/,
'error when the slot is unknown';

Expand All @@ -78,13 +78,15 @@ subtest 'create the generator object, test simple attributes' => sub {
id_run => 47446,
batch_id => 97071,
);
is ($g->file_name, "${date}_NVX1_47446_${slot}_ssbatch97071.csv",
is ($g->file_name, "${date}_47446_NVX1_${slot}_ssbatch97071.csv",
'correct file name is generated');
is($g->run_name, "47446_NVX1_${slot}",
'run name is constructed from run ID when possible');
if ($slot eq 'A') {
$run_row->unset_tag($tag);
}
}

$g = npg::samplesheet::novaseq_xseries->new(
npg_tracking_schema => $schema_tracking,
mlwh_schema => $schema_wh,
Expand All @@ -100,7 +102,7 @@ subtest 'create the generator object, test simple attributes' => sub {
mlwh_schema => $schema_wh,
id_run => 47446,
);
is ($g->file_name, "${date}_NVX1_47446_B_ssbatch99888.csv",
is ($g->file_name, "${date}_47446_NVX1_B_ssbatch99888.csv",
'correct file name is generated');

$run_row->update({id_instrument_format => 10, id_instrument => 68});
Expand All @@ -111,7 +113,7 @@ subtest 'create the generator object, test simple attributes' => sub {
);
throws_ok { $g->file_name }
qr/Instrument model is not NovaSeq X Series/,
'error when the run is registered on the wrong instrument model';
'error when the run is registered on the wrong instrument model';
};

1;
40 changes: 35 additions & 5 deletions t/60-illumina-run-short_info.t
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use strict;
use warnings;
use Test::More tests => 45;
use Test::More tests => 46;
use Test::Exception;
use File::Temp qw/ tempdir /;
use Moose::Meta::Class;
Expand Down Expand Up @@ -30,7 +30,7 @@ sub _build_run_folder {

my $path = $self->_short_path();
return first {$_ ne q()} reverse splitdir($path);

}


Expand All @@ -39,7 +39,7 @@ has q{_short_path} => ( isa => q{Str}, is => q{ro}, lazy_build =>
sub _build__short_path {
my ($self) = @_;
my @dir = $self->has_run_folder() ? glob $self->_folder_path_glob_pattern() . $self->run_folder()
: $self->has_id_run() ? glob $self->_folder_path_glob_pattern() . q(*_{r,}) . $self->id_run() . q{*}
: $self->has_id_run() ? glob $self->_folder_path_glob_pattern() . q(*_{r,}) . $self->id_run() . q{*}
: $self->has_name() ? glob $self->_folder_path_glob_pattern() . q{*} . $self->name()
: croak q{No run_folder, name or id_run provided}
;
Expand Down Expand Up @@ -71,6 +71,12 @@ sub _folder_path_glob_pattern {
}


# Minimal test class: consumes the short_info role and adds a settable
# experiment_name attribute, which the tests below populate in order to
# check how id_run is derived from it.
package test::nvx_short_info;
use Moose;
with qw(npg_tracking::illumina::run::short_info);

has 'experiment_name' => ( is => 'rw' );

package main;

sub create_staging {
Expand Down Expand Up @@ -187,7 +193,7 @@ my $run_folder = q{123456_IL2_1234};
});
is($short_info->short_reference(), $id_run, q{HS short_reference returns id_run});
is($short_info->name(),q{HS2_1234}, q{HS name worked out correctly});
is($short_info->short_reference(), q{123456_HS2_1234_B_205NNABXX}, q{HS short_reference returns run_folder});
is($short_info->short_reference(), q{123456_HS2_1234_B_205NNABXX}, q{HS short_reference returns run_folder});
}

#### test where name is given in the constructor
Expand Down Expand Up @@ -243,7 +249,7 @@ my $run_folder = q{123456_IL2_1234};

my $name;
my $flowcell_id;
lives_ok {
lives_ok {
$name = $short_info->name();
$flowcell_id= $short_info->flowcell_id();
} q{MiSeq runfolder - kit id in place of flowcell};
Expand All @@ -265,4 +271,28 @@ subtest 'process run_folder when no id_run present' => sub {
} qr[Unable to identify id_run], q[Throws when it obtain id_run];
};

subtest 'Test id_run extraction from within experiment_name' => sub {
  plan tests => 7;

  # Experiment names from which run 45678 must be recovered, each paired
  # with the description of the corresponding check.
  my @parsable_names = (
    ['45678_NVX1_A',   'id_run parsed from experiment name'],
    [' 45678_NVX1_A ', 'id_run parsed from loosely formatted experiment name'],
    ['45678_NVX1_A ',  'id_run parsed from experiment name with postfix spaces'],
    [' 45678_NVX1_A',  'id_run parsed from experiment name with prefixed spaces'],
    ['45678',          'Bare id_run as experiment name is fine'],
  );

  foreach my $case (@parsable_names) {
    my ($experiment_name, $description) = @{$case};
    my $info = test::nvx_short_info->new(
      experiment_name => $experiment_name,
      run_folder      => 'not_a_folder',
    );
    is($info->id_run, '45678', $description);
  }

  # A free-form experiment name with no leading digits gives no run ID.
  my $custom = test::nvx_short_info->new(
    experiment_name => 'NovaSeqX_WHGS_TruSeqPF_NA12878',
    run_folder      => 'not_a_folder',
  );
  throws_ok { $custom->id_run } qr{Unable to identify id_run with data provided}, 'Custom run name cannot be parsed';

  # An id_run given to the constructor is used as is, whatever the
  # experiment name says.
  my $explicit = test::nvx_short_info->new(
    id_run          => '45678',
    experiment_name => '56789_NVX1_A',
    run_folder      => 'not_a_folder',
  );
  is($explicit->id_run, '45678', 'Set id_run wins over experiment_name');
};

1;
2 changes: 1 addition & 1 deletion t/data/dbic_fixtures/300-Run.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
team: 'RAD'
is_paired: 0
priority: 1
flowcell_id:
flowcell_id:
- actual_cycle_count: 64
batch_id: 4861
expected_cycle_count: 64
Expand Down

0 comments on commit e81164d

Please sign in to comment.