From daeb7d9ba936140fe7d3d293944bebf8e1a87318 Mon Sep 17 00:00:00 2001 From: Marina Gourtovaia Date: Mon, 18 Dec 2023 17:37:19 +0000 Subject: [PATCH 1/5] Samplesheet daemon extension for NovaSeqX --- lib/npg/samplesheet/auto.pm | 43 +++++++++++++++++++++++--- lib/npg/samplesheet/novaseq_xseries.pm | 4 ++- t/47-npg_samplesheet_auto.t | 36 ++++++++++++++++++--- 3 files changed, 72 insertions(+), 11 deletions(-) diff --git a/lib/npg/samplesheet/auto.pm b/lib/npg/samplesheet/auto.pm index 267eaf17..ce007663 100644 --- a/lib/npg/samplesheet/auto.pm +++ b/lib/npg/samplesheet/auto.pm @@ -8,11 +8,13 @@ use Readonly; use File::Copy; use File::Spec::Functions; use Carp; +use List::MoreUtils qw(none); -use npg::samplesheet; use npg_tracking::Schema; use WTSI::DNAP::Warehouse::Schema; use st::api::lims::samplesheet; +use npg::samplesheet; +use npg::samplesheet::novaseq_xseries; with q(MooseX::Log::Log4perl); @@ -20,6 +22,10 @@ our $VERSION = '0'; Readonly::Scalar my $MISEQ_INSTRUMENT_FORMAT => 'MiSeq'; Readonly::Scalar my $DEFAULT_SLEEP => 90; +Readonly::Array my @INSTRUMENT_FORMATS => ( + $MISEQ_INSTRUMENT_FORMAT, + $npg::samplesheet::novaseq_xseries::NX_INSTRUMENT_FORMAT +); ##no critic (Subroutines::ProhibitUnusedPrivateSubroutine) @@ -97,7 +103,7 @@ Tests that a valid instrument format is used. sub BUILD { my $self = shift; - if ($self->instrument_format ne $MISEQ_INSTRUMENT_FORMAT) { + if (none {$self->instrument_format eq $_ } @INSTRUMENT_FORMATS) { my $m = sprintf 'Samplesheet auto-generator is not implemented for %s instrument format', $self->instrument_format; @@ -141,9 +147,20 @@ sub process { my $id_run = $r->id_run; $self->log->info('Considering ' . 
join q[,],$id_run,$r->instrument->name); - my $ss = npg::samplesheet->new( - run => $r, mlwh_schema => $self->mlwh_schema - ); + my $ss; + if ($self->instrument_format eq $MISEQ_INSTRUMENT_FORMAT) { + $ss = npg::samplesheet->new( + run => $r, mlwh_schema => $self->mlwh_schema + ); + } else { + $ss = npg::samplesheet::novaseq_xseries->new( + run => $r, id_run => $id_run, + mlwh_schema => $self->mlwh_schema, + align => 1, keep_fastq => 1, + varcall => q(AllVariantCallers) + ); + } + my $method_name = '_valid_samplesheet_file_exists_for_' . $self->instrument_format; my $generate_new = !$self->$method_name($ss, $id_run); @@ -241,6 +258,22 @@ sub _valid_samplesheet_file_exists_for_MiSeq {##no critic (NamingConventions::Ca return; } +sub _valid_samplesheet_file_exists_for_NovaSeqX {##no critic (NamingConventions::Capitalization) + my ($self, $ss_object, $id_run) = @_; + + # The default samplesheet name starts with the date string. A new + # samplesheet will be generated each day. Not a problem since the run + # should either progress or be cancelled. + + my $o = $ss_object->output; + if (-e $o) { + $self->log->info(qq($o already exists for $id_run)); + return 1; + }; + + return; +} + __PACKAGE__->meta->make_immutable; 1; diff --git a/lib/npg/samplesheet/novaseq_xseries.pm b/lib/npg/samplesheet/novaseq_xseries.pm index cff01692..18694210 100755 --- a/lib/npg/samplesheet/novaseq_xseries.pm +++ b/lib/npg/samplesheet/novaseq_xseries.pm @@ -18,6 +18,8 @@ with 'MooseX::Getopt'; our $VERSION = '0'; +Readonly::Scalar our $NX_INSTRUMENT_FORMAT => 'NovaSeqX'; + Readonly::Scalar my $READ1_LENGTH => 151; Readonly::Scalar my $READ2_LENGTH => 151; Readonly::Scalar my $LIST_INDEX_TAG1 => 2; @@ -325,7 +327,7 @@ sub _build_run_name { my $run_name; if ($self->id_run()) { - if ($self->run->instrument_format()->model() !~ /NovaSeqX/smx) { + if ($self->run->instrument_format()->model() !~ /$NX_INSTRUMENT_FORMAT/smx) { croak 'Instrument is not registered as NovaSeq X Series ' . 
'in the tracking database'; } diff --git a/t/47-npg_samplesheet_auto.t b/t/47-npg_samplesheet_auto.t index 813e2baa..9d6a6d31 100644 --- a/t/47-npg_samplesheet_auto.t +++ b/t/47-npg_samplesheet_auto.t @@ -1,17 +1,20 @@ use strict; use warnings; -use Test::More tests => 12; +use Test::More tests => 14; use Test::Exception; use File::Temp qw/ tempdir /; use Moose::Meta::Class; use Log::Log4perl qw(:easy); +use File::chdir; use_ok('npg::samplesheet::auto'); my $util = Moose::Meta::Class->create_anon_class( roles => [qw/npg_testing::db/])->new_object({}); -my $schema_wh = $util->create_test_db(q[WTSI::DNAP::Warehouse::Schema]); -my $schema = $util->create_test_db(q[npg_tracking::Schema]); +my $schema_wh = $util->create_test_db(q[WTSI::DNAP::Warehouse::Schema], + q[t/data/fixtures_lims_wh]); +my $schema = $util->create_test_db(q[npg_tracking::Schema], + q[t/data/dbic_fixtures]); { my $sm; @@ -27,7 +30,7 @@ my $schema = $util->create_test_db(q[npg_tracking::Schema]); instrument_format => 'NovaSeq' )} qr/Samplesheet auto-generator is not implemented for NovaSeq instrument format/, - 'MiSeq error for an invalid instrument format'; + 'Error for an invalid instrument format'; } { @@ -35,7 +38,7 @@ my $schema = $util->create_test_db(q[npg_tracking::Schema]); 't/data/samplesheet/miseq_default.csv'), 10262, 'id run retrieved from a samplesheet'); lives_and { is npg::samplesheet::auto::_id_run_from_samplesheet('some_file'), undef} - 'undef reftuned for a non-exisitng samplesheet'; + 'undef returned for a non-exisitng samplesheet'; } { @@ -66,4 +69,27 @@ my $schema = $util->create_test_db(q[npg_tracking::Schema]); ok(-e $new_file, 'moved file is in samplesheet_old directory'); } +{ + my $dir = tempdir(UNLINK => 1); + mkdir "$dir/samplesheets"; + + my $id_run = 47995; + my $run_row = $schema->resultset('Run')->find($id_run); + $run_row->update_run_status('run pending'); + is ($run_row->current_run_status_description(), 'run pending', + "run $id_run should be picked up by the 
daemon"); + { + local $CWD = $dir; + diag `ls -l`; + npg::samplesheet::auto->new( + npg_tracking_schema => $schema, + mlwh_schema => $schema_wh, + instrument_format => 'NovaSeqX' + )->process(); + } + my $glob = q[*_47995_NVX1_A_ssbatch98292.csv]; + my @files = glob "$dir/samplesheets/$glob"; + is (@files, 1, 'one NovaSeqX samplesheet file is generated'); +} + 1; From 5755c97e882502232f6a60dfb9037ba0c7ed73a0 Mon Sep 17 00:00:00 2001 From: Marina Gourtovaia Date: Tue, 19 Dec 2023 08:12:03 +0000 Subject: [PATCH 2/5] Added a script to generate samplesheets for pending NovaSeqX runs --- MANIFEST | 2 + bin/npg_samplesheet4NovaSeqX | 98 ++++++++++++++++++++ bin/npg_samplesheet_generator_NovaSeqXSeries | 4 +- 3 files changed, 102 insertions(+), 2 deletions(-) create mode 100755 bin/npg_samplesheet4NovaSeqX diff --git a/MANIFEST b/MANIFEST index 9a0e6ba4..2822beba 100644 --- a/MANIFEST +++ b/MANIFEST @@ -3,6 +3,8 @@ bin/illumina_instruments_uptime bin/npg_daemon_control bin/npg_move_runfolder bin/npg_samplesheet4MiSeq +bin/npg_samplesheet4NovaSeqX +bin/npg_samplesheet_generator_NovaSeqXSeries bin/npg_status_save bin/staging_area_monitor bin/npg_deletable_dr_runs diff --git a/bin/npg_samplesheet4NovaSeqX b/bin/npg_samplesheet4NovaSeqX new file mode 100755 index 00000000..1123d4c2 --- /dev/null +++ b/bin/npg_samplesheet4NovaSeqX @@ -0,0 +1,98 @@ +#!/usr/bin/env perl + +use strict; +use warnings; +use FindBin qw($Bin); +use lib ( -d "$Bin/../lib/perl5" ? 
"$Bin/../lib/perl5" : "$Bin/../lib" ); +use Log::Log4perl qw(:easy); + +use npg::samplesheet::auto; + +our $VERSION = '0'; + +Log::Log4perl->easy_init($INFO); + +my $log = Log::Log4perl->get_logger('main'); +$log->info('Starting npg samplesheet daemon for NovaSeqX instruments'); + +npg::samplesheet::auto->new(instrument_format => 'NovaSeqX')->loop(); + +0; + +__END__ + +=head1 NAME + +npg_samplesheet4NovaSeqX + +=head1 USAGE + + npg_samplesheet4NovaSeqX + +=head1 DESCRIPTION + +The script, once started, runs in perpetuity, generating Illumina-style +samplesheets for any NovaSeqX run with status 'run pending'. + +=head1 REQUIRED ARGUMENTS + +None + +=head1 OPTIONS + +=head1 DIAGNOSTICS + +=head1 CONFIGURATION + +Access to both npg_tracking and ml warehouse database is required. + +=head1 DEPENDENCIES + +=over + +=item strict + +=item warnings + +=item FindBin + +=item lib + +=item Log::Log4perl + +=item npg::samplesheet::auto + +=back + +=head1 EXIT STATUS + + Does not exit unless is sent a signal to terminate. + +=head1 INCOMPATIBILITIES + +=head1 BUGS AND LIMITATIONS + +=head1 AUTHOR + +Marina Gourtovaia Emg8@sanger.ac.ukE + +=head1 LICENSE AND COPYRIGHT + +Copyright (C) 2023 GRL. + +This file is part of NPG. + +NPG is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . 
+ +=cut diff --git a/bin/npg_samplesheet_generator_NovaSeqXSeries b/bin/npg_samplesheet_generator_NovaSeqXSeries index 3bf9ee5f..ddd769bd 100755 --- a/bin/npg_samplesheet_generator_NovaSeqXSeries +++ b/bin/npg_samplesheet_generator_NovaSeqXSeries @@ -65,8 +65,8 @@ npg_samplesheet_generator_NovaSeqXSeries =head1 DESCRIPTION -Generates a samplesheet for the NovaSeq Series X Illumina instrument and -DRAGEN analysis. +Generates a single samplesheet for the NovaSeq Series X Illumina instrument +and DRAGEN analysis. =head1 EXIT STATUS From 7337e997b1282a9db3bc1a1c1c0ac273e46c2504 Mon Sep 17 00:00:00 2001 From: Marina Gourtovaia Date: Tue, 19 Dec 2023 08:46:40 +0000 Subject: [PATCH 3/5] Suppressed variant calling. Added a test to test the correctness of passing the option to the samplesheet generator. --- lib/npg/samplesheet/auto.pm | 3 +-- t/47-npg_samplesheet_auto.t | 7 ++++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/lib/npg/samplesheet/auto.pm b/lib/npg/samplesheet/auto.pm index ce007663..e41d05a6 100644 --- a/lib/npg/samplesheet/auto.pm +++ b/lib/npg/samplesheet/auto.pm @@ -156,8 +156,7 @@ sub process { $ss = npg::samplesheet::novaseq_xseries->new( run => $r, id_run => $id_run, mlwh_schema => $self->mlwh_schema, - align => 1, keep_fastq => 1, - varcall => q(AllVariantCallers) + align => 1, keep_fastq => 1 ); } diff --git a/t/47-npg_samplesheet_auto.t b/t/47-npg_samplesheet_auto.t index 9d6a6d31..ca8814ca 100644 --- a/t/47-npg_samplesheet_auto.t +++ b/t/47-npg_samplesheet_auto.t @@ -1,11 +1,12 @@ use strict; use warnings; -use Test::More tests => 14; +use Test::More tests => 15; use Test::Exception; use File::Temp qw/ tempdir /; use Moose::Meta::Class; use Log::Log4perl qw(:easy); use File::chdir; +use Perl6::Slurp; use_ok('npg::samplesheet::auto'); @@ -90,6 +91,10 @@ my $schema = $util->create_test_db(q[npg_tracking::Schema], my $glob = q[*_47995_NVX1_A_ssbatch98292.csv]; my @files = glob "$dir/samplesheets/$glob"; is (@files, 1, 'one NovaSeqX 
samplesheet file is generated'); + my $compare_file = 't/data/samplesheet/dragen/' . + '231206_47995_NVX1_A_ssbatch98292_align.csv'; + is (slurp($files[0]), slurp($compare_file), + 'the NovaSeqX samplesheet is generated correctly'); } 1; From 58ce63dd49c22869337da4c335ac945dae4605a8 Mon Sep 17 00:00:00 2001 From: Marina Gourtovaia Date: Tue, 19 Dec 2023 09:04:18 +0000 Subject: [PATCH 4/5] Suppressed keeping interim fastq files --- lib/npg/samplesheet/auto.pm | 8 +++++--- t/47-npg_samplesheet_auto.t | 4 +++- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/lib/npg/samplesheet/auto.pm b/lib/npg/samplesheet/auto.pm index e41d05a6..f014b9ea 100644 --- a/lib/npg/samplesheet/auto.pm +++ b/lib/npg/samplesheet/auto.pm @@ -150,13 +150,15 @@ sub process { my $ss; if ($self->instrument_format eq $MISEQ_INSTRUMENT_FORMAT) { $ss = npg::samplesheet->new( - run => $r, mlwh_schema => $self->mlwh_schema + run => $r, + mlwh_schema => $self->mlwh_schema ); } else { $ss = npg::samplesheet::novaseq_xseries->new( - run => $r, id_run => $id_run, + id_run => $id_run, + run => $r, mlwh_schema => $self->mlwh_schema, - align => 1, keep_fastq => 1 + align => 1 ); } diff --git a/t/47-npg_samplesheet_auto.t b/t/47-npg_samplesheet_auto.t index ca8814ca..0e28dfa1 100644 --- a/t/47-npg_samplesheet_auto.t +++ b/t/47-npg_samplesheet_auto.t @@ -93,7 +93,9 @@ my $schema = $util->create_test_db(q[npg_tracking::Schema], is (@files, 1, 'one NovaSeqX samplesheet file is generated'); my $compare_file = 't/data/samplesheet/dragen/' . 
'231206_47995_NVX1_A_ssbatch98292_align.csv'; - is (slurp($files[0]), slurp($compare_file), + my $expected = slurp($compare_file); + $expected =~ s/KeepFastq,TRUE,,,/KeepFastq,FALSE,,,/; + is (slurp($files[0]), $expected, 'the NovaSeqX samplesheet is generated correctly'); } From 9a2c9f0a29aeab37388b0716af7d057aaedea986 Mon Sep 17 00:00:00 2001 From: Marina Gourtovaia Date: Thu, 4 Jan 2024 16:14:19 +0000 Subject: [PATCH 5/5] Ability to derive id_run from a run record The old samplesheet generator had this ability. Rearranged the code so that the samplesheet generator for NovaSeqX also has this ability, which simplifies the code in the samplesheet daemon. NovaSeqX samplesheet generator retains ability to generate a samplesheet when neither id_run nor the run db record is set via the constructor. --- lib/npg/samplesheet.pm | 20 +------------------- lib/npg/samplesheet/auto.pm | 5 +++-- lib/npg/samplesheet/base.pm | 10 +++++++++- lib/npg/samplesheet/novaseq_xseries.pm | 26 +++++++++++++++++++++++--- t/47-npg_samplesheet_novaseq_xseries.t | 16 +++++++++++++--- 5 files changed, 49 insertions(+), 28 deletions(-) diff --git a/lib/npg/samplesheet.pm b/lib/npg/samplesheet.pm index ea70c816..72486c38 100755 --- a/lib/npg/samplesheet.pm +++ b/lib/npg/samplesheet.pm @@ -60,24 +60,6 @@ Readonly::Scalar my $MIN_COLUMN_NUM => 3; ####################### Public attributes ######################## ################################################################## -=head2 id_run - -An optional attribute - -=cut - -has '+id_run' => ( - 'lazy_build' => 1, - 'required' => 0, -); -sub _build_id_run { - my ($self) = @_; - if($self->has_tracking_run()){ - return $self->run()->id_run(); - } - croak 'id_run or a run is required'; -} - =head2 extend A boolean attribute, false by default. @@ -509,7 +491,7 @@ David K. Jackson Edavid.jackson@sanger.ac.ukE =head1 LICENSE AND COPYRIGHT -Copyright (C) 2019, 2020, 2023 Genome Research Ltd. +Copyright (C) 2019,2020,2023,2024 Genome Research Ltd. 
This file is part of NPG. diff --git a/lib/npg/samplesheet/auto.pm b/lib/npg/samplesheet/auto.pm index f014b9ea..f397506e 100644 --- a/lib/npg/samplesheet/auto.pm +++ b/lib/npg/samplesheet/auto.pm @@ -155,7 +155,6 @@ sub process { ); } else { $ss = npg::samplesheet::novaseq_xseries->new( - id_run => $id_run, run => $r, mlwh_schema => $self->mlwh_schema, align => 1 @@ -307,6 +306,8 @@ __END__ =item Carp +=item List::MoreUtils + =item npg_tracking::Schema =item npg::samplesheet @@ -327,7 +328,7 @@ David K. Jackson Edavid.jackson@sanger.ac.ukE =head1 LICENSE AND COPYRIGHT -Copyright (C) 2012,2013,2014,2019,2021,2023 GRL. +Copyright (C) 2012,2013,2014,2019,2021,2023,2024 GRL. This file is part of NPG. diff --git a/lib/npg/samplesheet/base.pm b/lib/npg/samplesheet/base.pm index 866918c7..d8e7d05c 100755 --- a/lib/npg/samplesheet/base.pm +++ b/lib/npg/samplesheet/base.pm @@ -57,8 +57,16 @@ Run ID, an optional attribute. =cut has '+id_run' => ( + 'lazy_build' => 1, 'required' => 0, ); +sub _build_id_run { + my $self = shift; + if ($self->has_tracking_run()) { + return $self->run()->id_run(); + } + croak 'id_run or a run is required'; +} =head2 batch_id @@ -228,7 +236,7 @@ Marina Gourtovaia Emg8@sanger.ac.ukE =head1 LICENSE AND COPYRIGHT -Copyright (C) 2019, 2020, 2023 Genome Research Ltd. +Copyright (C) 2019,2020,2023,2024 Genome Research Ltd. This file is part of NPG. 
diff --git a/lib/npg/samplesheet/novaseq_xseries.pm b/lib/npg/samplesheet/novaseq_xseries.pm index 18694210..32551333 100755 --- a/lib/npg/samplesheet/novaseq_xseries.pm +++ b/lib/npg/samplesheet/novaseq_xseries.pm @@ -10,7 +10,9 @@ use List::MoreUtils qw(any none uniq); use List::Util qw(first max); use DateTime; use Data::UUID; +use Try::Tiny; +use npg_tracking::util::types; use st::api::lims::samplesheet; extends 'npg::samplesheet::base'; @@ -100,7 +102,7 @@ has 'dragen_software_version' => ( sub _build_dragen_software_version { my $self = shift; - if (!$self->has_id_run) { + if (!$self->id_run) { croak 'DRAGEN software version cannot be retrieved. ' . 'Either supply it as an argument or supply existing id_run'; } @@ -137,7 +139,7 @@ sub _build_file_name { my $self = shift; my $file_name; - if ($self->has_id_run) { + if ($self->id_run) { $file_name = join q[_], $self->run_name, q[ssbatch] . $self->batch_id; @@ -187,10 +189,24 @@ sub _build_output { has '+id_run' => ( + 'isa' => 'Maybe[NpgTrackingRunId]', 'documentation' => 'NPG run ID, optional; if supplied, the record for this '. 'run should exists in the run tracking database', ); +around '_build_id_run' => sub { + my $orig = shift; + my $self = shift; + # Parent's builder method errors if id_run cannot be inferred from + # the database record. Here we allow for this attribute to be undefined. + # Depending on how other atributes are defined, it might be possible to + # generate the samplesheet. + my $id_run; + try { + $id_run = $self->$orig(@_); + }; + return $id_run; +}; has '+batch_id' => ( 'documentation' => 'LIMS batch identifier, optional. If not set, will be ' . @@ -867,6 +883,10 @@ __END__ =item Data::UUID +=item Try::Tiny + +=item npg_tracking::util::types + =back =head1 BUGS AND LIMITATIONS @@ -879,7 +899,7 @@ Marina Gourtovaia Emg8@sanger.ac.ukE =head1 LICENSE AND COPYRIGHT -Copyright (C) 2023 Genome Research Ltd. +Copyright (C) 2023,2024 Genome Research Ltd. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/t/47-npg_samplesheet_novaseq_xseries.t b/t/47-npg_samplesheet_novaseq_xseries.t index 9778f372..9dad14c3 100644 --- a/t/47-npg_samplesheet_novaseq_xseries.t +++ b/t/47-npg_samplesheet_novaseq_xseries.t @@ -26,7 +26,7 @@ my $schema_wh = $class->new_object({})->create_test_db( my $date = DateTime->now()->strftime('%y%m%d'); subtest 'create the generator object, test simple attributes' => sub { - plan tests => 17; + plan tests => 19; my $g = npg::samplesheet::novaseq_xseries->new( npg_tracking_schema => $schema_tracking, @@ -107,11 +107,21 @@ subtest 'create the generator object, test simple attributes' => sub { id_run => 47446, samplesheet_path => "$dir/one/" ); + my $expected_file_name = "${date}_47446_NVX1_B_ssbatch99888.csv"; my $file_name = $g->file_name; - is ($file_name, "${date}_47446_NVX1_B_ssbatch99888.csv", - 'correct file name is generated'); + is ($file_name, $expected_file_name, 'correct file name is generated'); is ($g->output, "$dir/one/$file_name", 'correct output path is generated'); + $g = npg::samplesheet::novaseq_xseries->new( + npg_tracking_schema => $schema_tracking, + mlwh_schema => $schema_wh, + run => $run_row, + samplesheet_path => "$dir/one/" + ); + $file_name = $g->file_name; + is ($file_name, $expected_file_name, 'correct file name is generated'); + is ($g->output, "$dir/one/$file_name", 'correct output path is generated'); + $g = npg::samplesheet::novaseq_xseries->new( npg_tracking_schema => $schema_tracking, mlwh_schema => $schema_wh,