Skip to content

Commit

Permalink
Merge pull request #187 from wtsi-npg/devel
Browse files Browse the repository at this point in the history
prep for release 2.8
  • Loading branch information
dozy authored Apr 18, 2018
2 parents 47026a8 + 6ac94db commit e66c330
Show file tree
Hide file tree
Showing 67 changed files with 3,190 additions and 594 deletions.
7 changes: 3 additions & 4 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,10 @@ env:
- PGVERSION="9.3"
- JANSSON_VERSION="2.9"
- BATON_VERSION="1.1.0"
- SAMTOOLS_VERSION="1.5"
- HTSLIB_VERSION="1.5"
- SAMTOOLS_VERSION="1.7"
- HTSLIB_VERSION="1.7"
- TEARS_VERSION="1.2.3"
- DISPOSABLE_IRODS_VERSION="1.2"
- RENCI_FTP_URL=ftp://ftp.renci.org
- DISPOSABLE_IRODS_VERSION="1.3"
- WTSI_NPG_GITHUB_URL=https://github.com/wtsi-npg

matrix:
Expand Down
2 changes: 1 addition & 1 deletion BuildONT.PL
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ my $build = BuildONT->new
'Try::Tiny' => '>= 0.22',
'URI' => '>= 1.67',
'WTSI::DNAP::Utilities' => 0,
'WTSI::NPG::iRODS' => '>= 2.8.0'
'WTSI::NPG::iRODS' => '>= 3.0.2'
},
recommends => {
'UUID' => '>= 0.24',
Expand Down
4 changes: 4 additions & 0 deletions BuildONT.pm
Original file line number Diff line number Diff line change
Expand Up @@ -21,17 +21,21 @@ our @ont_subset = (
'WTSI/NPG/HTS/ONT/GridIONRunAuditor.pm',
'WTSI/NPG/HTS/ONT/GridIONRunMonitor.pm',
'WTSI/NPG/HTS/ONT/GridIONRunPublisher.pm',
'WTSI/NPG/HTS/ONT/GridIONTarAuditor.pm',
'WTSI/NPG/HTS/ONT/MetaQuery.pm',
'WTSI/NPG/HTS/ONT/MinIONRunMonitor.pm',
'WTSI/NPG/HTS/ONT/MinIONRunPublisher.pm',
'WTSI/NPG/HTS/ONT/TarDataObject.pm',
'WTSI/NPG/HTS/ONT/Watcher.pm',
'WTSI/NPG/HTS/ChecksumCalculator',
'WTSI/NPG/HTS/PathLister.pm',
'WTSI/NPG/HTS/TarItem.pm',
'WTSI/NPG/HTS/TarManifest.pm',
'WTSI/NPG/HTS/TarPublisher.pm',
'WTSI/NPG/HTS/TarStream.pm',
'WTSI/NPG/HTS/Types.pm',
'npg_audit_gridion_run.pl',
'npg_audit_gridion_tar.pl',
'npg_gridion_meta_updater.pl',
'npg_gridion_run_monitor.pl',
'npg_minion_run_monitor.pl',
Expand Down
271 changes: 165 additions & 106 deletions MANIFEST

Large diffs are not rendered by default.

145 changes: 145 additions & 0 deletions bin/npg_audit_gridion_tar.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
#!/usr/bin/env perl

use strict;
use warnings;

use FindBin qw[$Bin];
use lib (-d "$Bin/../lib/perl5" ? "$Bin/../lib/perl5" : "$Bin/../lib");

use Data::Dump qw[pp];
use Getopt::Long;
use Log::Log4perl qw[:levels];
use Pod::Usage;

use WTSI::NPG::HTS::ONT::GridIONTarAuditor;

our $VERSION = '';

# Inline Log4perl configuration, passed to Log::Log4perl::init by reference
# when --verbose is requested without --debug and without --logconf.
my $verbose_config = <<'LOGCONF';
log4perl.logger = ERROR, A1
log4perl.logger.WTSI.NPG.HTS.ONT = INFO, A1
log4perl.logger.WTSI.NPG.HTS = INFO, A1
log4perl.logger.WTSI.NPG.iRODS.Publisher = INFO, A1
# Errors from WTSI::NPG::iRODS are propagated in the code to callers
# in WTSI::NPG::HTS::Illumina, so we do not need to see them directly:
log4perl.logger.WTSI.NPG.iRODS = OFF, A1
log4perl.appender.A1 = Log::Log4perl::Appender::Screen
log4perl.appender.A1.layout = Log::Log4perl::Layout::PatternLayout
log4perl.appender.A1.layout.ConversionPattern = %d %-5p %c - %m%n
log4perl.appender.A1.utf8 = 1
# Prevent duplicate messages with a non-Log4j-compliant Log4perl option
log4perl.oneMessagePerAppender = 1
LOGCONF

# Command line option values. Each stays undefined unless the
# corresponding option is supplied.
my ($collection, $debug, $log4perl_config, $verbose);

GetOptions(
    'collection=s' => \$collection,
    'debug'        => \$debug,
    # --help prints the full POD and exits successfully.
    'help'         => sub { pod2usage(-verbose => 2, -exitval => 0) },
    'logconf=s'    => \$log4perl_config,
    'verbose'      => \$verbose,
);

# Logging is initialised from exactly one source, in order of precedence:
#   1. a user-supplied Log4perl configuration file (--logconf),
#   2. the built-in verbose configuration (--verbose without --debug),
#   3. easy_init at DEBUG (--debug) or WARN (default) level, with the
#      WTSI.NPG.iRODS logger switched off.
if ($log4perl_config) {
    Log::Log4perl::init($log4perl_config);

    my $main_log = Log::Log4perl->get_logger('main');
    $main_log->info("Using log config file '$log4perl_config'");
}
elsif ($verbose and not $debug) {
    Log::Log4perl::init(\$verbose_config);
}
else {
    my %easy_args = (layout => '%d %-5p %c - %m%n',
                     utf8   => 1);
    if ($debug) {
        $easy_args{level} = $DEBUG;
    }
    else {
        $easy_args{level} = $WARN;
    }
    Log::Log4perl->easy_init(\%easy_args);

    my $irods_log = Log::Log4perl->get_logger('WTSI.NPG.iRODS');
    $irods_log->level($OFF);
}

# A destination collection is mandatory. Without one, print the usage
# message and exit with status 2.
if (not $collection) {
    pod2usage(-msg     => 'A --collection argument is required',
              -exitval => 2);
}

# Use the auditor class that this script actually loads
# (WTSI::NPG::HTS::ONT::GridIONTarAuditor). The previous code named
# GridIONRunAuditor, which this script never loads.
my $auditor = WTSI::NPG::HTS::ONT::GridIONTarAuditor->new
    (dest_collection => $collection);

my ($num_files, $num_published, $num_errors) = $auditor->check_all_files;

# One placeholder per argument. The previous format string had three
# placeholders for four arguments, so the published count was printed as
# the collection and the collection was formatted as the error count.
# NOTE(review): the collection reported is the --collection value passed to
# the auditor; confirm whether the auditor exposes a more specific accessor.
my $msg = sprintf q[Checked %d files, %d published to '%s' with %d errors],
    $num_files, $num_published, $collection, $num_errors;

# Always emit the summary, whatever log level was configured above.
my $log = Log::Log4perl->get_logger('main');
$log->level($ALL);

if ($num_errors == 0) {
    $log->info($msg);
}
else {
    # Any error makes the script die, giving a non-zero exit status.
    $log->logcroak($msg);
}

__END__
=head1 NAME
npg_audit_gridion_tar
=head1 SYNOPSIS
npg_audit_gridion_tar --collection <path> [--debug]
[--logconf <path>] [--verbose]
Options:
--collection The root collection in iRODS for GridION data. e.g.
'/seq/ont/gridion'.
--debug Enable debug level logging. Optional, defaults to
false.
--help Display help.
--logconf A log4perl configuration file. Optional.
--verbose Print messages while processing. Optional.
=head1 DESCRIPTION
Checks that the tar manifests and tar files of a single GridION run
(the results of a single flowcell) are in iRODS by comparing the contents
of the manifest with the contents of the tar files in the iRODS collection
into which the data were published.
The following are checked:
- Some tar manifest files are in iRODS.
- The tar files described in the manifest(s) are in iRODS.
- The complement of tarred files and their checksums correspond to those
described in manifest(s).
If all files are correct this script exits with success, otherwise it
exits with an error.
=head1 AUTHOR
Keith James <kdj@sanger.ac.uk>
=head1 COPYRIGHT AND DISCLAIMER
Copyright (C) 2018 Genome Research Limited. All Rights Reserved.
This program is free software: you can redistribute it and/or modify
it under the terms of the Perl Artistic License or the GNU General
Public License as published by the Free Software Foundation, either
version 3 of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
=cut
43 changes: 31 additions & 12 deletions bin/npg_gridion_run_monitor.pl
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,10 @@
my $debug;
my $log4perl_config;
my $output_dir;
my $poll_interval = 60;
my $quiet_interval = 60 * 60 * 24;
my $session_timeout = 60 * 20;
my $single_server;
my $staging_dir;
my $tmpdir = '/tmp';
my $verbose;
Expand All @@ -39,7 +42,10 @@
},
'logconf=s' => \$log4perl_config,
'output-dir|output_dir=s' => \$output_dir,
'poll-interval|poll_interval=i' => \$poll_interval,
'quiet-interval|quiet_interval=i' => \$quiet_interval,
'session-timeout|session_timeout=s' => \$session_timeout,
'single-server|single_server' => \$single_server,
'staging-dir|staging_dir=s' => \$staging_dir,
'tar_capacity|tar-capacity=i' => \$arch_capacity,
'tar-duration|tar_duration=i' => \$arch_duration,
Expand Down Expand Up @@ -74,7 +80,10 @@
arch_timeout => $arch_timeout,
dest_collection => $collection,
output_dir => $output_dir,
poll_interval => $poll_interval,
quiet_interval => $quiet_interval,
session_timeout => $session_timeout,
single_server => $single_server,
source_dir => $staging_dir,
tmpdir => $tmpdir);

Expand All @@ -85,11 +94,8 @@
my $num_errors = $monitor->start;
my $exit_code = $num_errors == 0 ? 0 : 4;

my $i = 0;
foreach my $dir (@{$monitor->watch_history}) {
$log->info("Watch history [$i]: '$dir'");
$i++;
}
$log->info('In progress: ', pp($monitor->devices_active));
$log->info('Completed: ', pp($monitor->devices_complete));

exit $exit_code;

Expand All @@ -102,8 +108,10 @@ =head1 NAME
=head1 SYNOPSIS
npg_gridion_run_monitor --collection <path> [--debug] [--logconf <path>]
--output-dir <path> --staging-dir <path>
[--tar-capacity <n>] [--tar-timeout <n>] [--tmpdir <path>] [--verbose]
--output-dir <path> [--poll-interval <n>] [--quiet-interval <n>]
[--single-server] --staging-dir <path>
[--tar-capacity <n>] [--tar-timeout <n>]
[--tmpdir <path>] [--verbose]
Options:
--collection The root iRODS collection in which to write data,
Expand All @@ -114,10 +122,21 @@ =head1 SYNOPSIS
--output-dir
--output_dir A writable local directory where log files and
file manifests will be written.
--poll-interval
--poll_interval The number of seconds between polls to the filesystem
to check for new experiment and device directories.
Optional, defaults to 60 seconds.
--quiet-interval
--quiet_interval The number of seconds after a publisher has successfully
completed during which time it will not be restarted
if its device directory remains in the staging directory.
Optional, defaults to 60 * 60 * 24 seconds.
--session-timeout
--session_timeout The number of seconds idle time after which a multi-file
tar session will be closed. Optional, defaults to 60 * 20
seconds.
--single-server
--single_server Connect to only one iRODS server.
--staging-dir
--staging_dir The data staging directory path to watch.
--tar-capacity
Expand All @@ -137,10 +156,10 @@ =head1 SYNOPSIS
=head1 DESCRIPTION
Uses inotify to monitor a staging area for new GridION experiment
result directories. Launches a
WTSI::NPG::HTS::ONT::GridIONRunPublisher for each existing device
directory and for any new device directory created.
Polls a staging area for new GridION experiment result
directories. Launches a WTSI::NPG::HTS::ONT::GridIONRunPublisher for
each existing device directory and for any new device directory
created.
For full documentation see WTSI::NPG::HTS::ONT::GridIONRunMonitor.
Expand All @@ -150,7 +169,7 @@ =head1 AUTHOR
=head1 COPYRIGHT AND DISCLAIMER
Copyright (C) 2017 Genome Research Limited. All Rights Reserved.
Copyright (C) 2017, 2018 Genome Research Limited. All Rights Reserved.
This program is free software: you can redistribute it and/or modify
it under the terms of the Perl Artistic License or the GNU General
Expand Down
100 changes: 100 additions & 0 deletions bin/npg_irods_getstream.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
#!/bin/bash
#
# Copyright (C) 2017 Genome Research Limited. All Rights Reserved.

# This program is free software: you can redistribute it and/or modify
# it under the terms of the Perl Artistic License or the GNU General
# Public License as published by the Free Software Foundation, either
# version 3 of the License, or (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.

set -eo pipefail

# set -x

# Print the usage message to STDERR.
#
# The here-document delimiter is deliberately unquoted so that $0 and the
# version are expanded. With a quoted delimiter the literal text '$0' and
# '$VERSION' was printed. VERSION is not set anywhere in this script, so a
# placeholder is shown unless the caller provides one.
usage() {
    cat 1>&2 << EOF
This script reads a stream from a data object in iRODS and writes to
STDOUT. The stream is tee'd through md5sum and the result compared to
the expected checksum returned by ichksum. The script will exit with
an error if these checksums do not concur.
Version: ${VERSION:-unknown}
Author: Keith James <kdj@sanger.ac.uk>
Usage: $0 [-h] <iRODS path>
Options:
-h Print usage and exit.
EOF
}

# Remove the temporary working directory named by TMPD, if it exists, and
# exit with the status that was current when the handler was entered.
cleanup() {
    local status=$?

    if [ -d "$TMPD" ]; then
        rm -rf "$TMPD"
    fi

    exit $status
}

# Run the handler on normal exit and on interrupt/termination signals.
trap cleanup EXIT INT TERM

# Create a uniquely named temporary directory under $TMPDIR (default /tmp)
# and print its path on STDOUT.
#
# mktemp is called directly rather than through 'echo $(...)', so that a
# failure to create the directory is returned to the caller instead of being
# masked by echo's success. Any trailing slash on TMPDIR is removed and a
# single separator inserted, so both 'TMPDIR=/tmp' and 'TMPDIR=/tmp/' work.
make_temp_dir() {
    local tmp_root="${TMPDIR:-/tmp}"

    mktemp -d "${tmp_root%/}/$(basename -- "$0").XXXXXXXXXX"
}

# Non-core executables (i.e. excluding awk, cat etc.)
ICHKSUM=ichksum
MD5SUM=md5sum
TEARS=tears

IRODS_PATH=

# Only -h is supported. The optstring previously also declared an unhandled
# '-t <arg>' option. The leading ':' selects getopts' silent mode so that the
# offending option character is available in OPTARG for our own message.
#
# All diagnostics go to STDERR because STDOUT carries the data stream.
while getopts ":h" option; do
    case "$option" in
        h)
            usage
            exit 0
            ;;
        *)
            usage
            echo "Invalid argument: -$OPTARG" 1>&2
            exit 1
            ;;
    esac
done

shift $((OPTIND-1))

# The single positional argument is the iRODS path of the data object.
IRODS_PATH="$1"

if [ -z "$IRODS_PATH" ] ; then
    usage
    echo -e "\nERROR:\n An iRODS path argument is required" 1>&2
    exit 2
fi

# The data object name is used to name the checksum file and to select the
# matching line of ichksum output. The unused COLLECTION and TIMESTAMP
# variables have been removed (TIMESTAMP also used %m, month, for minutes).
DATA_OBJECT=$(basename -- "$IRODS_PATH")

TMPDIR=/tmp/
TMPD=$(make_temp_dir)
MD5_FILE="$TMPD/$DATA_OBJECT.md5"
MD5_FIFO="$TMPD/md5.fifo"

# Select the ichksum line by literal substring match. Interpolating the name
# into an awk regex made '.', '+', '(' etc. in object names act as regex
# metacharacters. The name is passed through the environment so that awk
# performs no escape processing on it.
IRODS_MD5=$($ICHKSUM "$IRODS_PATH" |
                OBJ_NAME="$DATA_OBJECT" \
                awk 'index($0, ENVIRON["OBJ_NAME"]) { print $2 }')

if [ -z "$IRODS_MD5" ]; then
    echo -e "\nERROR: no iRODS MD5 was reported for '$IRODS_PATH'" 1>&2
    exit 3
fi

# Send the data from iRODS to md5sum and to STDOUT.
#
# md5sum reads from a named pipe as a background job, so that it can be
# waited for explicitly. The previous 'tee >(...)' process substitution ran
# asynchronously and the checksum file could be read before it was written.
mkfifo "$MD5_FIFO"
$MD5SUM - < "$MD5_FIFO" | awk '{ print $1 }' > "$MD5_FILE" &
MD5_PID=$!

$TEARS -r "$IRODS_PATH" | tee "$MD5_FIFO"

# Block until the checksum has been written in full.
wait "$MD5_PID"

LOCAL_MD5=$(<"$MD5_FILE")
if [ "$LOCAL_MD5" != "$IRODS_MD5" ]; then
    # STDOUT carries the data stream, so report on STDERR.
    echo -e "\nERROR: local MD5 '$LOCAL_MD5'" \
         "did not match iRODS MD5 '$IRODS_MD5'" 1>&2
    exit 3
fi
Loading

0 comments on commit e66c330

Please sign in to comment.