Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a method to TreePublisher to write json file and expose it in npg_tree_publisher script #368

Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions bin/npg_publish_tree.pl
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
my $restart_file;
my $source_directory;
my $verbose;
my $mlwh_json_filename;

my @include;
my @exclude;
Expand All @@ -61,6 +62,7 @@
'include=s' => \@include,
'max-errors|max_errors=i' => \$max_errors,
'metadata=s' => \$metadata_file,
'mlwh-json|mlwh_json=s' => \$mlwh_json_filename,
'restart-file|restart_file=s' => \$restart_file,
'source-directory|source_directory=s' => \$source_directory,
'verbose' => \$verbose);
Expand Down Expand Up @@ -157,7 +159,9 @@ sub _read_metadata_file {
if ($max_errors) {
push @init_args, max_errors => $max_errors;
}

if (defined $mlwh_json_filename) {
push @init_args, mlwh_json => $mlwh_json_filename;
}
my $coll = WTSI::NPG::iRODS::Collection->new($irods, $dest_collection);
my $publisher = WTSI::NPG::HTS::TreePublisher->new(@init_args);

Expand Down Expand Up @@ -273,7 +277,9 @@ =head1 SYNOPSIS
E.g. [{"attribute": "attr1", "value": "val1"},
{"attribute": "attr2", "value": "val2"}]


--mlwh-json
--mlwh_json Path of a JSON file to which information about the root
collection is written. Optional.
--restart-file
--restart_file A file path where a record of successfully published
files will be recorded in JSON format on exit. If the
Expand Down
42 changes: 42 additions & 0 deletions lib/WTSI/NPG/HTS/TreePublisher.pm
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,17 @@ use WTSI::DNAP::Utilities::Params qw[function_params];
use WTSI::NPG::HTS::BatchPublisher;
use WTSI::NPG::HTS::PublishState;

use JSON;
use Readonly;

with qw[
WTSI::DNAP::Utilities::Loggable
WTSI::NPG::HTS::RunPublisher
WTSI::NPG::HTS::PathLister
];

our $VERSION = '';
Readonly::Scalar my $JSON_FILE_VERSION => '1.1';

has 'obj_factory' =>
(does => 'WTSI::NPG::HTS::DataObjectFactory',
Expand Down Expand Up @@ -60,6 +64,13 @@ has 'require_checksum_cache' =>
documentation => 'A list of file suffixes for which MD5 cache files ' .
'must be provided and will not be created on the fly');

has 'mlwh_json' =>
kjsanger marked this conversation as resolved.
Show resolved Hide resolved
(isa => 'Str',
is => 'ro',
required => 0,
documentation => 'The json file to which information about the irods collection ' .
'folder will be added. Cannot be used with mlwh_json_cb defined.');

=head2 publish_tree

Arg [1] : File batch, ArrayRef[Str].
Expand All @@ -80,6 +91,10 @@ has 'require_checksum_cache' =>
Function returning true for each file path to be published.
CodeRef. Optional.

mlwh_json_cb
Callback that writes information about a collection to a JSON file.
kjsanger marked this conversation as resolved.
Show resolved Hide resolved
CodeRef. Optional. Cannot be used with mlwh_json attribute set.
kjsanger marked this conversation as resolved.
Show resolved Hide resolved

Example : my ($num_files, $num_processed, $num_errors) =
$pub->publish_tree($files,
primary_cb => sub { ... },
Expand All @@ -103,9 +118,32 @@ has 'require_checksum_cache' =>
my @named = qw[primary_cb secondary_cb extra_cb filter mlwh_json_cb];
my $params = function_params($positional, @named);

# write_json
#
# Writes a JSON document describing the destination collection to the file
# named by the mlwh_json attribute. The document holds two keys: 'version'
# (the value of $JSON_FILE_VERSION) and 'irods_collection' (the value of
# dest_collection).
#
#   Arg [1]    : None.
#   Example    : $pub->write_json;
#   Returntype : 1 on success.
#   Raises     : Calls logconfess when mlwh_json is not set, or when the
#                file cannot be opened, written to or closed.
sub write_json {
  my ($self) = @_;

  my $path = $self->mlwh_json;
  defined $path or
    $self->logconfess('The mlwh_json attribute must be set to write ',
                      'the ml warehouse json file');

  # encode_json returns UTF-8 encoded octets, so the handle is opened
  # without an encoding layer; stacking :encoding(UTF-8) on top would
  # encode any non-ASCII character a second time.
  open my $json_fh, '>', $path or
    $self->logconfess(q[could not open ml warehouse json file ] .
                      qq['$path': $!]);

  my $json_hash = {
    version          => $JSON_FILE_VERSION,
    irods_collection => $self->dest_collection,
  };

  print {$json_fh} encode_json($json_hash) or
    $self->logconfess(q[could not write to ml warehouse json file ] .
                      qq['$path': $!]);

  # Buffered write errors only surface at close, so its result is checked.
  close $json_fh or
    $self->logconfess(q[could not close ml warehouse json file ] .
                      qq['$path': $!]);

  return 1;
}

sub publish_tree {
my ($self, $files) = $params->parse(@_);

if (defined $self->mlwh_json && defined $params->mlwh_json_cb) {
kjsanger marked this conversation as resolved.
Show resolved Hide resolved
$self->logconfess('The mlwh_json_cb cannot be defined with mlwh_json variable set');
kjsanger marked this conversation as resolved.
Show resolved Hide resolved
}
if (defined $params->filter) {
ref $params->filter eq 'CODE' or
$self->logconfess('The filter argument must be a CodeRef');
Expand Down Expand Up @@ -171,6 +209,10 @@ has 'require_checksum_cache' =>
$num_errors += $ne;
}

if (defined $self->mlwh_json) {
kjsanger marked this conversation as resolved.
Show resolved Hide resolved
$self->write_json();
}

return ($num_files, $num_processed, $num_errors);
}
}
Expand Down
79 changes: 79 additions & 0 deletions t/lib/WTSI/NPG/HTS/TreePublisherTest.pm
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,21 @@ use File::Basename;
use File::Spec::Functions qw[abs2rel catfile];
use Log::Log4perl;
use Test::More;
use Test::Exception;

use base qw[WTSI::NPG::HTS::Test];

use WTSI::NPG::HTS::TreePublisher;
use WTSI::NPG::iRODS;

use JSON;

Log::Log4perl::init('./etc/log4perl_tests.conf');

my $pid = $PID;
my $test_counter = 0;
my $data_path = 't/data';
my $bin_path = 'bin';

my $irods_tmp_coll;

Expand Down Expand Up @@ -103,6 +107,81 @@ sub publish_tree : Test(58) {
check_metadata($irods, map { catfile($irods_tmp_coll, $_) } @observed_paths);
}

# Runs bin/npg_publish_tree.pl with the --mlwh_json option under the current
# perl interpreter and checks that the script exits with status 0 and that
# the requested JSON file exists afterwards. The file is removed at the end.
sub npg_publish_tree_pl_writes_json : Test(2) {
  my $json_path = 'metadata.json';
  my $script    = "${bin_path}/npg_publish_tree.pl";

  # List form of system: the arguments never pass through a shell.
  my @command = ($^X, $script,
                 q[--mlwh_json],        $json_path,
                 q[--collection],       $irods_tmp_coll,
                 q[--source_directory], "${data_path}/treepublisher");
  my $exit_status = system @command;

  ok($exit_status == 0, 'Script npg_publish_tree.pl correctly exited');
  ok(-e $json_path, 'File json in npg_publish_tree_script correctly created');

  unlink $json_path;
}

# Publishes the treepublisher test data with the mlwh_json attribute set,
# then checks that the JSON file was created and that its irods_collection
# value equals the destination collection. The file is removed before the
# content assertion so that a failed comparison does not leave it behind.
sub write_json_correct_keyvalue : Test(2) {
  my $source_path        = "${data_path}/treepublisher";
  my $mlwh_json_filename = "metadata.json";

  my $irods = WTSI::NPG::iRODS->new(environment          => \%ENV,
                                    strict_baton_version => 0);
  my $pub = WTSI::NPG::HTS::TreePublisher->new
    (irods            => $irods,
     source_directory => $source_path,
     dest_collection  => $irods_tmp_coll,
     mlwh_json        => $mlwh_json_filename);
  my @files = grep { -f } $pub->list_directory($source_path, recurse => 1);

  $pub->publish_tree(\@files);
  ok(-e $mlwh_json_filename, 'File json in write_json correctly created');

  # decode_json expects UTF-8 encoded octets, so the file is read without a
  # decoding layer. There is no $self in this test method; failures to read
  # the file are reported with a plain die.
  open my $json_fh, '<', $mlwh_json_filename or
    die "could not open ml warehouse json file $mlwh_json_filename: $!";
  # Slurp the whole file rather than relying on it being a single line.
  my $json_text = do { local $/ = undef; <$json_fh> };
  close $json_fh or
    die "could not close ml warehouse json file $mlwh_json_filename: $!";
  unlink $mlwh_json_filename;

  my $json_hash = decode_json($json_text);
  is($json_hash->{irods_collection}, $irods_tmp_coll,
     'Correct irods collection folder in json file');
}

# Checks that publish_tree creates the file named by the mlwh_json attribute
# when no mlwh_json_cb callback is passed. The file is removed afterwards.
sub publish_tree_mlwh_json : Test(1) {
  my $json_path = 'metadata.json';
  my $tree_dir  = "${data_path}/treepublisher";

  my $irods_handle = WTSI::NPG::iRODS->new(environment          => \%ENV,
                                           strict_baton_version => 0);
  my %publisher_args = (irods            => $irods_handle,
                        source_directory => $tree_dir,
                        dest_collection  => $irods_tmp_coll,
                        mlwh_json        => $json_path);
  my $publisher = WTSI::NPG::HTS::TreePublisher->new(%publisher_args);

  # Only regular files are handed to the publisher.
  my @to_publish =
    grep { -f $_ } $publisher->list_directory($tree_dir, recurse => 1);
  $publisher->publish_tree(\@to_publish);

  ok(-e $json_path, 'File json correctly created with no callback');
  unlink $json_path;
}

# Checks that publish_tree raises an error when the mlwh_json attribute is
# set and an mlwh_json_cb callback is passed as well, and that no JSON file
# exists afterwards.
sub publish_tree_mlwh_json_plus_cb : Test(2) {
  my $source_path        = "${data_path}/treepublisher";
  my $mlwh_json_filename = "metadata.json";

  my $irods = WTSI::NPG::iRODS->new(environment          => \%ENV,
                                    strict_baton_version => 0);
  my $pub = WTSI::NPG::HTS::TreePublisher->new
    (irods            => $irods,
     source_directory => $source_path,
     dest_collection  => $irods_tmp_coll,
     mlwh_json        => $mlwh_json_filename);
  my @files = grep { -f } $pub->list_directory($source_path, recurse => 1);

  # The code reference is passed explicitly: a comma is not permitted after
  # a bare block argument, whereas this form is valid in every case.
  dies_ok(sub {
            $pub->publish_tree(\@files,
                               mlwh_json_cb => sub {
                                 return 1;
                               });
          }, 'publish_tree correctly exited with error (json callback clash)');
  ok(! -e $mlwh_json_filename, 'No json file as expected (json callback clash)');

  # If the file was created unexpectedly, do not leave it for other tests
  # that use the same file name.
  if (-e $mlwh_json_filename) {
    unlink $mlwh_json_filename;
  }
}

sub publish_tree_filter : Test(4) {
my $irods = WTSI::NPG::iRODS->new(environment => \%ENV,
strict_baton_version => 0);
Expand Down