Skip to content

Commit

Permalink
Add a method to TreePublisher to write json file and expose it in npg…
Browse files Browse the repository at this point in the history
…_tree_publisher script (#368)

* Updated the npg_publish_tree client and publish_tree test to write json file

* Fix for perl critic and dest_collection path

* Added write_json function and test for npg_publish_tree.pl script

* Added write_json method to TreePublisher plus tests

* Added predicate to TreePublisher for mlwh_json attribute
  • Loading branch information
marcomoscasgr authored Apr 26, 2022
1 parent 0b38502 commit fcc4e17
Show file tree
Hide file tree
Showing 3 changed files with 130 additions and 2 deletions.
10 changes: 8 additions & 2 deletions bin/npg_publish_tree.pl
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
# Destination for the 'restart-file|restart_file=s' command line option.
my $restart_file;
# Destination for the 'source-directory|source_directory=s' option.
my $source_directory;
# Destination for the 'verbose' flag.
my $verbose;
# Destination for the 'mlwh-json|mlwh_json=s' option. When defined, it is
# passed to the WTSI::NPG::HTS::TreePublisher constructor as 'mlwh_json'.
my $mlwh_json_filename;

# Destination for the 'include=s' option.
my @include;
# NOTE(review): presumably the destination of a matching 'exclude' option;
# that part of the option spec is not visible here - confirm.
my @exclude;
Expand All @@ -61,6 +62,7 @@
'include=s' => \@include,
'max-errors|max_errors=i' => \$max_errors,
'metadata=s' => \$metadata_file,
'mlwh-json|mlwh_json=s' => \$mlwh_json_filename,
'restart-file|restart_file=s' => \$restart_file,
'source-directory|source_directory=s' => \$source_directory,
'verbose' => \$verbose);
Expand Down Expand Up @@ -157,7 +159,9 @@ sub _read_metadata_file {
# The destination collection object does not depend on the optional
# publisher arguments, so it is created first.
my $coll = WTSI::NPG::iRODS::Collection->new($irods, $dest_collection);

# Optional constructor arguments are forwarded only when the corresponding
# command line option was supplied.
if ($max_errors) {
  push @init_args, 'max_errors', $max_errors;
}
if (defined $mlwh_json_filename) {
  push @init_args, 'mlwh_json', $mlwh_json_filename;
}

my $publisher = WTSI::NPG::HTS::TreePublisher->new(@init_args);

Expand Down Expand Up @@ -273,7 +277,9 @@ =head1 SYNOPSIS
E.g. [{"attribute": "attr1", "value": "val1"},
{"attribute": "attr2", "value": "val2"}]
--mlwh-json
--mlwh_json Path of a JSON file to which information about the root collection is written.
Optional.
--restart-file
--restart_file A file path where a record of successfully published
files will be recorded in JSON format on exit. If the
Expand Down
43 changes: 43 additions & 0 deletions lib/WTSI/NPG/HTS/TreePublisher.pm
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,17 @@ use WTSI::DNAP::Utilities::Params qw[function_params];
use WTSI::NPG::HTS::BatchPublisher;
use WTSI::NPG::HTS::PublishState;

use JSON;
use Readonly;

with qw[
WTSI::DNAP::Utilities::Loggable
WTSI::NPG::HTS::RunPublisher
WTSI::NPG::HTS::PathLister
];

our $VERSION = '';
Readonly::Scalar my $JSON_FILE_VERSION => '1.1';

has 'obj_factory' =>
(does => 'WTSI::NPG::HTS::DataObjectFactory',
Expand Down Expand Up @@ -60,6 +64,14 @@ has 'require_checksum_cache' =>
documentation => 'A list of file suffixes for which MD5 cache files ' .
'must be provided and will not be created on the fly');

# Optional path of the JSON file written at the end of publish_tree. The
# has_mlwh_json predicate reports whether a path was supplied, which is how
# publish_tree detects a clash with its mlwh_json_cb argument.
has 'mlwh_json' =>
  (is            => 'ro',
   isa           => 'Str',
   predicate     => 'has_mlwh_json',
   required      => 0,
   documentation => 'The json file to which information about the irods collection ' .
                    'folder will be added. Cannot be used with mlwh_json_cb defined.');

=head2 publish_tree
Arg [1] : File batch, ArrayRef[Str].
Expand All @@ -80,6 +92,10 @@ has 'require_checksum_cache' =>
Function returning true for each file path to be published.
CodeRef. Optional.
mlwh_json_cb
Callback writing information about data objects to a JSON file.
CodeRef. Optional. Cannot be used with the mlwh_json attribute set.
Example : my ($num_files, $num_processed, $num_errors) =
$pub->publish_tree($files,
primary_cb => sub { ... },
Expand All @@ -103,9 +119,32 @@ has 'require_checksum_cache' =>
my @named = qw[primary_cb secondary_cb extra_cb filter mlwh_json_cb];
my $params = function_params($positional, @named);

=head2 write_json

  Arg [1]    : None.

  Example    : $pub->write_json;
  Description: Write a JSON document to the file named by the mlwh_json
               attribute, overwriting any existing file. The document
               has two keys: 'version' (the JSON file format version)
               and 'irods_collection' (the value of dest_collection).
               Raises an error through logconfess if the file cannot be
               opened, written to or closed.
  Returntype : True (1) on success.

=cut

sub write_json {
  my ($self) = @_;

  # Method calls are not interpolated inside quoted strings, so the path is
  # copied into a plain scalar for use in open() and in the error messages.
  my $json_path = $self->mlwh_json;
  my $json_hash = {
                   version          => $JSON_FILE_VERSION,
                   irods_collection => $self->dest_collection
                  };

  # encode_json returns UTF-8 encoded octets. The handle therefore must not
  # apply an encoding layer of its own, otherwise any non-ASCII text would
  # be encoded twice.
  open my $json_fh, '>:raw', $json_path or
    $self->logconfess(q[could not open ml warehouse json file ] .
                      qq[$json_path]);

  print {$json_fh} encode_json($json_hash) or
    $self->logconfess(q[could not write to ml warehouse json file ] .
                      qq[$json_path]);

  # Buffered write errors only surface when the handle is closed.
  close $json_fh or
    $self->logconfess(q[could not close ml warehouse json file ] .
                      qq[$json_path]);

  return 1;
}

sub publish_tree {
my ($self, $files) = $params->parse(@_);

if ($self->has_mlwh_json && defined $params->mlwh_json_cb) {
$self->logconfess('The mlwh_json_cb cannot be defined with the mlwh_json attribute set');
}
if (defined $params->filter) {
ref $params->filter eq 'CODE' or
$self->logconfess('The filter argument must be a CodeRef');
Expand Down Expand Up @@ -171,6 +210,10 @@ has 'require_checksum_cache' =>
$num_errors += $ne;
}

if ($self->has_mlwh_json) {
$self->write_json();
}

return ($num_files, $num_processed, $num_errors);
}
}
Expand Down
79 changes: 79 additions & 0 deletions t/lib/WTSI/NPG/HTS/TreePublisherTest.pm
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,21 @@ use File::Basename;
use File::Spec::Functions qw[abs2rel catfile];
use Log::Log4perl;
use Test::More;
use Test::Exception;

use base qw[WTSI::NPG::HTS::Test];

use WTSI::NPG::HTS::TreePublisher;
use WTSI::NPG::iRODS;

use JSON;

Log::Log4perl::init('./etc/log4perl_tests.conf');

my $pid = $PID;
my $test_counter = 0;
my $data_path = 't/data';
my $bin_path = 'bin';

my $irods_tmp_coll;

Expand Down Expand Up @@ -103,6 +107,81 @@ sub publish_tree : Test(58) {
check_metadata($irods, map { catfile($irods_tmp_coll, $_) } @observed_paths);
}

# Runs the npg_publish_tree.pl script with the --mlwh_json option and checks
# that it exits successfully and leaves the requested JSON file behind.
sub npg_publish_tree_pl_writes_json : Test(2) {
  my $json_file  = 'metadata.json';
  my $source_dir = $data_path . '/treepublisher';
  my $script     = $bin_path . '/npg_publish_tree.pl';

  # List-form system() with the current perl interpreter, so no shell is
  # involved in running the script.
  my $exit_status = system($^X, $script,
                           '--mlwh_json',        $json_file,
                           '--collection',       $irods_tmp_coll,
                           '--source_directory', $source_dir);
  ok($exit_status == 0, 'Script npg_publish_tree.pl correctly exited');

  ok(-e $json_file, 'File json in npg_publish_tree_script correctly created');

  # Remove the output so that it cannot influence other tests.
  unlink $json_file;
}

# Publishes the test tree with the mlwh_json attribute set, then checks that
# the JSON file exists and that its 'irods_collection' value is the
# destination collection.
sub write_json_correct_keyvalue : Test(2) {
  my $source_path        = "${data_path}/treepublisher";
  my $mlwh_json_filename = 'metadata.json';

  my $irods = WTSI::NPG::iRODS->new(environment          => \%ENV,
                                    strict_baton_version => 0);
  my $pub = WTSI::NPG::HTS::TreePublisher->new
    (irods            => $irods,
     source_directory => $source_path,
     dest_collection  => $irods_tmp_coll,
     mlwh_json        => $mlwh_json_filename);
  my @files = grep { -f } $pub->list_directory($source_path, recurse => 1);

  $pub->publish_tree(\@files);
  ok(-e $mlwh_json_filename, 'File json in write_json correctly created');

  # decode_json expects UTF-8 encoded octets, so the file is read without a
  # decoding layer. There is no $self in this test method, so failures are
  # reported with a plain die, which aborts the method.
  open my $json_fh, '<:raw', $mlwh_json_filename or
    die "could not open ml warehouse json file '$mlwh_json_filename'";
  # Read the whole file rather than only its first line.
  my $json_text = join q[], <$json_fh>;
  close $json_fh or
    die "could not close ml warehouse json file '$mlwh_json_filename'";

  my $json_hash = decode_json($json_text);
  is($json_hash->{irods_collection}, $irods_tmp_coll,
     'Correct irods collection folder in json file');

  # Remove the output so that it cannot influence other tests.
  unlink $mlwh_json_filename;
}

# Publishes the test tree with the mlwh_json attribute set and no JSON
# callback, then checks that the JSON file was created.
sub publish_tree_mlwh_json : Test(1) {
  my $json_file  = 'metadata.json';
  my $source_dir = $data_path . '/treepublisher';

  my $irods_handle = WTSI::NPG::iRODS->new(environment          => \%ENV,
                                           strict_baton_version => 0);
  my %publisher_args = (irods            => $irods_handle,
                        source_directory => $source_dir,
                        dest_collection  => $irods_tmp_coll,
                        mlwh_json        => $json_file);
  my $publisher = WTSI::NPG::HTS::TreePublisher->new(%publisher_args);

  # Keep only plain files from the recursive directory listing.
  my @regular_files;
  foreach my $path ($publisher->list_directory($source_dir, recurse => 1)) {
    if (-f $path) {
      push @regular_files, $path;
    }
  }

  $publisher->publish_tree(\@regular_files);
  ok(-e $json_file, 'File json correctly created with no callback');

  # Remove the output so that it cannot influence other tests.
  unlink $json_file;
}

# Checks that publish_tree raises an error when the mlwh_json attribute is
# set and an mlwh_json_cb callback is also supplied, and that no JSON file
# is written in that case.
sub publish_tree_mlwh_json_plus_cb : Test(2) {
  my $source_path        = "${data_path}/treepublisher";
  my $mlwh_json_filename = 'metadata.json';

  my $irods = WTSI::NPG::iRODS->new(environment          => \%ENV,
                                    strict_baton_version => 0);
  my $pub = WTSI::NPG::HTS::TreePublisher->new
    (irods            => $irods,
     source_directory => $source_path,
     dest_collection  => $irods_tmp_coll,
     mlwh_json        => $mlwh_json_filename);
  my @files = grep { -f } $pub->list_directory($source_path, recurse => 1);

  # No comma may follow the block: with one, the description string is not
  # received by dies_ok.
  dies_ok {
    $pub->publish_tree(\@files,
                       mlwh_json_cb => sub {
                         return 1;
                       });
  } 'publish_tree correctly exited with error (json callback clash)';
  ok(! -e $mlwh_json_filename, 'No json file as expected (json callback clash)');

  # If the file was created against expectation, remove it so that it cannot
  # influence other tests. Unlinking a missing file is harmless.
  unlink $mlwh_json_filename;
}

sub publish_tree_filter : Test(4) {
my $irods = WTSI::NPG::iRODS->new(environment => \%ENV,
strict_baton_version => 0);
Expand Down

0 comments on commit fcc4e17

Please sign in to comment.