diff --git a/bin/npg_publish_tree.pl b/bin/npg_publish_tree.pl index eb892791..3bd8808c 100755 --- a/bin/npg_publish_tree.pl +++ b/bin/npg_publish_tree.pl @@ -45,6 +45,7 @@ my $restart_file; my $source_directory; my $verbose; +my $mlwh_json_filename; my @include; my @exclude; @@ -61,6 +62,7 @@ 'include=s' => \@include, 'max-errors|max_errors=i' => \$max_errors, 'metadata=s' => \$metadata_file, + 'mlwh-json|mlwh_json=s' => \$mlwh_json_filename, 'restart-file|restart_file=s' => \$restart_file, 'source-directory|source_directory=s' => \$source_directory, 'verbose' => \$verbose); @@ -157,7 +159,9 @@ sub _read_metadata_file { if ($max_errors) { push @init_args, max_errors => $max_errors; } - +if (defined $mlwh_json_filename) { + push @init_args, mlwh_json => $mlwh_json_filename; +} my $coll = WTSI::NPG::iRODS::Collection->new($irods, $dest_collection); my $publisher = WTSI::NPG::HTS::TreePublisher->new(@init_args); @@ -273,7 +277,9 @@ =head1 SYNOPSIS E.g. [{"attribute": "attr1", "value": "val1"}, {"attribute": "attr2", "value": "val2"}] - + --mlwh-json + --mlwh_json Write information about the root collection to json file. + Optional. --restart-file --restart_file A file path where a record of successfully published files will be recorded in JSON format on exit. 
=head2 write_json

  Arg [1]    : None.

  Example    : $pub->write_json;
  Description: Write a JSON document to the file named by the mlwh_json
               attribute. The document has two keys: 'version', the JSON
               file format version, and 'irods_collection', the value
               of this publisher's dest_collection. An existing file at
               that path is overwritten. Calls logconfess if the file
               cannot be opened, written or closed.
  Returntype : Int, always 1.

=cut

sub write_json {
  my ($self) = @_;

  # Method calls are not interpolated inside quoted strings, so fetch the
  # path once and use the plain scalar when building error messages.
  my $path = $self->mlwh_json;

  my $json = encode_json({
    version          => $JSON_FILE_VERSION,
    irods_collection => $self->dest_collection,
  });

  # encode_json returns UTF-8 encoded octets. Writing them through an
  # :encoding(UTF-8) layer would encode any non-ASCII character twice,
  # so the handle is opened without an encoding layer.
  open my $json_fh, '>:raw', $path or
    $self->logconfess(q[could not open ml warehouse json file ] .
                      qq['$path': $!]);

  print {$json_fh} $json or
    $self->logconfess(q[could not write to ml warehouse json file ] .
                      qq['$path': $!]);

  # Buffered write errors only surface on close, so it is checked as well.
  close $json_fh or
    $self->logconfess(q[could not close ml warehouse json file ] .
                      qq['$path': $!]);

  return 1;
}
"${bin_path}/npg_publish_tree.pl", @script_args) == 0, 'Script npg_publish_tree.pl correctly exited'); + + ok(-e $mlwh_json_filename, 'File json in npg_publish_tree_script correctly created'); + unlink $mlwh_json_filename; +} + +sub write_json_correct_keyvalue : Test(2) { + my $source_path = "${data_path}/treepublisher"; + my $mlwh_json_filename = "metadata.json"; + + my $irods = WTSI::NPG::iRODS->new(environment => \%ENV, + strict_baton_version => 0); + my $pub = WTSI::NPG::HTS::TreePublisher->new + (irods => $irods, + source_directory => $source_path, + dest_collection => $irods_tmp_coll, + mlwh_json => $mlwh_json_filename); + my @files = grep { -f } $pub->list_directory($source_path, recurse => 1); + + $pub->publish_tree(\@files); + ok(-e $mlwh_json_filename, 'File json in write_json correctly created'); + my ($json_fh, $json_hash); + open $json_fh, '<:encoding(UTF-8)', $mlwh_json_filename or + self->logcroak(q[could not open ml warehouse json file] . + qq[$mlwh_json_filename]); + $json_hash = decode_json <$json_fh>; + ok($json_hash->{irods_collection} eq ${irods_tmp_coll}, 'Correct irods collection folder in json file'); + unlink $mlwh_json_filename; +} + +sub publish_tree_mlwh_json : Test(1) { + my $source_path = "${data_path}/treepublisher"; + my $mlwh_json_filename = "metadata.json"; + + my $irods = WTSI::NPG::iRODS->new(environment => \%ENV, + strict_baton_version => 0); + my $pub = WTSI::NPG::HTS::TreePublisher->new + (irods => $irods, + source_directory => $source_path, + dest_collection => $irods_tmp_coll, + mlwh_json => $mlwh_json_filename); + my @files = grep { -f } $pub->list_directory($source_path, recurse => 1); + + $pub->publish_tree(\@files); + ok(-e $mlwh_json_filename, 'File json correctly created with no callback'); + unlink $mlwh_json_filename; +} + +sub publish_tree_mlwh_json_plus_cb : Test(2) { + my $source_path = "${data_path}/treepublisher"; + my $mlwh_json_filename = "metadata.json"; + + my $irods = WTSI::NPG::iRODS->new(environment => 
\%ENV, + strict_baton_version => 0); + my $pub = WTSI::NPG::HTS::TreePublisher->new + (irods => $irods, + source_directory => $source_path, + dest_collection => $irods_tmp_coll, + mlwh_json => $mlwh_json_filename); + my @files = grep { -f } $pub->list_directory($source_path, recurse => 1); + + dies_ok{ + $pub->publish_tree(\@files, + mlwh_json_cb => sub { + return 1; + }); + }, 'publish_tree correctly exited with error (json callback clash)'; + ok(! -e $mlwh_json_filename, 'No json file as expected (json callback clash)'); +} + sub publish_tree_filter : Test(4) { my $irods = WTSI::NPG::iRODS->new(environment => \%ENV, strict_baton_version => 0);