Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updated the npg_publish_tree client and publish_tree test to write json file #364

Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Added write_json method to TreePublisher plus tests
  • Loading branch information
marcomoscasgr committed Apr 14, 2022
commit 673b5d8f9cc7e9228f4cbb74b048866cd2a9021a
48 changes: 9 additions & 39 deletions bin/npg_publish_tree.pl
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,6 @@
use WTSI::NPG::iRODS::Collection;
use WTSI::NPG::HTS::TreePublisher;

use Readonly;
use JSON;

our $VERSION = '';

my $log_config = << 'LOGCONF'
Expand Down Expand Up @@ -65,10 +62,10 @@
'include=s' => \@include,
'max-errors|max_errors=i' => \$max_errors,
'metadata=s' => \$metadata_file,
'mlwh-json|mlwh_json=s' => \$mlwh_json_filename,
'restart-file|restart_file=s' => \$restart_file,
'source-directory|source_directory=s' => \$source_directory,
'verbose' => \$verbose,
'mlwh_json=s' => \$mlwh_json_filename);
'verbose' => \$verbose);

if ($verbose and not $debug) {
Log::Log4perl::init(\$log_config);
Expand Down Expand Up @@ -162,7 +159,9 @@ sub _read_metadata_file {
if ($max_errors) {
push @init_args, max_errors => $max_errors;
}

if (defined $mlwh_json_filename) {
push @init_args, mlwh_json => $mlwh_json_filename;
}
my $coll = WTSI::NPG::iRODS::Collection->new($irods, $dest_collection);
my $publisher = WTSI::NPG::HTS::TreePublisher->new(@init_args);

Expand All @@ -187,34 +186,6 @@ sub handler {

my @files = grep { -f } $publisher->list_directory($source_directory,
recurse => 1);

# Write a small JSON document naming an iRODS collection to a local file.
#
# Arg [1] : Path of the JSON file to create (overwritten if present), Str.
# Arg [2] : iRODS collection path to record, Str.
#
# The document has two keys: 'version' (the file format version, '1.0')
# and 'irods_collection' (the second argument).
#
# Returns : 1 on success. Croaks if either argument is undefined or
#           matches the blank pattern, or if the file cannot be opened,
#           written or closed.
sub write_json {
  my ($json_filename, $irods_collection) = @_;

  # This is a plain function with no invocant, so errors are raised with
  # Carp rather than through a logger method on an object.
  require Carp;

  my $blank = qr/^\s*$/sxm;
  if (!defined $json_filename    || $json_filename    =~ $blank ||
      !defined $irods_collection || $irods_collection =~ $blank) {
    Carp::croak(q[Wrong parameters in write_json]);
  }

  Readonly::Scalar my $JSON_FILE_VERSION => '1.0';

  open my $json_fh, '>:encoding(UTF-8)', $json_filename or
    Carp::croak(q[could not open ml warehouse json file ] .
                $json_filename);

  my $json_hash = {
    version          => $JSON_FILE_VERSION,
    irods_collection => $irods_collection
  };

  print {$json_fh} encode_json($json_hash) or
    Carp::croak(q[could not write to ml warehouse json file ] .
                $json_filename);

  # Buffered write errors only surface on close, so check it as well.
  close $json_fh or
    Carp::croak(q[could not close ml warehouse json file ] .
                $json_filename);

  return 1;
}

my @publish_args = (\@files,
secondary_cb => sub {
my ($obj) = @_;
Expand All @@ -234,9 +205,6 @@ sub write_json {
my ($num_files, $num_published, $num_errors) =
$publisher->publish_tree(@publish_args);

if (defined $mlwh_json_filename) {
write_json($mlwh_json_filename, $publisher->dest_collection);
}

# Set any permissions requested
if (@groups) {
Expand Down Expand Up @@ -309,7 +277,9 @@ =head1 SYNOPSIS
E.g. [{"attribute": "attr1", "value": "val1"},
{"attribute": "attr2", "value": "val2"}]


--mlwh-json
--mlwh_json Write information about the root collection to a JSON file.
Optional.
--restart-file
--restart_file A file path where a record of successfully published
files will be recorded in JSON format on exit. If the
Expand All @@ -319,7 +289,7 @@ =head1 SYNOPSIS
--source-directory
--source_directory The local path to load.
--verbose Print messages while processing. Optional.
--mlwh_json Write information about the collection to json file. Optional.

=head1 DESCRIPTION

Publish an arbitrary directory hierarchy to iRODS, set permissions and
Expand Down
42 changes: 40 additions & 2 deletions lib/WTSI/NPG/HTS/TreePublisher.pm
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,17 @@ use WTSI::DNAP::Utilities::Params qw[function_params];
use WTSI::NPG::HTS::BatchPublisher;
use WTSI::NPG::HTS::PublishState;

use JSON;
use Readonly;

with qw[
WTSI::DNAP::Utilities::Loggable
WTSI::NPG::HTS::RunPublisher
WTSI::NPG::HTS::PathLister
];

our $VERSION = '';
Readonly::Scalar my $JSON_FILE_VERSION => '1.1';

has 'obj_factory' =>
(does => 'WTSI::NPG::HTS::DataObjectFactory',
Expand Down Expand Up @@ -60,6 +64,13 @@ has 'require_checksum_cache' =>
documentation => 'A list of file suffixes for which MD5 cache files ' .
'must be provided and will not be created on the fly');

# Optional path of a local JSON file. When this attribute is set,
# publish_tree calls write_json after publishing, which records the
# destination collection in that file. publish_tree raises an error if an
# mlwh_json_cb callback is passed while this attribute is set.
has 'mlwh_json' =>
(isa => 'Str',
is => 'ro',
required => 0,
documentation => 'The json file to which information about the irods collection ' .
'folder will be added. Cannot be used with mlwh_json_cb defined.');

=head2 publish_tree

Arg [1] : File batch, ArrayRef[Str].
Expand All @@ -81,8 +92,8 @@ has 'require_checksum_cache' =>
CodeRef. Optional.

mlwh_json_cb
Callback writing metadata of all objects to a JSON file.
CodeRef. Optional.
Callback writing information about a collection to a JSON file.
CodeRef. Optional. Cannot be used with mlwh_json attribute set.

Example : my ($num_files, $num_processed, $num_errors) =
$pub->publish_tree($files,
Expand All @@ -107,9 +118,32 @@ has 'require_checksum_cache' =>
my @named = qw[primary_cb secondary_cb extra_cb filter mlwh_json_cb];
my $params = function_params($positional, @named);

# Write a JSON document describing the destination collection to the file
# named by the mlwh_json attribute (the file is overwritten if present).
#
# The document has two keys: 'version' (the value of $JSON_FILE_VERSION)
# and 'irods_collection' (this publisher's dest_collection).
#
# Returns : 1 on success. Calls logconfess if the file cannot be opened,
#           written or closed.
sub write_json {
  my ($self) = @_;

  # Method calls are not interpolated inside quoted strings, so the file
  # name is fetched once and used as a plain scalar in the messages below.
  my $json_filename = $self->mlwh_json;

  open my $json_fh, '>:encoding(UTF-8)', $json_filename or
    $self->logconfess(q[could not open ml warehouse json file ] .
                      $json_filename);

  my $json_hash = {
    version          => $JSON_FILE_VERSION,
    irods_collection => $self->dest_collection
  };

  # NOTE(review): encode_json returns UTF-8 encoded bytes and the handle
  # also applies an encoding layer; for non-ASCII collection paths this
  # looks like it would encode twice - confirm against the file's readers.
  print {$json_fh} encode_json($json_hash) or
    $self->logconfess(q[could not write to ml warehouse json file ] .
                      $json_filename);

  # Buffered write errors only surface on close, so check it as well.
  close $json_fh or
    $self->logconfess(q[could not close ml warehouse json file ] .
                      $json_filename);

  return 1;
}

sub publish_tree {
my ($self, $files) = $params->parse(@_);

if (defined $self->mlwh_json && defined $params->mlwh_json_cb) {
$self->logconfess('The mlwh_json_cb cannot be defined with mlwh_json variable set');
}
if (defined $params->filter) {
ref $params->filter eq 'CODE' or
$self->logconfess('The filter argument must be a CodeRef');
Expand Down Expand Up @@ -175,6 +209,10 @@ has 'require_checksum_cache' =>
$num_errors += $ne;
}

if (defined $self->mlwh_json) {
$self->write_json();
}

return ($num_files, $num_processed, $num_errors);
}
}
Expand Down
105 changes: 64 additions & 41 deletions t/lib/WTSI/NPG/HTS/TreePublisherTest.pm
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,14 @@ use File::Basename;
use File::Spec::Functions qw[abs2rel catfile];
use Log::Log4perl;
use Test::More;
use Test::Exception;

use base qw[WTSI::NPG::HTS::Test];

use WTSI::NPG::HTS::TreePublisher;
use WTSI::NPG::iRODS;

use JSON;
use Readonly;
use IPC::System::Simple qw(system);

Log::Log4perl::init('./etc/log4perl_tests.conf');

Expand All @@ -27,33 +26,6 @@ my $bin_path = 'bin';

my $irods_tmp_coll;

# Test helper: write a small JSON document naming an iRODS collection to a
# local file.
#
# Arg [1] : Path of the JSON file to create (overwritten if present), Str.
# Arg [2] : iRODS collection path to record, Str.
#
# The document has two keys: 'version' (the file format version, '1.0')
# and 'irods_collection' (the second argument).
#
# Returns : 1 on success. Croaks if either argument is undefined or
#           matches the blank pattern, or if the file cannot be opened,
#           written or closed.
sub write_json {
  my ($json_filename, $irods_collection) = @_;

  # This is a plain function with no invocant, so errors are raised with
  # Carp rather than through a logger method on an object.
  require Carp;

  my $blank = qr/^\s*$/sxm;
  if (!defined $json_filename    || $json_filename    =~ $blank ||
      !defined $irods_collection || $irods_collection =~ $blank) {
    Carp::croak(q[Wrong parameters in write_json]);
  }

  Readonly::Scalar my $JSON_FILE_VERSION => '1.0';

  open my $json_fh, '>:encoding(UTF-8)', $json_filename or
    Carp::croak(q[could not open ml warehouse json file ] .
                $json_filename);

  my $json_hash = {
    version          => $JSON_FILE_VERSION,
    irods_collection => $irods_collection
  };

  print {$json_fh} encode_json($json_hash) or
    Carp::croak(q[could not write to ml warehouse json file ] .
                $json_filename);

  # Buffered write errors only surface on close, so check it as well.
  close $json_fh or
    Carp::croak(q[could not close ml warehouse json file ] .
                $json_filename);

  return 1;
}

sub setup_test : Test(setup) {
my $irods = WTSI::NPG::iRODS->new(environment => \%ENV,
strict_baton_version => 0);
Expand All @@ -69,7 +41,7 @@ sub teardown_test : Test(teardown) {
$irods->remove_collection($irods_tmp_coll);
}

sub publish_tree : Test(59) {
sub publish_tree : Test(58) {
my $irods = WTSI::NPG::iRODS->new(environment => \%ENV,
strict_baton_version => 0);
my $source_path = "$data_path/treepublisher";
Expand Down Expand Up @@ -100,9 +72,6 @@ sub publish_tree : Test(59) {
secondary_cb => $secondary_avus,
extra_cb => $extra_avus);

my $mlwh_json_filename = qq[metadata.json];
write_json($mlwh_json_filename, $pub->dest_collection);

my $num_expected = scalar @files;
cmp_ok($num_errors, '==', 0, 'No errors on publishing');
cmp_ok($num_files, '==', $num_expected,
Expand Down Expand Up @@ -136,29 +105,83 @@ sub publish_tree : Test(59) {
diag explain \@observed_paths;

check_metadata($irods, map { catfile($irods_tmp_coll, $_) } @observed_paths);
}

ok(-e $mlwh_json_filename, "File json in public_tree correctly created");
# Runs bin/npg_publish_tree.pl with --mlwh_json against the temporary test
# collection and checks that the script exits with status 0 and that the
# named JSON file exists afterwards. The file is removed at the end.
sub npg_publish_tree_pl_writes_json : Test(2) {
  my $source_path        = "${data_path}/treepublisher";
  my $mlwh_json_filename = "metadata.json";

  my @script_args = (
    q[--mlwh_json],        $mlwh_json_filename,
    q[--collection],       $irods_tmp_coll,
    q[--source_directory], $source_path,
  );

  my $exit_status =
    system($^X, "${bin_path}/npg_publish_tree.pl", @script_args);
  ok($exit_status == 0, 'Script npg_publish_tree.pl correctly exited');

  my $json_exists = -e $mlwh_json_filename;
  ok($json_exists, 'File json in npg_publish_tree_script correctly created');

  unlink $mlwh_json_filename;
}

sub npg_publish_tree_script : Test(3) {
sub write_json_correct_keyvalue : Test(2) {
my $source_path = "${data_path}/treepublisher";
my $mlwh_json_filename = "metadata.json";

my @script_args = (qq[--mlwh_json], ${mlwh_json_filename}, qq[--collection], ${irods_tmp_coll}, qq[--source_directory], ${source_path});
my $result = system($^X, "${bin_path}/npg_publish_tree.pl", @script_args);
ok($result == 0, 'Script npg_publish_tree.pl correctly exited');

ok(-e $mlwh_json_filename, "File json in npg_publish_tree_script correctly created");
my $irods = WTSI::NPG::iRODS->new(environment => \%ENV,
strict_baton_version => 0);
my $pub = WTSI::NPG::HTS::TreePublisher->new
(irods => $irods,
source_directory => $source_path,
dest_collection => $irods_tmp_coll,
mlwh_json => $mlwh_json_filename);
my @files = grep { -f } $pub->list_directory($source_path, recurse => 1);

$pub->publish_tree(\@files);
ok(-e $mlwh_json_filename, 'File json in write_json correctly created');
my ($json_fh, $json_hash);
open $json_fh, '<:encoding(UTF-8)', $mlwh_json_filename or
self->logcroak(q[could not open ml warehouse json file] .
qq[$mlwh_json_filename]);
$json_hash = decode_json <$json_fh>;
ok($json_hash->{irods_collection} eq ${irods_tmp_coll}, 'Irods collection folder correct');
ok($json_hash->{irods_collection} eq ${irods_tmp_coll}, 'Correct irods collection folder in json file');
unlink $mlwh_json_filename;
}

# Publishes the treepublisher test data with the mlwh_json attribute set
# and no mlwh_json_cb callback, then checks that the JSON file exists.
# The file is removed at the end.
sub publish_tree_mlwh_json : Test(1) {
  my $mlwh_json_filename = "metadata.json";
  my $source_path        = "${data_path}/treepublisher";

  my %init_args = (
    irods            => WTSI::NPG::iRODS->new(environment          => \%ENV,
                                              strict_baton_version => 0),
    source_directory => $source_path,
    dest_collection  => $irods_tmp_coll,
    mlwh_json        => $mlwh_json_filename,
  );
  my $pub = WTSI::NPG::HTS::TreePublisher->new(%init_args);

  # Only regular files are published; directories are filtered out.
  my @files = grep { -f } $pub->list_directory($source_path, recurse => 1);
  $pub->publish_tree(\@files);

  ok(-e $mlwh_json_filename, 'File json correctly created with no callback');

  unlink $mlwh_json_filename;
}

# Checks that publish_tree refuses to run when the mlwh_json attribute is
# set and an mlwh_json_cb callback is passed as well: the call must die
# and no JSON file may be left behind.
sub publish_tree_mlwh_json_plus_cb : Test(2) {
  my $source_path = "${data_path}/treepublisher";
  my $mlwh_json_filename = "metadata.json";

  my $irods = WTSI::NPG::iRODS->new(environment          => \%ENV,
                                    strict_baton_version => 0);
  my $pub = WTSI::NPG::HTS::TreePublisher->new
    (irods            => $irods,
     source_directory => $source_path,
     dest_collection  => $irods_tmp_coll,
     mlwh_json        => $mlwh_json_filename);
  my @files = grep { -f } $pub->list_directory($source_path, recurse => 1);

  # dies_ok takes a bare block as its first argument; the description must
  # follow the closing brace directly, with no comma in between.
  dies_ok {
    $pub->publish_tree(\@files,
                       mlwh_json_cb => sub {
                         return 1;
                       });
  } 'publish_tree correctly exited with error (json callback clash)';

  ok(! -e $mlwh_json_filename, 'No json file as expected (json callback clash)');
}

sub publish_tree_filter : Test(4) {
my $irods = WTSI::NPG::iRODS->new(environment => \%ENV,
strict_baton_version => 0);
Expand Down