diff --git a/config/ena_data_submission.conf b/config/ena_data_submission.conf index 67cb550..b020991 100644 --- a/config/ena_data_submission.conf +++ b/config/ena_data_submission.conf @@ -1,7 +1,7 @@ { 'webin_user' => 'username', 'webin_pass' => 'password', - 'ena_base_path' => 'http://www.ebi.ac.uk/ena/data/view/', + 'ena_base_path' => 'http://www.ebi.ac.uk/ena/browser/api/xml/', 'pubmed_url_base' => 'http://www.ncbi.nlm.nih.gov/pubmed/?term=', 'taxon_lookup_service' => 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=taxonomy&report=xml&id=', 'output_root' => '/nfs/pathogen/ena_updates/', diff --git a/lib/Bio/ENA/DataSubmission/AccessionConverter.pm b/lib/Bio/ENA/DataSubmission/AccessionConverter.pm index 9b2a278..4ee8917 100644 --- a/lib/Bio/ENA/DataSubmission/AccessionConverter.pm +++ b/lib/Bio/ENA/DataSubmission/AccessionConverter.pm @@ -27,7 +27,7 @@ sub convert_secondary_project_accession_to_primary { my ($self, $accession) = @_; if (defined($accession) && $accession =~ /ERP/) { - my $xml = Bio::ENA::DataSubmission::XML->new(url => $self->ena_base_path . "$accession&display=xml", ena_base_path => $self->ena_base_path)->parse_from_url; + my $xml = Bio::ENA::DataSubmission::XML->new(url => $self->ena_base_path . "$accession", ena_base_path => $self->ena_base_path)->parse_from_url; if (defined($xml) && defined($xml->{STUDY}) && defined($xml->{STUDY}->[0]) && @@ -44,7 +44,7 @@ sub convert_secondary_project_accession_to_primary { sub convert_secondary_sample_accession_to_biosample { my ($self, $accession) = @_; if (defined($accession) && $accession =~ /ERS/) { - my $xml = Bio::ENA::DataSubmission::XML->new(url => $self->ena_base_path . "$accession&display=xml", ena_base_path => $self->ena_base_path)->parse_from_url; + my $xml = Bio::ENA::DataSubmission::XML->new(url => $self->ena_base_path . "$accession", ena_base_path => $self->ena_base_path)->parse_from_url; if (defined($xml) && defined($xml->{SAMPLE}) && defined($xml->{SAMPLE}->[0]) && @@ -62,4 +62,4 @@ sub convert_secondary_sample_accession_to_biosample { __PACKAGE__->meta->make_immutable; no Moose; -1; \ No newline at end of file +1; diff --git a/lib/Bio/ENA/DataSubmission/CommandLine/CompareMetadata.pm b/lib/Bio/ENA/DataSubmission/CommandLine/CompareMetadata.pm index ada95c8..0ee4ce7 100644 --- a/lib/Bio/ENA/DataSubmission/CommandLine/CompareMetadata.pm +++ b/lib/Bio/ENA/DataSubmission/CommandLine/CompareMetadata.pm @@ -9,8 +9,8 @@ Bio::ENA::DataSubmission::CommandLine::CompareMetadata =head1 SYNOPSIS use Bio::ENA::DataSubmission::CommandLine::CompareMetadata; - - 1. pull XML from ENA using http://www.ebi.ac.uk/ena/data/view/ERS*****&display=xml + + 1. pull XML from ENA using http://www.ebi.ac.uk/ena/browser/api/xml/ERS***** 2. parse to data structure 3. parse manifest to same data structure 4. compare data structures @@ -65,7 +65,7 @@ sub BUILD { $self->manifest($file) if ( defined $file ); $self->outfile($outfile) if ( defined $outfile ); $self->help($help) if ( defined $help ); - + $self->config_file($config_file) if ( defined $config_file ); ( -e $self->config_file ) or Bio::ENA::DataSubmission::Exception::FileNotFound->throw( error => "Cannot find config file\n" ); $self->_populate_attributes_from_config_file; @@ -81,7 +81,7 @@ sub _populate_attributes_from_config_file } sub check_inputs{ - my $self = shift; + my $self = shift; return( $self->manifest && !$self->help @@ -155,7 +155,7 @@ sub _report{ unshift(@data, ['Total Conflicts', $#data]); } else{ - unshift(@data, ['Total Conflicts', 0]); + unshift(@data, ['Total Conflicts', 0]); } my $xls = Bio::ENA::DataSubmission::Spreadsheet->new( data => \@data, outfile => $outfile ); $xls->write_xls; @@ -174,4 +174,4 @@ USAGE __PACKAGE__->meta->make_immutable; no Moose; -1; \ No newline at end of file +1; diff --git a/lib/Bio/ENA/DataSubmission/CommandLine/ValidateAnalysisManifest.pm b/lib/Bio/ENA/DataSubmission/CommandLine/ValidateAnalysisManifest.pm index 5460fe0..12c92c4 100644 --- a/lib/Bio/ENA/DataSubmission/CommandLine/ValidateAnalysisManifest.pm +++ b/lib/Bio/ENA/DataSubmission/CommandLine/ValidateAnalysisManifest.pm @@ -9,7 +9,7 @@ Bio::ENA::DataSubmission::CommandLine::ValidateManifest =head1 SYNOPSIS use Bio::ENA::DataSubmission::CommandLine::ValidateManifest; - + =head1 METHODS @@ -53,7 +53,7 @@ has 'outfile' => ( is => 'rw', isa => 'Str', required => 0 ); has 'edit' => ( is => 'rw', isa => 'Bool', required => 0 ); has 'help' => ( is => 'rw', isa => 'Bool', required => 0 ); has '_filetypes' => ( is => 'rw', isa => 'ArrayRef', required => 0, lazy_build => 1 ); -has 'ena_base_path' => ( is => 'rw', isa => 'Str', default => 'http://www.ebi.ac.uk/ena/data/view/'); +has 'ena_base_path' => ( is => 'rw', isa => 'Str', default => 'http://www.ebi.ac.uk/ena/browser/api/xml/'); has 'pubmed_url_base' => ( is => 'rw', isa => 'Str', default => 'http://www.ncbi.nlm.nih.gov/pubmed/?term='); has 'config_file' => ( is => 'rw', isa => 'Str', required => 0, default => $ENV{'ENA_SUBMISSIONS_CONFIG'}); @@ -106,7 +106,7 @@ sub _populate_attributes_from_config_file } sub check_inputs{ - my $self = shift; + my $self = shift; return( $self->file && !$self->help @@ -162,7 +162,7 @@ sub run { for my $c ( 0..$#row ) { my $cell = $row[$c]; if ( defined $cell ){ - my $gen_error = Bio::ENA::DataSubmission::Validator::Error::General->new( + my $gen_error = Bio::ENA::DataSubmission::Validator::Error::General->new( identifier => $name, cell => $cell, field => $header[$c] @@ -175,8 +175,8 @@ sub run { # mandatory cells my $mandatory = [ 0, 1, 2, 3, 4, 6, 7, 9, 10, 11 ]; - my $mandatory_error = Bio::ENA::DataSubmission::Validator::Error::MandatoryCells->new( - row => \@row, + my $mandatory_error = Bio::ENA::DataSubmission::Validator::Error::MandatoryCells->new( + row => \@row, mandatory => $mandatory )->validate; push( @errors_found, $mandatory_error ) if ( $mandatory_error->triggered ); @@ -274,7 +274,7 @@ sub run { push( @errors_found, $pubmed_id_error ) if ( $pubmed_id_error->triggered ); } } - + #--------------# # write report # #--------------# @@ -289,7 +289,7 @@ sub run { # edit/fix where possible # #-------------------------# - + return (scalar(@errors_found) > 0) ? 0 : 1; } diff --git a/lib/Bio/ENA/DataSubmission/CommandLine/ValidateManifest.pm b/lib/Bio/ENA/DataSubmission/CommandLine/ValidateManifest.pm index 996f956..b5daadb 100644 --- a/lib/Bio/ENA/DataSubmission/CommandLine/ValidateManifest.pm +++ b/lib/Bio/ENA/DataSubmission/CommandLine/ValidateManifest.pm @@ -9,7 +9,7 @@ Bio::ENA::DataSubmission::CommandLine::ValidateManifest =head1 SYNOPSIS use Bio::ENA::DataSubmission::CommandLine::ValidateManifest; - + =head1 METHODS @@ -52,7 +52,7 @@ has 'edit' => (is => 'rw', isa => 'Bool', required => 0); has 'help' => (is => 'rw', isa => 'Bool', required => 0); has 'config_file' => (is => 'rw', isa => 'Maybe[Str]', required => 0, default => $ENV{'ENA_SUBMISSIONS_CONFIG'}); -has 'ena_base_path' => (is => 'rw', isa => 'Str', default => 'http://www.ebi.ac.uk/ena/data/view/'); +has 'ena_base_path' => (is => 'rw', isa => 'Str', default => 'http://www.ebi.ac.uk/ena/browser/api/xml/'); has 'taxon_lookup_service' => (is => 'rw', isa => 'Str', default => 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=taxonomy&report=xml&id='); has 'data_root' => (is => 'ro', isa => 'Maybe[Str]', required => 0, default => $ENV{'ENA_SUBMISSIONS_DATA'}); has 'valid_countries_file' => (is => 'ro', isa => 'Str', , lazy => 1, builder => '_build_valid_countries_file'); diff --git a/lib/Bio/ENA/DataSubmission/Validator/Error/ProjectAccession.pm b/lib/Bio/ENA/DataSubmission/Validator/Error/ProjectAccession.pm index 99cd967..57088fc 100644 --- a/lib/Bio/ENA/DataSubmission/Validator/Error/ProjectAccession.pm +++ b/lib/Bio/ENA/DataSubmission/Validator/Error/ProjectAccession.pm @@ -15,7 +15,7 @@ use Bio::ENA::DataSubmission::XML; has 'accession' => ( is => 'ro', isa => 'Str', required => 1 ); has 'identifier' => ( is => 'ro', isa => 'Str', required => 1 ); -has 'ena_base_path' => ( is => 'rw', isa => 'Str', default => 'http://www.ebi.ac.uk/ena/data/view/'); +has 'ena_base_path' => ( is => 'rw', isa => 'Str', default => 'http://www.ebi.ac.uk/ena/browser/api/xml/'); sub validate { my $self = shift; @@ -24,9 +24,9 @@ sub validate { if( $acc =~ m/^ERP/ || $acc =~ m/^SRP/ || $acc =~ m/^PRJ/ ){ # pull XML from ENA and verify that it isn't empty - my $xml = Bio::ENA::DataSubmission::XML->new( url => $self->ena_base_path."$acc&display=xml",ena_base_path => $self->ena_base_path )->parse_from_url; - - $self->set_error_message( $id, "Invalid study accession - could not be found at the ENA" ) if( !(defined $xml->{STUDY} || defined $xml->{PROJECT}) ); + my $xml = Bio::ENA::DataSubmission::XML->new( url => $self->ena_base_path."$acc",ena_base_path => $self->ena_base_path )->parse_from_url; + + $self->set_error_message( $id, "Invalid study accession - could not be found at the ENA" ) if( !(defined $xml->{STUDY} || defined $xml->{PROJECT}) ); } else { $self->set_error_message( $id, "Invalid study accession - must take format ERPxxxxx, SRPxxxxx or PRJxxxx" ); @@ -37,9 +37,9 @@ sub validate { } sub fix_it { - + } no Moose; __PACKAGE__->meta->make_immutable; -1; \ No newline at end of file +1; diff --git a/lib/Bio/ENA/DataSubmission/Validator/Error/RunAccession.pm b/lib/Bio/ENA/DataSubmission/Validator/Error/RunAccession.pm index 74d8600..32f20c2 100644 --- a/lib/Bio/ENA/DataSubmission/Validator/Error/RunAccession.pm +++ b/lib/Bio/ENA/DataSubmission/Validator/Error/RunAccession.pm @@ -15,7 +15,7 @@ use Bio::ENA::DataSubmission::XML; has 'accession' => ( is => 'ro', isa => 'Str', required => 1 ); has 'identifier' => ( is => 'ro', isa => 'Str', required => 1 ); -has 'ena_base_path' => ( is => 'rw', isa => 'Str', default => 'http://www.ebi.ac.uk/ena/data/view/'); +has 'ena_base_path' => ( is => 'rw', isa => 'Str', default => 'http://www.ebi.ac.uk/ena/browser/api/xml/'); sub validate { my $self = shift; @@ -24,8 +24,8 @@ sub validate { if( $acc =~ m/^ERR/ || $acc =~ m/^SRR/){ # pull XML from ENA and verify that it isn't empty - my $xml = Bio::ENA::DataSubmission::XML->new( url => $self->ena_base_path."$acc&display=xml",ena_base_path => $self->ena_base_path )->parse_from_url; - $self->set_error_message( $id, "Invalid run accession - could not be found at the ENA" ) unless ( defined $xml->{RUN} ); + my $xml = Bio::ENA::DataSubmission::XML->new( url => $self->ena_base_path."$acc",ena_base_path => $self->ena_base_path )->parse_from_url; + $self->set_error_message( $id, "Invalid run accession - could not be found at the ENA" ) unless ( defined $xml->{RUN} ); } else { $self->set_error_message( $id, "Invalid run accession - must take format ERRxxxxx or SRRxxxxx" ); @@ -36,9 +36,9 @@ sub validate { } sub fix_it { - + } no Moose; __PACKAGE__->meta->make_immutable; -1; \ No newline at end of file +1; diff --git a/lib/Bio/ENA/DataSubmission/Validator/Error/SampleAccession.pm b/lib/Bio/ENA/DataSubmission/Validator/Error/SampleAccession.pm index 635ca6c..51127ae 100644 --- a/lib/Bio/ENA/DataSubmission/Validator/Error/SampleAccession.pm +++ b/lib/Bio/ENA/DataSubmission/Validator/Error/SampleAccession.pm @@ -15,7 +15,7 @@ use Bio::ENA::DataSubmission::XML; has 'accession' => ( is => 'ro', isa => 'Str', required => 1 ); has 'identifier' => ( is => 'ro', isa => 'Str', required => 1 ); -has 'ena_base_path' => ( is => 'rw', isa => 'Str', default => 'http://www.ebi.ac.uk/ena/data/view/'); +has 'ena_base_path' => ( is => 'rw', isa => 'Str', default => 'http://www.ebi.ac.uk/ena/browser/api/xml/'); sub validate { my $self = shift; @@ -24,8 +24,8 @@ sub validate { if( $acc =~ m/^ERS/ || $acc =~ m/^SAM/ || $acc =~ m/^SRS/){ # pull XML from ENA and verify that it isn't empty - my $xml = Bio::ENA::DataSubmission::XML->new( url => $self->ena_base_path."$acc&display=xml",ena_base_path => $self->ena_base_path )->parse_from_url; - $self->set_error_message( $id, "Invalid sample accession - could not be found at the ENA" ) unless ( defined $xml->{SAMPLE} ); + my $xml = Bio::ENA::DataSubmission::XML->new( url => $self->ena_base_path."$acc",ena_base_path => $self->ena_base_path )->parse_from_url; + $self->set_error_message( $id, "Invalid sample accession - could not be found at the ENA" ) unless ( defined $xml->{SAMPLE} ); } else { $self->set_error_message( $id, "Invalid sample accession - must take format ERSxxxx, SAMxxxx, or SRSxxxxx" ); @@ -36,9 +36,9 @@ sub validate { } sub fix_it { - + } no Moose; __PACKAGE__->meta->make_immutable; -1; \ No newline at end of file +1; diff --git a/lib/Bio/ENA/DataSubmission/XML.pm b/lib/Bio/ENA/DataSubmission/XML.pm index d66bbc2..379ae6e 100644 --- a/lib/Bio/ENA/DataSubmission/XML.pm +++ b/lib/Bio/ENA/DataSubmission/XML.pm @@ -9,7 +9,7 @@ Bio::ENA::DataSubmission::XML =head1 SYNOPSIS use Bio::ENA::DataSubmission::XML; - + =head1 METHODS @@ -47,7 +47,7 @@ has 'outfile' => ( is => 'rw', isa => 'Str', required has 'root' => ( is => 'ro', isa => 'Str', required => 0, default => 'root' ); has '_fields' => ( is => 'rw', isa => 'ArrayRef', required => 0, lazy_build => 1 ); has 'validation_report' => ( is => 'rw', isa => 'XML::LibXML::Error', required => 0 ); -has 'ena_base_path' => ( is => 'rw', isa => 'Str', default => 'http://www.ebi.ac.uk/ena/data/view/' ); +has 'ena_base_path' => ( is => 'rw', isa => 'Str', default => 'http://www.ebi.ac.uk/ena/browser/api/xml/' ); has 'proxy' => ( is => 'rw', isa => 'Str', default => 'http://wwwcache.sanger.ac.uk:3128' ); has 'attributes_to_delete' => ( is => 'ro', @@ -108,7 +108,7 @@ sub update_sample { my $acc = $sample->{'sample_accession'}; ( defined $acc ) or Bio::ENA::DataSubmission::Exception::InvalidInput->throw( error => "Accession number data not present\n" ); - $self->url( $self->ena_base_path . "$acc&display=xml" ); + $self->url( $self->ena_base_path . "$acc" ); my $xml = $self->parse_from_url; @@ -244,7 +244,7 @@ sub parse_from_url { sub parse_xml_metadata { my ( $self, $acc ) = @_; - $self->url( $self->ena_base_path . $acc . "&display=xml" ); + $self->url( $self->ena_base_path . $acc ); my $xml = $self->parse_from_url; my @fields = @{ $self->_fields }; diff --git a/t/data/ERP001039&display=xml b/t/data/ERP001039 similarity index 100% rename from t/data/ERP001039&display=xml rename to t/data/ERP001039 diff --git a/t/data/ERR363472&display=xml b/t/data/ERR363472 similarity index 100% rename from t/data/ERR363472&display=xml rename to t/data/ERR363472 diff --git a/t/data/ERR369155&display=xml b/t/data/ERR369155 similarity index 100% rename from t/data/ERR369155&display=xml rename to t/data/ERR369155 diff --git a/t/data/ERR369164&display=xml b/t/data/ERR369164 similarity index 100% rename from t/data/ERR369164&display=xml rename to t/data/ERR369164 diff --git a/t/data/ERS001491&display=xml b/t/data/ERS001491 similarity index 100% rename from t/data/ERS001491&display=xml rename to t/data/ERS001491 diff --git a/t/data/ERS002783&display=xml b/t/data/ERS002783 similarity index 100% rename from t/data/ERS002783&display=xml rename to t/data/ERS002783 diff --git a/t/data/ERS023435&display=xml b/t/data/ERS023435 similarity index 100% rename from t/data/ERS023435&display=xml rename to t/data/ERS023435 diff --git a/t/data/ERS092760&display=xml b/t/data/ERS092760 similarity index 100% rename from t/data/ERS092760&display=xml rename to t/data/ERS092760 diff --git a/t/data/ERS311393&display=xml b/t/data/ERS311393 similarity index 100% rename from t/data/ERS311393&display=xml rename to t/data/ERS311393 diff --git a/t/data/ERS311489&display=xml b/t/data/ERS311489 similarity index 100% rename from t/data/ERS311489&display=xml rename to t/data/ERS311489 diff --git a/t/data/ERS311560&display=xml b/t/data/ERS311560 similarity index 100% rename from t/data/ERS311560&display=xml rename to t/data/ERS311560 diff --git a/t/data/ERS486637&display=xml b/t/data/ERS486637 similarity index 100% rename from t/data/ERS486637&display=xml rename to t/data/ERS486637