diff --git a/conf/docker-aio/0prep_deps.sh b/conf/docker-aio/0prep_deps.sh index d7b875f4b78..13a91705303 100755 --- a/conf/docker-aio/0prep_deps.sh +++ b/conf/docker-aio/0prep_deps.sh @@ -4,9 +4,9 @@ if [ ! -d dv/deps ]; then fi wdir=`pwd` -if [ ! -e dv/deps/payara-5.2021.6.zip ]; then +if [ ! -e dv/deps/payara-5.2022.3.zip ]; then echo "payara dependency prep" - wget https://s3-eu-west-1.amazonaws.com/payara.fish/Payara+Downloads/5.2021.6/payara-5.2021.6.zip -O dv/deps/payara-5.2021.6.zip + wget https://s3-eu-west-1.amazonaws.com/payara.fish/Payara+Downloads/5.2022.3/payara-5.2022.3.zip -O dv/deps/payara-5.2022.3.zip fi if [ ! -e dv/deps/solr-8.11.1dv.tgz ]; then diff --git a/conf/docker-aio/c8.dockerfile b/conf/docker-aio/c8.dockerfile index dab6d814ece..0002464cbf2 100644 --- a/conf/docker-aio/c8.dockerfile +++ b/conf/docker-aio/c8.dockerfile @@ -24,7 +24,7 @@ COPY disableipv6.conf /etc/sysctl.d/ RUN rm /etc/httpd/conf/* COPY httpd.conf /etc/httpd/conf RUN cd /opt ; tar zxf /tmp/dv/deps/solr-8.11.1dv.tgz -RUN cd /opt ; unzip /tmp/dv/deps/payara-5.2021.6.zip ; ln -s /opt/payara5 /opt/glassfish4 +RUN cd /opt ; unzip /tmp/dv/deps/payara-5.2022.3.zip ; ln -s /opt/payara5 /opt/glassfish4 # this copy of domain.xml is the result of running `asadmin set server.monitoring-service.module-monitoring-levels.jvm=LOW` on a default glassfish installation (aka - enable the glassfish REST monitir endpoint for the jvm` # this dies under Java 11, do we keep it? 
diff --git a/conf/solr/8.11.1/schema.xml b/conf/solr/8.11.1/schema.xml index ead6e882d49..63312ab5d40 100644 --- a/conf/solr/8.11.1/schema.xml +++ b/conf/solr/8.11.1/schema.xml @@ -261,6 +261,9 @@ + + + @@ -498,6 +501,9 @@ + + + diff --git a/doc/release-notes/5.10-release-notes.md b/doc/release-notes/5.10-release-notes.md index 0da42a7b527..c13ae8a6b78 100644 --- a/doc/release-notes/5.10-release-notes.md +++ b/doc/release-notes/5.10-release-notes.md @@ -140,7 +140,7 @@ or To find datasets with a without a CC0 license and with empty terms: ``` -select CONCAT('doi:', dvo.authority, '/', dvo.identifier), v.alias as dataverse_alias, case when versionstate='RELEASED' then concat(dv.versionnumber, '.', dv.minorversionnumber) else versionstate END as version, dv.id as datasetversion_id, t.id as termsofuseandaccess_id, t.termsofuse, t.confidentialitydeclaration, t.specialpermissions, t.restrictions, t.citationrequirements, t.depositorrequirements, t.conditions, t.disclaimer from dvobject dvo, termsofuseandaccess t, datasetversion dv, dataverse v where dv.dataset_id=dvo.id and dv.termsofuseandaccess_id=t.id and dvo.owner_id=v.id and t.license='NONE' and t.termsofuse is null; +select CONCAT('doi:', dvo.authority, '/', dvo.identifier), v.alias as dataverse_alias, case when versionstate='RELEASED' then concat(dv.versionnumber, '.', dv.minorversionnumber) else versionstate END as version, dv.id as datasetversion_id, t.id as termsofuseandaccess_id, t.termsofuse, t.confidentialitydeclaration, t.specialpermissions, t.restrictions, t.citationrequirements, t.depositorrequirements, t.conditions, t.disclaimer from dvobject dvo, termsofuseandaccess t, datasetversion dv, dataverse v where dv.dataset_id=dvo.id and dv.termsofuseandaccess_id=t.id and dvo.owner_id=v.id and (t.license='NONE' or t.license is null) and t.termsofuse is null; ``` As before, there are a couple options. 
diff --git a/doc/release-notes/5.12-release-notes.md b/doc/release-notes/5.12-release-notes.md new file mode 100644 index 00000000000..7085f859046 --- /dev/null +++ b/doc/release-notes/5.12-release-notes.md @@ -0,0 +1,255 @@ +# Dataverse Software 5.12 + +This release brings new features, enhancements, and bug fixes to the Dataverse Software. Thank you to all of the community members who contributed code, suggestions, bug reports, and other assistance across the project. + +## Release Highlights + +### Support for Globus + +[Globus][] can be used to transfer large files. Part of "Harvard Data Commons Additions" below. + +[Globus]: https://www.globus.org + +### Support for Remote File Storage + +Dataset files can be stored at remote URLs. Part of "Harvard Data Commons Additions" below. + +### New Computational Workflow Metadata Block + +The new Computational Workflow metadata block will allow depositors to effectively tag datasets as computational workflows. + +To add the new metadata block, follow the instructions in the Admin Guide: + +The location of the new metadata block tsv file is `scripts/api/data/metadatablocks/computational_workflow.tsv`. Part of "Harvard Data Commons Additions" below. + +### Support for Linked Data Notifications (LDN) + +[Linked Data Notifications][] (LDN) is a standard from the W3C. Part of "Harvard Data Commons Additions" below. + +[Linked Data Notifications]: https://www.w3.org/TR/ldn/ + + +### Harvard Data Commons Additions + +As reported at the 2022 Dataverse Community Meeting, the [Harvard Data Commons](https://sites.harvard.edu/harvard-data-commons/) project has supported a wide range of additions to the Dataverse software that improve support for Big Data, Workflows, Archiving, and interaction with other repositories. In many cases, these additions build upon features developed within the Dataverse community by Borealis, DANS, QDR, TDL, and others. 
Highlights from this work include: + +- Initial support for Globus file transfer to upload to and download from a Dataverse managed S3 store. The current implementation disables file restriction and embargo on Globus-enabled stores. +- Initial support for Remote File Storage. This capability, enabled via a new RemoteOverlay store type, allows a file stored in a remote system to be added to a dataset (currently only via API) with download requests redirected to the remote system. Use cases include referencing public files hosted on external web servers as well as support for controlled access managed by Dataverse (e.g. via restricted and embargoed status) and/or by the remote store. +- Initial support for computational workflows, including a new metadata block and detected filetypes. +- Support for archiving to any S3 store using Dataverse's RDA-conformant BagIT file format (a BagPack). +- Improved error handling and performance in archival bag creation and new options such as only supporting archiving of one dataset version. +- Additions/corrections to the OAI-ORE metadata format (which is included in archival bags) such as referencing the name/mimetype/size/checksum/download URL of the original file for ingested files, the inclusion of metadata about the parent collection(s) of an archived dataset version, and use of the URL form of PIDs. +- Display of archival status within the dataset page versions table, richer status options including success, pending, and failure states, with a complete API for managing archival status. +- Support for batch archiving via API as an alternative to the current options of configuring archiving upon publication or archiving each dataset version manually. +- Initial support for sending and receiving Linked Data Notification messages indicating relationships between a dataset and external resources (e.g. 
papers or other datasets) that can be used to trigger additional actions, such as the creation of a back-link to provide, for example, bi-directional linking between a published paper and a Dataverse dataset. +- A new capability to provide custom per-field instructions in dataset templates. +- The following file extensions are now detected: + - wdl=text/x-workflow-description-language + - cwl=text/x-computational-workflow-language + - nf=text/x-nextflow + - Rmd=text/x-r-notebook + - rb=text/x-ruby-script + - dag=text/x-dagman + +### Improvements to Fields that Appear in the Citation Metadata Block + +Grammar, style and consistency improvements have been made to the titles, tooltip description text, and watermarks of metadata fields that appear in the Citation metadata block. + +This includes fields that dataset depositors can edit in the Citation Metadata accordion (i.e. fields controlled by the citation.tsv and citation.properties files) and fields whose values are system-generated, such as the Dataset Persistent ID, Previous Dataset Persistent ID, and Publication Date fields whose titles and tooltips are configured in the bundles.properties file. + +The changes should provide clearer information to curators, depositors, and people looking for data about what the fields are for. + +A new page in the Style Guides called "Text" has also been added. The new page includes a section called "Metadata Text Guidelines" with a link to a Google Doc where the guidelines are being maintained for now since we expect them to be revised frequently. + +### New Static Search Facet: Metadata Types + +A new static search facet has been added to the search side panel. This new facet is called "Metadata Types" and is driven from metadata blocks. When a metadata field value is inserted into a dataset, an entry for the metadata block it belongs to is added to this new facet. + +This new facet needs to be configured for it to appear on the search side panel. 
The configuration assigns to a dataverse what metadata blocks to show. The configuration is inherited by child dataverses. + +To configure the new facet, use the Metadata Block Facet API: + +### Broader MicroProfile Config Support for Developers + +As of this release, many [JVM options](https://guides.dataverse.org/en/5.12/installation/config.html#jvm-options) +can be set using any [MicroProfile Config Source](https://docs.payara.fish/community/docs/Technical%20Documentation/MicroProfile/Config/Overview.html#config-sources). + +Currently this change is only relevant to developers but as settings are migrated to the new "lookup" pattern documented in the [Consuming Configuration](https://guides.dataverse.org/en/5.12/developers/configuration.html) section of the Developer Guide, anyone installing the Dataverse software will have much greater flexibility when configuring those settings, especially within containers. These changes will be announced in future releases. + +Please note that an upgrade to Payara 5.2021.8 or higher is required to make use of this. Payara 5.2021.5 threw exceptions, as explained in PR #8823. + +### HTTP Range Requests: New HTTP Status Codes and Headers for Datafile Access API + +The Basic File Access resource for datafiles (/api/access/datafile/$id) was slightly modified in order to comply better with the HTTP specification for range requests. 
+ +If the request contains a "Range" header: +* The returned HTTP status is now 206 (Partial Content) instead of 200 +* A "Content-Range" header is returned containing information about the returned bytes +* An "Accept-Ranges" header with value "bytes" is returned + +CORS rules/headers were modified accordingly: +* The "Range" header is added to "Access-Control-Allow-Headers" +* The "Content-Range" and "Accept-Ranges" headers are added to "Access-Control-Expose-Headers" + +### File Type Detection When File Has No Extension + +File types are now detected based on the filename when the file has no extension. + +The following filenames are now detected: + +- Makefile=text/x-makefile +- Snakemake=text/x-snakemake +- Dockerfile=application/x-docker-file +- Vagrantfile=application/x-vagrant-file + +These are defined in `MimeTypeDetectionByFileName.properties`. + +### Upgrade to Payara 5.2022.3 Highly Recommended + +With lots of bug and security fixes included, we encourage everyone to upgrade to Payara 5.2022.3 as soon as possible. See below for details. + +## Major Use Cases and Infrastructure Enhancements + +Changes and fixes in this release include: + +- Administrators can configure an S3 store used in Dataverse to support users uploading/downloading files via Globus File Transfer. (PR #8891) +- Administrators can configure a RemoteOverlay store to allow files that remain hosted by a remote system to be added to a dataset. (PR #7325) +- Administrators can configure the Dataverse software to send archival Bag copies of published dataset versions to any S3-compatible service. (PR #8751) +- Users can see information about a dataset's parent collection(s) in the OAI-ORE metadata export. (PR #8770) +- Users and administrators can now use the OAI-ORE metadata export to retrieve and assess the fixity of the original file (for ingested tabular files) via the included checksum. (PR #8901) +- Archiving via RDA-conformant Bags is more robust and is more configurable. 
(PR #8773, #8747, #8699, #8609, #8606, #8610) +- Users and administrators can see the archival status of the versions of the datasets they manage in the dataset page version table. (PR #8748, #8696) +- Administrators can configure messaging between their Dataverse installation and other repositories that may hold related resources or services interested in activity within that installation. (PR #8775) +- Collection managers can create templates that include custom instructions on how to fill out specific metadata fields. +- Dataset update API users are given more information when the dataset they are updating is out of compliance with Terms of Access requirements (Issue #8859) +- Adds a new setting (:ControlledVocabularyCustomJavaScript) that allows a JavaScript file to be loaded into the dataset page for the purpose of showing controlled vocabulary as a list (Issue #8722) +- Fixes an issue with the Redetect File Type API (Issue #7527) +- Terms of Use is now imported when using DDI format through harvesting or the native API. (Issue #8715, PR #8743) +- Optimizes some code to improve application memory usage (Issue #8871) +- Fixes sample data to reflect custom licenses. +- Fixes the Archival Status Input API (available to superusers) (Issue #8924) +- Small bugs have been fixed in the dataset export in the JSON and DDI formats; eliminating the export of "undefined" as a metadata language in the former, and a duplicate keyword tag in the latter. 
(Issue #8868) + +## New DB Settings + +The following DB settings have been added: +- `:ShibAffiliationOrder` - Select the first or last entry in an Affiliation array +- `:ShibAffiliationSeparator` (default: ";") - Set the separator for the Affiliation array +- `:LDNMessageHosts` +- `:GlobusBasicToken` +- `:GlobusEndpoint` +- `:GlobusStores` +- `:GlobusAppUrl` +- `:GlobusPollingInterval` +- `:GlobusSingleFileTransfer` +- `:S3ArchiverConfig` +- `:S3ArchiverProfile` +- `:DRSArchiverConfig` +- `:ControlledVocabularyCustomJavaScript` + +See the [Database Settings](https://guides.dataverse.org/en/5.12/installation/config.html#database-settings) section of the Guides for more information. + +## Notes for Dataverse Installation Administrators + +### Enabling Experimental Capabilities + +Several of the capabilities introduced in v5.12 are "experimental" in the sense that further changes and enhancements to these capabilities should be expected and that these changes may involve additional work, for those who use the initial implementations, when upgrading to newer versions of the Dataverse software. Administrators wishing to use them are encouraged to stay in touch, e.g. via the Dataverse Community Slack space, to understand the limits of current capabilities and to plan for future upgrades. + +## Notes for Developers and Integrators + +See the "Backward Incompatibilities" section below. + +## Backward Incompatibilities + +### OAI-ORE and Archiving Changes + +The Admin API call to manually submit a dataset version for archiving has changed to require POST instead of GET and to have a name making it clearer that archiving is being done for a given dataset version: /api/admin/submitDatasetVersionToArchive. + +Earlier versions of the archival bags included the ingested (tab-separated-value) version of tabular files while providing the checksum of the original file (Issue #8449). This release fixes that by including the original file and its metadata in the archival bag. 
This means that archival bags created prior to this version do not include a way to validate ingested files. Further, it is likely that capabilities in development (i.e. as part of the [Dataverse Uploader](https://github.com/GlobalDataverseCommunityConsortium/dataverse-uploader)) to allow re-creation of a dataset version from an archival bag will only be fully compatible with archival bags generated by a Dataverse instance at a release > v5.12. (Specifically, at a minimum, since only the ingested file is included in earlier archival bags, an upload via DVUploader would not result in the same original file/ingested version as in the original dataset.) Administrators should be aware that re-creating archival bags, i.e. via the new batch archiving API, may be advisable now and will be recommended at some point in the future (i.e. there will be a point where we will start versioning archival bags and will start maintaining backward compatibility for older versions as part of transitioning this from being an experimental capability). + +## Installation + +If this is a new installation, please see our [Installation Guide](https://guides.dataverse.org/en/5.12/installation/). Please also contact us to get added to the [Dataverse Project Map](https://guides.dataverse.org/en/5.12/installation/config.html#putting-your-dataverse-installation-on-the-map-at-dataverse-org) if you have not done so already. + +## Upgrade Instructions + +0\. These instructions assume that you've already successfully upgraded from Dataverse Software 4.x to Dataverse Software 5 following the instructions in the [Dataverse Software 5 Release Notes](https://github.com/IQSS/dataverse/releases/tag/v5.0). After upgrading from the 4.x series to 5.0, you should progress through the other 5.x releases before attempting the upgrade to 5.12. + +If you are running Payara as a non-root user (and you should be!), **remember not to execute the commands below as root**. Use `sudo` to change to that user first. 
For example, `sudo -i -u dataverse` if `dataverse` is your dedicated application user. + +In the following commands we assume that Payara 5 is installed in `/usr/local/payara5`. If not, adjust as needed. + +### Instructions for Upgrading to Payara 5.2022.3 + +**Note:** with the approaching EOL for the Payara 5 Community release train it's likely we will switch to a +yet-to-be-released Payara 6 in the not-so-far-away future. + +We recommend you ensure you followed all update instructions from the past releases regarding Payara. +(latest Payara update was for [v5.6](https://github.com/IQSS/dataverse/releases/tag/v5.6)) + +Upgrading requires a maintenance window and downtime. Please plan ahead, create backups of your database, etc. + +The steps below are a simple matter of reusing your existing domain directory with the new distribution. +But we also recommend that you review the Payara upgrade instructions as it could be helpful during any troubleshooting: +[Payara Release Notes](https://docs.payara.fish/community/docs/Release%20Notes/Release%20Notes%205.2022.3.html) + +Please note that the deletion of the `lib/databases` directory below is only required once, for this upgrade (see Issue #8230 for details). + +```shell +export PAYARA=/usr/local/payara5 +``` + +(or `setenv PAYARA /usr/local/payara5` if you are using a `csh`-like shell) + +1\. Undeploy the previous version + +```shell + $PAYARA/bin/asadmin list-applications + $PAYARA/bin/asadmin undeploy dataverse<-version> +``` + +2\. Stop Payara + +```shell + service payara stop + rm -rf $PAYARA/glassfish/domains/domain1/generated + rm -rf $PAYARA/glassfish/domains/domain1/osgi-cache + rm -rf $PAYARA/glassfish/domains/domain1/lib/databases +``` + +3\. Move the current Payara directory out of the way + +```shell + mv $PAYARA $PAYARA.MOVED +``` + +4\. Download the new Payara version (5.2022.3), and unzip it in its place + +5\. 
Replace the brand new payara/glassfish/domains/domain1 with your old, preserved domain1 + +6\. Start Payara + +```shell + service payara start +``` + +7\. Deploy this version. + +```shell + $PAYARA/bin/asadmin deploy dataverse-5.12.war +``` + +8\. Restart payara + +```shell + service payara stop + service payara start +``` +### Additional Upgrade Steps + +Update the Citation metadata block: + +- `wget https://github.com/IQSS/dataverse/releases/download/v5.12/citation.tsv` +- `curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @citation.tsv -H "Content-type: text/tab-separated-values"` + +- Run ReExportAll to update metadata files (OAI_ORE, JSON and DDI formats are affected by the changes and bug fixes in this release; PRs #8770 and #8868). Optionally, for those using the Dataverse software's BagIt-based archiving, re-archive dataset versions archived using prior versions of the Dataverse software. This will be recommended/required in a future release. diff --git a/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv b/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv index 952595837f1..61db5dfed93 100644 --- a/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv +++ b/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv @@ -1,5 +1,5 @@ Tool Type Scope Description Data Explorer explore file A GUI which lists the variables in a tabular data file allowing searching, charting and cross tabulation analysis. See the README.md file at https://github.com/scholarsportal/dataverse-data-explorer-v2 for the instructions on adding Data Explorer to your Dataverse. Whole Tale explore dataset A platform for the creation of reproducible research packages that allows users to launch containerized interactive analysis environments based on popular tools such as Jupyter and RStudio. Using this integration, Dataverse users can launch Jupyter and RStudio environments to analyze published datasets. 
For more information, see the `Whole Tale User Guide `_. -File Previewers explore file A set of tools that display the content of files - including audio, html, `Hypothes.is `_ annotations, images, PDF, text, video, tabular data, and spreadsheets - allowing them to be viewed without downloading. The previewers can be run directly from github.io, so the only required step is using the Dataverse API to register the ones you want to use. Documentation, including how to optionally brand the previewers, and an invitation to contribute through github are in the README.md file. Initial development was led by the Qualitative Data Repository and the spreasdheet previewer was added by the Social Sciences and Humanities Open Cloud (SSHOC) project. https://github.com/GlobalDataverseCommunityConsortium/dataverse-previewers +File Previewers explore file A set of tools that display the content of files - including audio, html, `Hypothes.is `_ annotations, images, PDF, text, video, tabular data, spreadsheets, and GeoJSON - allowing them to be viewed without downloading. The previewers can be run directly from github.io, so the only required step is using the Dataverse API to register the ones you want to use. Documentation, including how to optionally brand the previewers, and an invitation to contribute through github are in the README.md file. Initial development was led by the Qualitative Data Repository and the spreadsheet previewer was added by the Social Sciences and Humanities Open Cloud (SSHOC) project. https://github.com/gdcc/dataverse-previewers Data Curation Tool configure file A GUI for curating data by adding labels, groups, weights and other details to assist with informed reuse. See the README.md file at https://github.com/scholarsportal/Dataverse-Data-Curation-Tool for the installation instructions. 
diff --git a/doc/sphinx-guides/source/_static/api/dataverse-facets.json b/doc/sphinx-guides/source/_static/api/dataverse-facets.json new file mode 100644 index 00000000000..20a8412440d --- /dev/null +++ b/doc/sphinx-guides/source/_static/api/dataverse-facets.json @@ -0,0 +1 @@ +["authorName", "authorAffiliation"] \ No newline at end of file diff --git a/doc/sphinx-guides/source/_static/api/ddi_dataset.xml b/doc/sphinx-guides/source/_static/api/ddi_dataset.xml index 79e0581131e..05eaadc3458 100644 --- a/doc/sphinx-guides/source/_static/api/ddi_dataset.xml +++ b/doc/sphinx-guides/source/_static/api/ddi_dataset.xml @@ -142,6 +142,7 @@ Terms of Access + Terms of Use Data Access Place Original Archive diff --git a/doc/sphinx-guides/source/_static/api/metadata-block-facets.json b/doc/sphinx-guides/source/_static/api/metadata-block-facets.json new file mode 100644 index 00000000000..bc497846592 --- /dev/null +++ b/doc/sphinx-guides/source/_static/api/metadata-block-facets.json @@ -0,0 +1 @@ +["socialscience", "geospatial"] \ No newline at end of file diff --git a/doc/sphinx-guides/source/admin/integrations.rst b/doc/sphinx-guides/source/admin/integrations.rst index 8d3f53981e0..f6ca34bf3d4 100644 --- a/doc/sphinx-guides/source/admin/integrations.rst +++ b/doc/sphinx-guides/source/admin/integrations.rst @@ -70,6 +70,13 @@ Amnesia is a flexible data anonymization tool that transforms relational and tra For instructions on depositing or loading data from Dataverse installations to Amnesia, visit https://amnesia.openaire.eu/about-documentation.html +SampleDB +++++++++ + +SampleDB is a web-based electronic lab notebook (ELN) with a focus on flexible metadata. SampleDB can export this flexible, process-specific metadata to a new Dataset in a Dataverse installation using the EngMeta Process Metadata block. 
+ +For instructions on using the Dataverse export, you can visit https://scientific-it-systems.iffgit.fz-juelich.de/SampleDB/administrator_guide/dataverse_export.html + Embedding Data on Websites -------------------------- diff --git a/doc/sphinx-guides/source/admin/metadatacustomization.rst b/doc/sphinx-guides/source/admin/metadatacustomization.rst index e59d3d4bc3b..ff1b265cef7 100644 --- a/doc/sphinx-guides/source/admin/metadatacustomization.rst +++ b/doc/sphinx-guides/source/admin/metadatacustomization.rst @@ -571,9 +571,13 @@ Configuration involves specifying which fields are to be mapped, whether free-te These are all defined in the :ref:`:CVocConf <:CVocConf>` setting as a JSON array. Details about the required elements as well as example JSON arrays are available at https://github.com/gdcc/dataverse-external-vocab-support, along with an example metadata block that can be used for testing. The scripts required can be hosted locally or retrieved dynamically from https://gdcc.github.io/ (similar to how dataverse-previewers work). +Please note that in addition to the :ref:`:CVocConf` described above, an alternative is the :ref:`:ControlledVocabularyCustomJavaScript` setting. + Tips from the Dataverse Community --------------------------------- +When creating new metadata blocks, please review the :doc:`/style/text` section of the Style Guide, which includes guidance about naming metadata fields and writing text for metadata tooltips and watermarks. + If there are tips that you feel are omitted from this document, please open an issue at https://github.com/IQSS/dataverse/issues and consider making a pull request to make improvements. 
You can find this document at https://github.com/IQSS/dataverse/blob/develop/doc/sphinx-guides/source/admin/metadatacustomization.rst Alternatively, you are welcome to request "edit" access to this "Tips for Dataverse Software metadata blocks from the community" Google doc: https://docs.google.com/document/d/1XpblRw0v0SvV-Bq6njlN96WyHJ7tqG0WWejqBdl7hE0/edit?usp=sharing diff --git a/doc/sphinx-guides/source/admin/metadataexport.rst b/doc/sphinx-guides/source/admin/metadataexport.rst index c9518b465fc..78b8c8ce223 100644 --- a/doc/sphinx-guides/source/admin/metadataexport.rst +++ b/doc/sphinx-guides/source/admin/metadataexport.rst @@ -11,19 +11,35 @@ Publishing a dataset automatically starts a metadata export job, that will run i A scheduled timer job that runs nightly will attempt to export any published datasets that for whatever reason haven't been exported yet. This timer is activated automatically on the deployment, or restart, of the application. So, again, no need to start or configure it manually. (See the :doc:`timers` section of this Admin Guide for more information.) -Batch exports through the API +.. _batch-exports-through-the-api: + +Batch Exports Through the API ----------------------------- -In addition to the automated exports, a Dataverse installation admin can start a batch job through the API. The following 2 API calls are provided: +In addition to the automated exports, a Dataverse installation admin can start a batch job through the API. The following four API calls are provided: ``curl http://localhost:8080/api/admin/metadata/exportAll`` ``curl http://localhost:8080/api/admin/metadata/reExportAll`` -The former will attempt to export all the published, local (non-harvested) datasets that haven't been exported yet. -The latter will *force* a re-export of every published, local dataset, regardless of whether it has already been exported or not. 
+``curl http://localhost:8080/api/admin/metadata/clearExportTimestamps`` + +``curl http://localhost:8080/api/admin/metadata/:persistentId/reExportDataset?persistentId=doi:10.5072/FK2/AAA000`` + +The first will attempt to export all the published, local (non-harvested) datasets that haven't been exported yet. +The second will *force* a re-export of every published, local dataset, regardless of whether it has already been exported or not. + +The first two calls return a status message informing the administrator that the process has been launched (``{"status":"WORKFLOW_IN_PROGRESS"}``). The administrator can check the progress of the process via log files: ``[Payara directory]/glassfish/domains/domain1/logs/export_[time stamp].log``. + +Instead of running "reExportAll" the same can be accomplished using "clearExportTimestamps" followed by "exportAll". +The difference is that when exporting prematurely fails due to some problem, the datasets that did not get exported yet still have the timestamps cleared. A next call to exportAll will skip the datasets already exported and try to export the ones that still need it. +Calling clearExportTimestamps should return ``{"status":"OK","data":{"message":"cleared: X"}}`` where "X" is the total number of datasets cleared. + +The reExportDataset call gives you the opportunity to *force* a re-export of only a specific dataset and (with some script automation) could allow you to export specific batches of datasets. This might be useful when handling exporting problems or when reExportAll takes too much time and is overkill. Note that :ref:`export-dataset-metadata-api` is a related API. + +reExportDataset can be called with either ``persistentId`` (as shown above, with a DOI) or with the database id of a dataset (as shown below, with "42" as the database id). + -These calls return a status message informing the administrator, that the process has been launched (``{"status":"WORKFLOW_IN_PROGRESS"}``). 
The administrator can check the progress of the process via log files: ``[Payara directory]/glassfish/domains/domain1/logs/export_[time stamp].log``. +``curl http://localhost:8080/api/admin/metadata/42/reExportDataset`` Note, that creating, modifying, or re-exporting an OAI set will also attempt to export all the unexported datasets found in the set. diff --git a/doc/sphinx-guides/source/admin/troubleshooting.rst b/doc/sphinx-guides/source/admin/troubleshooting.rst index 016a3e68abb..79ce98322a8 100644 --- a/doc/sphinx-guides/source/admin/troubleshooting.rst +++ b/doc/sphinx-guides/source/admin/troubleshooting.rst @@ -96,10 +96,8 @@ Sometimes your Dataverse installation fails to deploy, or Payara fails to restar We don't know what's causing this issue, but here's a known workaround: -- Stop Payara; - -- Remove the ``generated`` and ``osgi-cache`` directories; - +- Stop Payara; +- Remove the ``generated`` and ``osgi-cache`` directories from the ``domain1`` directory; - Start Payara The shell script below performs the steps above. @@ -146,7 +144,7 @@ To identify the specific invalid values in the affected datasets, or to check al Many Files with a File Type of "Unknown", "Application", or "Binary" -------------------------------------------------------------------- -From the home page of a Dataverse installation you can get a count of files by file type by clicking "Files" and then scrolling down to "File Type". If you see a lot of files that are "Unknown", "Application", or "Binary" you can have the Dataverse installation attempt to redetect the file type by using the :ref:`Redetect File Type ` API endpoint. +From the home page of a Dataverse installation you can get a count of files by file type by clicking "Files" and then scrolling down to "File Type". If you see a lot of files that are "Unknown", "Application", or "Binary" you can have the Dataverse installation attempt to redetect the file type by using the :ref:`Redetect File Type ` API endpoint. .. 
_actionlogrecord-trimming: diff --git a/doc/sphinx-guides/source/api/apps.rst b/doc/sphinx-guides/source/api/apps.rst index 48b5e1f3584..5573056051c 100755 --- a/doc/sphinx-guides/source/api/apps.rst +++ b/doc/sphinx-guides/source/api/apps.rst @@ -28,9 +28,9 @@ https://github.com/scholarsportal/Dataverse-Data-Curation-Tool File Previewers ~~~~~~~~~~~~~~~ -File Previewers are tools that display the content of files - including audio, html, Hypothes.is annotations, images, PDF, text, video - allowing them to be viewed without downloading. +File Previewers are tools that display the content of files - including audio, html, Hypothes.is annotations, images, PDF, text, video, GeoJSON - allowing them to be viewed without downloading. -https://github.com/GlobalDataverseCommunityConsortium/dataverse-previewers +https://github.com/gdcc/dataverse-previewers Python ------ @@ -102,9 +102,9 @@ Please note that there is a Java library for Dataverse Software APIs listed in t DVUploader ~~~~~~~~~~ -The open-source DVUploader tool is a stand-alone command-line Java application that uses the Dataverse Software API to upload files to a specified Dataset. Files can be specified by name, or the DVUploader can upload all files in a directory or recursively from a directory tree. The DVUploader can also verify that uploaded files match their local sources by comparing the local and remote fixity checksums. Source code, release 1.0.0- jar file, and documentation are available on GitHub. DVUploader's creation was supported by the Texas Digital Library. +The open-source DVUploader tool is a stand-alone command-line Java application that uses the Dataverse Software API to upload files to a specified Dataset. Files can be specified by name, or the DVUploader can upload all files in a directory or recursively from a directory tree. The DVUploader can also verify that uploaded files match their local sources by comparing the local and remote fixity checksums. 
Source code, the latest release - jar file, and documentation are available on GitHub. DVUploader's creation was supported by the Texas Digital Library. -https://github.com/IQSS/dataverse-uploader +https://github.com/GlobalDataverseCommunityConsortium/dataverse-uploader Dataverse for Android ~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/sphinx-guides/source/api/client-libraries.rst b/doc/sphinx-guides/source/api/client-libraries.rst index a47eb2d58aa..634f03a8125 100755 --- a/doc/sphinx-guides/source/api/client-libraries.rst +++ b/doc/sphinx-guides/source/api/client-libraries.rst @@ -1,7 +1,7 @@ Client Libraries ================ -Currently there are client libraries for Python, Javascript, R, and Java that can be used to develop against Dataverse Software APIs. We use the term "client library" on this page but "Dataverse Software SDK" (software development kit) is another way of describing these resources. They are designed to help developers express Dataverse Software concepts more easily in the languages listed below. For support on any of these client libraries, please consult each project's README. +Currently there are client libraries for Python, Javascript, R, Java, and Julia that can be used to develop against Dataverse Software APIs. We use the term "client library" on this page but "Dataverse Software SDK" (software development kit) is another way of describing these resources. They are designed to help developers express Dataverse Software concepts more easily in the languages listed below. For support on any of these client libraries, please consult each project's README. Because a Dataverse installation is a SWORD server, additional client libraries exist for Java, Ruby, and PHP per the :doc:`/api/sword` page. @@ -45,3 +45,10 @@ Ruby https://github.com/libis/dataverse_api is a Ruby gem for Dataverse Software APIs. It is registered as a library on Rubygems (https://rubygems.org/search?query=dataverse). 
The gem is created and maintained by the LIBIS team (https://www.libis.be) at the University of Leuven (https://www.kuleuven.be). + +Julia +----- + +https://github.com/gaelforget/Dataverse.jl is the official Julia package for Dataverse Software APIs. It can be found on JuliaHub (https://juliahub.com/ui/Packages/Dataverse/xWAqY/) and leverages pyDataverse to provide an interface to Dataverse's data access API and native API. Dataverse.jl provides a few additional functionalities with documentation (https://gaelforget.github.io/Dataverse.jl/dev/) and a demo notebook (https://gaelforget.github.io/Dataverse.jl/dev/notebook.html). + +It was created and is maintained by `Gael Forget `_. diff --git a/doc/sphinx-guides/source/api/index.rst b/doc/sphinx-guides/source/api/index.rst index 9fc58ef4e5a..c9e79098546 100755 --- a/doc/sphinx-guides/source/api/index.rst +++ b/doc/sphinx-guides/source/api/index.rst @@ -21,5 +21,6 @@ API Guide client-libraries external-tools curation-labels + linkeddatanotification apps faq diff --git a/doc/sphinx-guides/source/api/linkeddatanotification.rst b/doc/sphinx-guides/source/api/linkeddatanotification.rst new file mode 100644 index 00000000000..d55dc4da084 --- /dev/null +++ b/doc/sphinx-guides/source/api/linkeddatanotification.rst @@ -0,0 +1,65 @@ +Linked Data Notification API +============================ + +Dataverse has a limited, experimental API implementing a Linked Data Notification inbox allowing it to receive messages indicating a link between an external resource and a Dataverse dataset. +The motivating use case is one where Dataverse administrators may wish to create back-links to the remote resource (e.g. as a Related Publication, Related Material, etc.). + +Upon receipt of a relevant message, Dataverse will create Announcement Received notifications for superusers, who can edit the dataset involved. 
(In the motivating use case, these users may then add an appropriate relationship and use the Update Current Version publishing option to add it to the most recently published version of the dataset.) + +The ``:LDNMessageHosts`` setting is a comma-separated whitelist of hosts from which Dataverse will accept and process messages. By default, no hosts are allowed. ``*`` can be used in testing to indicate all hosts are allowed. + +Messages can be sent via POST, using the application/ld+json Content-Type: + +.. code-block:: bash + + export SERVER_URL=https://demo.dataverse.org + + curl -X POST -H 'Content-Type: application/ld+json' $SERVER_URL/api/inbox --upload-file message.jsonld + +The supported message format is described by `our preliminary specification `_. The format is expected to change in the near future to match the standard for relationship announcements being developed as part of `the COAR Notify Project `_. + +An example message is shown below. It indicates that a resource with the name "An Interesting Title" exists and "IsSupplementedBy" the dataset with DOI https://doi.org/10.5072/FK2/GGCCDL. If this dataset is managed in the receiving Dataverse, a notification will be sent to users with the relevant permissions (as described above). + +.. 
code:: json + + { + "@context": [ + "https://www.w3.org/ns/activitystreams", + "https://purl.org/coar/notify" + ], + "id": "urn:uuid:94ecae35-dcfd-4182-8550-22c7164fe23f", + "actor": { + "id": "https://research-organisation.org/dspace", + "name": "DSpace Repository", + "type": "Service" + }, + "context": { + "IsSupplementedBy": + { + "id": "http://dev-hdc3b.lib.harvard.edu/dataset.xhtml?persistentId=doi:10.5072/FK2/GGCCDL", + "ietf:cite-as": "https://doi.org/10.5072/FK2/GGCCDL", + "type": "sorg:Dataset" + } + }, + "object": { + "id": "https://research-organisation.org/dspace/item/35759679-5df3-4633-b7e5-4cf24b4d0614", + "ietf:cite-as": "https://research-organisation.org/authority/resolve/35759679-5df3-4633-b7e5-4cf24b4d0614", + "sorg:name": "An Interesting Title", + "type": "sorg:ScholarlyArticle" + }, + "origin": { + "id": "https://research-organisation.org/dspace", + "inbox": "https://research-organisation.org/dspace/inbox/", + "type": "Service" + }, + "target": { + "id": "https://research-organisation.org/dataverse", + "inbox": "https://research-organisation.org/dataverse/inbox/", + "type": "Service" + }, + "type": [ + "Announce", + "coar-notify:ReleaseAction" + ] + } + diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 5cf90359001..93e1c36f179 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -219,15 +219,82 @@ Assign search facets for a given Dataverse collection identified by ``id``: export SERVER_URL=https://demo.dataverse.org export ID=root - curl -H X-Dataverse-key:$API_TOKEN" -X POST $SERVER_URL/api/dataverses/$ID/facets --upload-file facets.json + curl -H "X-Dataverse-key:$API_TOKEN" -X POST $SERVER_URL/api/dataverses/$ID/facets --upload-file dataverse-facets.json The fully expanded example above (without environment variables) looks like this: .. 
code-block:: bash - curl -H X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx -X POST https://demo.dataverse.org/api/dataverses/root/facets --upload-file facets.json + curl -H X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx -X POST https://demo.dataverse.org/api/dataverses/root/facets --upload-file dataverse-facets.json -Where ``facets.json`` contains a JSON encoded list of metadata keys (e.g. ``["authorName","authorAffiliation"]``). +Where :download:`dataverse-facets.json <../_static/api/dataverse-facets.json>` contains a JSON encoded list of metadata keys (e.g. ``["authorName","authorAffiliation"]``). + +List Metadata Block Facets Configured for a Dataverse Collection +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +|CORS| List the metadata block facet configuration with all the metadata blocks configured for a given Dataverse collection identified by ``id``: + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export ID=root + + curl -H X-Dataverse-key:$API_TOKEN $SERVER_URL/api/dataverses/$ID/metadatablockfacets + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl -H X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx https://demo.dataverse.org/api/dataverses/root/metadatablockfacets + +Set Metadata Block Facets for a Dataverse Collection +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Sets the metadata blocks that will appear in the ``Dataset Features`` facet category for a given Dataverse collection identified by ``id``. + +In order to set or clear the metadata blocks for a collection, you must first :ref:`set the metadata block facet root to true <metadata-block-facet-root-api>`. + +To clear the metadata blocks set by a parent collection, submit an empty array (e.g. ``[]``): + +.. 
code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export ID=root + + curl -H "X-Dataverse-key:$API_TOKEN" -X POST -H "Content-type:application/json" $SERVER_URL/api/dataverses/$ID/metadatablockfacets --upload-file metadata-block-facets.json + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl -H X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx -X POST -H "Content-type:application/json" https://demo.dataverse.org/api/dataverses/root/metadatablockfacets --upload-file metadata-block-facets.json + +Where :download:`metadata-block-facets.json <../_static/api/metadata-block-facets.json>` contains a JSON encoded list of metadata block names (e.g. ``["socialscience","geospatial"]``). This endpoint supports an empty list (e.g. ``[]``). + +.. _metadata-block-facet-root-api: + +Configure a Dataverse Collection to Inherit Its Metadata Block Facets from Its Parent +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Set whether the Dataverse collection is a metadata block facet root, or whether it uses its parent's metadata block facets. Possible values are ``true`` and ``false`` (both are valid JSON expressions). + +When updating the root to false, it will clear any metadata block facets from the collection. When updating to true, it will copy the metadata block facets from the parent collection: + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export ID=root + + curl -H X-Dataverse-key:$API_TOKEN -X POST -H "Content-type:application/json" $SERVER_URL/api/dataverses/$ID/metadatablockfacets/isRoot -d 'true' + +The fully expanded example above (without environment variables) looks like this: + +.. 
code-block:: bash + + curl -H X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx -X POST -H "Content-type:application/json" https://demo.dataverse.org/api/dataverses/root/metadatablockfacets/isRoot -d 'true' Create a New Role in a Dataverse Collection ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -454,9 +521,9 @@ A dataset is a container for files as explained in the :doc:`/user/dataset-manag To create a dataset, you must supply a JSON file that contains at least the following required metadata fields: - Title -- Author -- Contact -- Description +- Author Name +- Point of Contact Email +- Description Text - Subject As a starting point, you can download :download:`dataset-finch1.json <../../../../scripts/search/tests/data/dataset-finch1.json>` and modify it to meet your needs. (:download:`dataset-create-new-all-default-fields.json <../../../../scripts/api/data/dataset-finch1_fr.json>` is a variant of this file that includes setting the metadata language (see :ref:`:MetadataLanguages`) to French (fr). In addition to this minimal example, you can download :download:`dataset-create-new-all-default-fields.json <../../../../scripts/api/data/dataset-create-new-all-default-fields.json>` which populates all of the metadata fields that ship with a Dataverse installation.) @@ -773,7 +840,9 @@ The fully expanded example above (without environment variables) looks like this Export Metadata of a Dataset in Various Formats ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -|CORS| Export the metadata of the current published version of a dataset in various formats see Note below: +|CORS| Export the metadata of the current published version of a dataset in various formats. + +See also :ref:`batch-exports-through-the-api` and the note below: .. 
code-block:: bash @@ -979,7 +1048,7 @@ The fully expanded example above (without environment variables) looks like this curl -H "X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X PUT https://demo.dataverse.org/api/datasets/:persistentId/versions/:draft?persistentId=doi:10.5072/FK2/BCCP9Z --upload-file dataset-update-metadata.json -Note that in the example JSON file above, there is a single JSON object with ``metadataBlocks`` as a key. When you download a representation of your dataset in JSON format, the ``metadataBlocks`` object you need is nested inside another object called ``json``. To extract just the ``metadataBlocks`` key when downloading a JSON representation, you can use a tool such as ``jq`` like this: +Note that in the example JSON file above, there is a single JSON object with ``metadataBlocks`` as a key. When you download a representation of your dataset in JSON format, the ``metadataBlocks`` object you need is nested inside another object called ``datasetVersion``. To extract just the ``metadataBlocks`` key when downloading a JSON representation, you can use a tool such as ``jq`` like this: .. code-block:: bash @@ -1245,7 +1314,7 @@ The fully expanded example above (without environment variables) looks like this curl -H "X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST https://demo.dataverse.org/api/datasets/24/privateUrl -If Anonymized Access has been enabled on a Dataverse instance (see the :ref:`:AnonymizedFieldTypeNames` setting), an optional 'anonymizedAccess' query parameter is allowed. +If Anonymized Access has been enabled on a Dataverse installation (see the :ref:`:AnonymizedFieldTypeNames` setting), an optional 'anonymizedAccess' query parameter is allowed. Setting anonymizedAccess=true in your call will create a PrivateURL that only allows an anonymized view of the Dataset (see :ref:`privateurl`). .. 
code-block:: bash @@ -1303,7 +1372,7 @@ When adding a file to a dataset, you can optionally specify the following: - Whether or not the file is restricted. - Whether or not the file skips :doc:`tabular ingest `. If the ``tabIngest`` parameter is not specified, it defaults to ``true``. -Note that when a Dataverse instance is configured to use S3 storage with direct upload enabled, there is API support to send a file directly to S3. This is more complex and is described in the :doc:`/developers/s3-direct-upload-api` guide. +Note that when a Dataverse installation is configured to use S3 storage with direct upload enabled, there is API support to send a file directly to S3. This is more complex and is described in the :doc:`/developers/s3-direct-upload-api` guide. In the curl example below, all of the above are specified but they are optional. @@ -1411,7 +1480,37 @@ In practice, you only need one the ``dataset_id`` or the ``persistentId``. The e print '-' * 40 print r.json() print r.status_code + +.. _add-remote-file-api: +Add a Remote File to a Dataset +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If your Dataverse installation has been configured to support :ref:`trusted-remote-storage` +you can add files from remote URLs to datasets. These remote files appear in your Dataverse +installation as if they were ordinary files but are stored remotely. + +The location of the remote file is specified in the ``storageIdentifier`` field in JSON you supply. +The base URL of the file is contained in the "store" (e.g. "trsa" in the example below) and is followed by the +path to the file (e.g. "themes/custom..."). + +In the JSON example below, all fields are required except for ``description``. Other optional fields are shown under :ref:`add-file-api`. + +.. 
code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_ID=doi:10.5072/FK2/J8SJZB + export JSON_DATA='{"description":"A remote image.","storageIdentifier":"trsa://themes/custom/qdr/images/CoreTrustSeal-logo-transparent.png","checksumType":"MD5","md5Hash":"509ef88afa907eaf2c17c1c8d8fde77e","label":"testlogo.png","fileName":"testlogo.png","mimeType":"image/png"}' + + curl -H "X-Dataverse-key: $API_TOKEN" -X POST "$SERVER_URL/api/datasets/:persistentId/add?persistentId=$PERSISTENT_ID" -F "jsonData=$JSON_DATA" + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl -H "X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST "https://demo.dataverse.org/api/datasets/:persistentId/add?persistentId=doi:10.5072/FK2/J8SJZB" -F 'jsonData={"description":"A remote image.","storageIdentifier":"trsa://themes/custom/qdr/images/CoreTrustSeal-logo-transparent.png","checksumType":"MD5","md5Hash":"509ef88afa907eaf2c17c1c8d8fde77e","label":"testlogo.png","fileName":"testlogo.png","mimeType":"image/png"}' + Report the data (file) size of a Dataset ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -1861,7 +1960,7 @@ The API call requires a Json body that includes the embargo's end date (dateAvai Remove an Embargo on Files in a Dataset ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -/api/datasets/$dataset-id/files/actions/:unset-embargo can be used to remove an embargo on one or more files in a dataset. Embargoes can be removed from files that are only in a draft dataset version (and are not in any previously published version) by anyone who can edit the dataset. The same API call can be used by a superuser to remove embargos from files that have already been released as part of a previously published dataset version. +``/api/datasets/$dataset-id/files/actions/:unset-embargo`` can be used to remove an embargo on one or more files in a dataset. 
Embargoes can be removed from files that are only in a draft dataset version (and are not in any previously published version) by anyone who can edit the dataset. The same API call can be used by a superuser to remove embargoes from files that have already been released as part of a previously published dataset version. The API call requires a Json body that includes the list of the fileIds that the embargo should be removed from. All files listed must be in the specified dataset. For example: @@ -1873,6 +1972,63 @@ The API call requires a Json body that includes the list of the fileIds that the export JSON='{"fileIds":[300,301]}' curl -H "X-Dataverse-key: $API_TOKEN" -H "Content-Type:application/json" "$SERVER_URL/api/datasets/:persistentId/files/actions/:unset-embargo?persistentId=$PERSISTENT_IDENTIFIER" -d "$JSON" + + +Get the Archival Status of a Dataset By Version +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Archiving is an optional feature that may be configured for a Dataverse installation. When that is enabled, this API call can be used to retrieve the status. Note that this requires "superuser" credentials. + +``GET /api/datasets/$dataset-id/$version/archivalStatus`` returns the archival status of the specified dataset version. + +The response is a JSON object that will contain a "status" which may be "success", "pending", or "failure" and a "message" which is archive system specific. For "success" the message should provide an identifier or link to the archival copy. For example: + +.. 
code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/7U7YBV + export VERSION=1.0 + + curl -H "X-Dataverse-key: $API_TOKEN" -H "Accept:application/json" "$SERVER_URL/api/datasets/:persistentId/$VERSION/archivalStatus?persistentId=$PERSISTENT_IDENTIFIER" + +Set the Archival Status of a Dataset By Version +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Archiving is an optional feature that may be configured for a Dataverse installation. When that is enabled, this API call can be used to set the status. Note that this is intended to be used by the archival system and requires "superuser" credentials. + +``PUT /api/datasets/$dataset-id/$version/archivalStatus`` sets the archival status of the specified dataset version. + +The body is a JSON object that must contain a "status" which may be "success", "pending", or "failure" and a "message" which is archive system specific. For "success" the message should provide an identifier or link to the archival copy. For example: + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/7U7YBV + export VERSION=1.0 + export JSON='{"status":"failure","message":"Something went wrong"}' + + curl -H "X-Dataverse-key: $API_TOKEN" -H "Content-Type:application/json" -X PUT "$SERVER_URL/api/datasets/:persistentId/$VERSION/archivalStatus?persistentId=$PERSISTENT_IDENTIFIER" -d "$JSON" + +Note that if the configured archiver only supports archiving a single version, the call may return 409 CONFLICT if/when another version already has a non-null status. + +Delete the Archival Status of a Dataset By Version +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Archiving is an optional feature that may be configured for a Dataverse installation. When that is enabled, this API call can be used to delete the status. 
Note that this is intended to be used by the archival system and requires "superuser" credentials. + +``DELETE /api/datasets/$dataset-id/$version/archivalStatus`` deletes the archival status of the specified dataset version. + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/7U7YBV + export VERSION=1.0 + + curl -H "X-Dataverse-key: $API_TOKEN" -X DELETE "$SERVER_URL/api/datasets/:persistentId/$VERSION/archivalStatus?persistentId=$PERSISTENT_IDENTIFIER" + Files ----- @@ -2071,14 +2227,15 @@ Currently the following methods are used to detect file types: - The file type detected by the browser (or sent via API). - JHOVE: http://jhove.openpreservation.org -- As a last resort the file extension (e.g. ".ipybn") is used, defined in a file called ``MimeTypeDetectionByFileExtension.properties``. +- The file extension (e.g. ".ipynb") is used, defined in a file called ``MimeTypeDetectionByFileExtension.properties``. +- The file name (e.g. "Dockerfile") is used, defined in a file called ``MimeTypeDetectionByFileName.properties``. Replacing Files ~~~~~~~~~~~~~~~ Replace an existing file where ``ID`` is the database id of the file to replace or ``PERSISTENT_ID`` is the persistent id (DOI or Handle) of the file. Requires the ``file`` to be passed as well as a ``jsonString`` expressing the new metadata. Note that metadata such as description, directoryLabel (File Path) and tags are not carried over from the file being replaced. -Note that when a Dataverse instance is configured to use S3 storage with direct upload enabled, there is API support to send a replacement file directly to S3. This is more complex and is described in the :doc:`/developers/s3-direct-upload-api` guide. +Note that when a Dataverse installation is configured to use S3 storage with direct upload enabled, there is API support to send a replacement file directly to S3. 
This is more complex and is described in the :doc:`/developers/s3-direct-upload-api` guide. A curl example using an ``ID`` @@ -3872,13 +4029,13 @@ Superusers can change whether an existing license is active (usable for new data export STATE=true curl -X PUT -H 'Content-Type: application/json' -H X-Dataverse-key:$API_TOKEN $SERVER_URL/api/licenses/$ID/:active/$STATE -Superusers can set which license is the default specified by the license ``$ID``: +Superusers may change the default license by specifying the license ``$ID``: .. code-block:: bash - curl -X PUT -H 'Content-Type: application/json' -H X-Dataverse-key:$API_TOKEN --data-binary @edit-license.json $SERVER_URL/api/licenses/default/$ID + curl -X PUT -H X-Dataverse-key:$API_TOKEN $SERVER_URL/api/licenses/default/$ID -Superusers can delete a license that is not in use by the license ``$ID``: +Superusers can delete a license, provided it is not in use, by the license ``$ID``: .. code-block:: bash diff --git a/doc/sphinx-guides/source/conf.py b/doc/sphinx-guides/source/conf.py index c8175f21591..880ed561720 100755 --- a/doc/sphinx-guides/source/conf.py +++ b/doc/sphinx-guides/source/conf.py @@ -66,9 +66,9 @@ # built documents. # # The short X.Y version. -version = '5.11.1' +version = '5.12' # The full version, including alpha/beta/rc tags. -release = '5.11.1' +release = '5.12' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/doc/sphinx-guides/source/developers/big-data-support.rst b/doc/sphinx-guides/source/developers/big-data-support.rst index 21675bd4960..0782fd239a1 100644 --- a/doc/sphinx-guides/source/developers/big-data-support.rst +++ b/doc/sphinx-guides/source/developers/big-data-support.rst @@ -1,19 +1,19 @@ Big Data Support ================ -Big data support is highly experimental. Eventually this content will move to the Installation Guide. +Big data support includes some highly experimental options. 
Eventually more of this content will move to the Installation Guide. .. contents:: |toctitle| :local: -Various components need to be installed and/or configured for big data support. +Various components will need to be installed and/or configured for big data support via the methods described below. S3 Direct Upload and Download ----------------------------- A lightweight option for supporting file sizes beyond a few gigabytes - a size that can cause performance issues when uploaded through a Dataverse installation itself - is to configure an S3 store to provide direct upload and download via 'pre-signed URLs'. When these options are configured, file uploads and downloads are made directly to and from a configured S3 store using secure (https) connections that enforce a Dataverse installation's access controls. (The upload and download URLs are signed with a unique key that only allows access for a short time period and a Dataverse installation will only generate such a URL if the user has permission to upload/download the specific file in question.) -This option can handle files >40GB and could be appropriate for files up to a TB. Other options can scale farther, but this option has the advantages that it is simple to configure and does not require any user training - uploads and downloads are done via the same interface as normal uploads to a Dataverse installation. +This option can handle files >300GB and could be appropriate for files up to a TB or larger. Other options can scale farther, but this option has the advantages that it is simple to configure and does not require any user training - uploads and downloads are done via the same interface as normal uploads to a Dataverse installation. 
To configure these options, an administrator must set two JVM options for the Dataverse installation using the same process as for other configuration options: @@ -32,13 +32,13 @@ For AWS, the minimum allowed part size is 5*1024*1024 bytes and the maximum is 5 It is also possible to set file upload size limits per store. See the :MaxFileUploadSizeInBytes setting described in the :doc:`/installation/config` guide. -At present, one potential drawback for direct-upload is that files are only partially 'ingested', tabular and FITS files are processed, but zip files are not unzipped, and the file contents are not inspected to evaluate their mimetype. This could be appropriate for large files, or it may be useful to completely turn off ingest processing for performance reasons (ingest processing requires a copy of the file to be retrieved by the Dataverse installation from the S3 store). A store using direct upload can be configured to disable all ingest processing for files above a given size limit: +At present, one potential drawback for direct-upload is that files are only partially 'ingested' - tabular and FITS files are processed, but zip files are not unzipped, and the file contents are not inspected to evaluate their mimetype. This could be appropriate for large files, or it may be useful to completely turn off ingest processing for performance reasons (ingest processing requires a copy of the file to be retrieved by the Dataverse installation from the S3 store). A store using direct upload can be configured to disable all ingest processing for files above a given size limit: ``./asadmin create-jvm-options "-Ddataverse.files..ingestsizelimit="`` **IMPORTANT:** One additional step that is required to enable direct uploads via a Dataverse installation and for direct download to work with previewers is to allow cross site (CORS) requests on your S3 store. 
-The example below shows how to enable CORS rules (to support upload and download) on a bucket using the AWS CLI command line tool. Note that you may want to limit the AllowedOrigins and/or AllowedHeaders further. https://github.com/GlobalDataverseCommunityConsortium/dataverse-previewers/wiki/Using-Previewers-with-download-redirects-from-S3 has some additional information about doing this. +The example below shows how to enable CORS rules (to support upload and download) on a bucket using the AWS CLI command line tool. Note that you may want to limit the AllowedOrigins and/or AllowedHeaders further. https://github.com/gdcc/dataverse-previewers/wiki/Using-Previewers-with-download-redirects-from-S3 has some additional information about doing this. ``aws s3api put-bucket-cors --bucket --cors-configuration file://cors.json`` @@ -61,6 +61,93 @@ Alternatively, you can enable CORS using the AWS S3 web interface, using json-en Since the direct upload mechanism creates the final file rather than an intermediate temporary file, user actions, such as neither saving or canceling an upload session before closing the browser page, can leave an abandoned file in the store. The direct upload mechanism attempts to use S3 Tags to aid in identifying/removing such files. Upon upload, files are given a "dv-state":"temp" tag which is removed when the dataset changes are saved and the new file(s) are added in the Dataverse installation. Note that not all S3 implementations support Tags: Minio does not. WIth such stores, direct upload works, but Tags are not used. +Trusted Remote Storage with the ``remote`` Store Type +----------------------------------------------------- + +For very large, and/or very sensitive data, it may not make sense to transfer or copy files to Dataverse at all. The experimental ``remote`` store type in the Dataverse software now supports this use case. 
+ +With this storage option Dataverse stores a URL reference for the file rather than transferring the file bytes to a store managed directly by Dataverse. Basic configuration for a remote store is described at :ref:`file-storage` in the Configuration Guide. + +Once the store is configured, it can be assigned to a collection or individual datasets as with other stores. In a dataset using this store, users can reference remote files which will then appear the same basic way as other datafiles. + +Currently, remote files can only be added via the API. Users can also upload smaller files via the UI or API which will be stored in the configured base store. + +If the store has been configured with a remote-store-name or remote-store-url, the dataset file table will include this information for remote files. These provide a visual indicator that the files are not managed directly by Dataverse and are stored/managed by a remote trusted store. + +Rather than sending the file bytes, metadata for the remote file is added using the "jsonData" parameter. +jsonData normally includes information such as a file description, tags, provenance, whether the file is restricted, etc. For remote references, the jsonData object must also include values for: + +* "storageIdentifier" - String, as specified in prior calls +* "fileName" - String +* "mimeType" - String +* fixity/checksum: either: + + * "md5Hash" - String with MD5 hash value, or + * "checksum" - Json Object with "@type" field specifying the algorithm used and "@value" field with the value from that algorithm, both Strings + +The allowed checksum algorithms are defined by the edu.harvard.iq.dataverse.DataFile.CheckSumType class and currently include MD5, SHA-1, SHA-256, and SHA-512 + +(The remote store leverages the same JSON upload syntax as the last step in direct upload to S3 described in the :ref:`Adding the Uploaded file to the Dataset ` section of the :doc:`/developers/s3-direct-upload-api`.) + +.. 
code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_IDENTIFIER=doi:10.5072/FK27U7YBV + export JSON_DATA="{'description':'My description.','directoryLabel':'data/subdir1','categories':['Data'], 'restrict':'false', 'storageIdentifier':'trs://images/dataverse_project_logo.svg', 'fileName':'dataverse_logo.svg', 'mimeType':'image/svg+xml', 'checksum': {'@type': 'SHA-1', '@value': '123456'}}" + + curl -X POST -H "X-Dataverse-key: $API_TOKEN" "$SERVER_URL/api/datasets/:persistentId/add?persistentId=$PERSISTENT_IDENTIFIER" -F "jsonData=$JSON_DATA" + +The variant allowing multiple files to be added once that is discussed in the :doc:`/developers/s3-direct-upload-api` document can also be used. + +Considerations: + +* Remote stores are configured with a base-url which limits what files can be referenced, i.e. the absolute URL for the file is /. +* The current store will not prevent you from providing a relative URL that results in a 404 when resolved. (I.e. if you make a typo). You should check to make sure the file exists at the location you specify - by trying to download in Dataverse, by checking to see that Dataverse was able to get the file size (which it does by doing a HEAD call to that location), or just manually trying the URL in your browser. +* Admins are trusting the organization managing the site/service at base-url to maintain the referenced files for as long as the Dataverse instance needs them. Formal agreements are recommended for production +* For large files, direct-download should always be used with a remote store. (Otherwise the Dataverse will be involved in the download.) +* For simple websites, a remote store should be marked public which will turn off restriction and embargo functionality in Dataverse (since Dataverse cannot restrict access to the file on the remote website) +* Remote stores can be configured with a secret-key. 
This key will be used to sign URLs when Dataverse retrieves the file content or redirects a user for download. If the remote service is able to validate the signature and reject invalid requests, the remote store mechanism can be used to manage restricted and embargoed files, access requests in Dataverse, etc. Dataverse contains Java code that validates these signatures which could be used, for example, to create a validation proxy in front of a web server to allow Dataverse to manage access. The secret-key is a shared secret between Dataverse and the remote service and is not shared with/is not accessible by users or those with access to users' machines. +* Sophisticated remote services may wish to register file URLs that do not directly reference the file contents (bytes) but instead direct the user to a website where further information about the remote service's download process can be found. +* Due to the current design, ingest cannot be done on remote files and administrators should disable ingest when using a remote store. This can be done by setting the ingest size limit for the store to 0 and/or using the recently added option to not perform tabular ingest on upload. +* Dataverse will normally try to access the file contents itself, i.e. for ingest (in future versions), full-text indexing, thumbnail creation, etc. This processing may not be desirable for large/sensitive data, and, for the case where the URL does not reference the file itself, would not be possible. At present, administrators should configure the relevant size limits to avoid such actions. +* The current implementation of remote stores is experimental in the sense that future work to enhance it is planned. This work may result in changes to how the store works and lead to additional work when upgrading for sites that start using this mechanism now. 
+ +To configure the options mentioned above, an administrator must set two JVM options for the Dataverse installation using the same process as for other configuration options: + +``./asadmin create-jvm-options "-Ddataverse.files..download-redirect=true"`` +``./asadmin create-jvm-options "-Ddataverse.files..secret-key=somelongrandomalphanumerickeythelongerthebetter123456"`` +``./asadmin create-jvm-options "-Ddataverse.files..public=true"`` +``./asadmin create-jvm-options "-Ddataverse.files..ingestsizelimit="`` + +.. _globus-support: + +Globus File Transfer +-------------------- + +Note: Globus file transfer is still experimental but feedback is welcome! See :ref:`support`. + +Users can transfer files via `Globus `_ into and out of datasets when their Dataverse installation is configured to use a Globus accessible S3 store and a community-developed `dataverse-globus `_ "transfer" app has been properly installed and configured. + +Due to differences in the access control models of a Dataverse installation and Globus, enabling the Globus capability on a store will disable the ability to restrict and embargo files in that store. + +As Globus aficionados know, Globus endpoints can be in a variety of places, from data centers to personal computers. This means that from within the Dataverse software, a Globus transfer can feel like an upload or a download (with Globus Personal Connect running on your laptop, for example) or it can feel like a true transfer from one server to another (from a cluster in a data center into a Dataverse dataset or vice versa). + +Globus transfer uses a very efficient transfer mechanism and has additional features that make it suitable for large files and large numbers of files: + +* robust file transfer capable of restarting after network or endpoint failures +* third-party transfer, which enables a user accessing a Dataverse installation in their desktop browser to initiate transfer of their files from a remote endpoint (i.e. 
on a local high-performance computing cluster), directly to an S3 store managed by the Dataverse installation + +Globus transfer requires use of the Globus S3 connector which requires a paid Globus subscription at the host institution. Users will need a Globus account which could be obtained via their institution or directly from Globus (at no cost). + +The setup required to enable Globus is described in the `Community Dataverse-Globus Setup and Configuration document `_ and the references therein. + +As described in that document, Globus transfers can be initiated by choosing the Globus option in the dataset upload panel. (Globus, which does asynchronous transfers, is not available during dataset creation.) Analogously, "Globus Transfer" is one of the download options in the "Access Dataset" menu and optionally the file landing page download menu (if/when supported in the dataverse-globus app). + +An overview of the control and data transfer interactions between components was presented at the 2022 Dataverse Community Meeting and can be viewed in the `Integrations and Tools Session Video `_ around the 1 hr 28 min mark. + +See also :ref:`Globus settings <:GlobusBasicToken>`. + Data Capture Module (DCM) ------------------------- diff --git a/doc/sphinx-guides/source/developers/configuration.rst b/doc/sphinx-guides/source/developers/configuration.rst index 0eac7de3134..fb15fea7900 100644 --- a/doc/sphinx-guides/source/developers/configuration.rst +++ b/doc/sphinx-guides/source/developers/configuration.rst @@ -18,12 +18,14 @@ authentication providers, harvesters and others. Simple Configuration Options ---------------------------- -Developers have accessed the simple properties via +Developers can access simple properties via: -1. ``System.getProperty(...)`` for JVM system property settings -2. ``SettingsServiceBean.get(...)`` for database settings and +1. ``JvmSettings..lookup(...)`` for JVM system property settings. +2. 
``SettingsServiceBean.get(...)`` for database settings. 3. ``SystemConfig.xxx()`` for specially treated settings, maybe mixed from 1 and 2 and other sources. -4. ``SettingsWrapper`` must be used to obtain settings from 2 and 3 in frontend JSF (xhtml) pages. Please see the note on how to :ref:`avoid common efficiency issues with JSF render logic expressions `. +4. ``SettingsWrapper`` for use in frontend JSF (xhtml) pages to obtain settings from 2 and 3. Using the wrapper is a must for performance as explained in :ref:`avoid common efficiency issues with JSF render logic expressions + `. +5. ``System.getProperty()`` only for very special use cases not covered by ``JvmSettings``. As of Dataverse Software 5.3, we start to streamline our efforts into using a more consistent approach, also bringing joy and happiness to all the system administrators out there. This will be done by adopting the use of @@ -49,6 +51,7 @@ Developers benefit from: - Config API is also pushing for validation of configuration, as it's typesafe and converters for non-standard types can be added within our codebase. - Defaults in code or bundled in ``META-INF/microprofile-config.properties`` allow for optional values without much hassle. +- A single place to lookup any existing JVM setting in code, easier to keep in sync with the documentation. System administrators benefit from: @@ -57,9 +60,9 @@ System administrators benefit from: - Running a Dataverse installation in containers gets much easier when configuration can be provisioned in a streamlined fashion, mitigating the need for scripting glue and distinguishing between setting types. - Classic installations have a profit, too: we can enable using a single config file, e.g. living in - ``/etc/dataverse/config.properties``. + ``/etc/dataverse/config.properties`` by adding our own, hot-reload config source. 
- Features for monitoring resources and others are easier to use with this streamlined configuration, as we can - avoid people having to deal with ``asadmin`` commands and change a setting comfortably instead. + avoid people having to deal with ``asadmin`` commands and change a setting with comfort instead. Adopting MicroProfile Config API --------------------------------- @@ -68,33 +71,41 @@ This technology is introduced on a step-by-step basis. There will not be a big s Instead, we will provide backward compatibility by deprecating renamed or moved config options, while still supporting the old way of setting them. -- Introducing a new setting or moving and old one should result in a key ``dataverse..``. - That way we enable sys admins to recognize the meaning of an option and avoid name conflicts. +- Introducing a new setting or moving an old one should result in a scoped key + ``dataverse..``. That way we enable sys admins to recognize the meaning of an option + and avoid name conflicts. Starting with ``dataverse`` makes it perfectly clear that this is a setting meant for this application, which is important when using environment variables, system properties or other MPCONFIG sources. -- Replace ``System.getProperty()`` calls with either injected configs or retrieve programmatically if more complex - handling is necessary. If you rename the property, you should provide an alias. See below. -- Database settings need to be refactored in multiple steps. First you need to change the code retrieving it to use - MicroProfile Config API instead (just like above). Then you should provide an alias to retain backward compatibility. - See below. +- Replace ``System.getProperty()`` calls with ``JvmSettings..lookup(...)``, adding the setting there first. + This might be paired with renaming and providing backward-compatible aliases. +- Database settings need to be refactored in multiple steps and it is not yet clear how this will be done. 
+ Many Database settings are of very static nature and might be moved to JVM settings (in backward compatible ways). -Moving or Replacing a JVM Setting -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Adding a JVM Setting +^^^^^^^^^^^^^^^^^^^^ -When moving an old key to a new (especially when doing so with a former JVM system property setting), you should -add an alias to ``src/main/resources/META-INF/microprofile-aliases.properties`` to enable backward compatibility. -The format is always like ``dataverse..newname...=old.property.name``. +Whenever a new option gets added or an existing configuration gets migrated to +``edu.harvard.iq.dataverse.settings.JvmSettings``, you will attach the setting to an existing scope or create new +sub-scopes first. -Details can be found in ``edu.harvard.iq.dataverse.settings.source.AliasConfigSource`` +- Scopes and settings are organised in a tree-like structure within a single enum ``JvmSettings``. +- The root scope is "dataverse". +- All sub-scopes are below that. +- Scopes are separated by dots (periods). +- A scope may be a placeholder, filled with a variable during lookup. (Named object mapping.) -Aliasing Database Setting -^^^^^^^^^^^^^^^^^^^^^^^^^ +Any consumer of the setting can choose to use one of the fluent ``lookup()`` methods, which hides away alias handling, +conversion etc from consuming code. See also the detailed Javadoc for these methods. -When moving a database setting (``:ExampleSetting``), configure an alias -``dataverse.my.example.setting=dataverse.settings.fromdb.ExampleSetting`` in -``src/main/resources/META-INF/microprofile-aliases.properties``. This will enable backward compatibility. +Moving or Replacing a JVM Setting +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +When moving an old key to a new (especially when doing so with a former JVM system property setting), you should +add an alias to the ``JvmSettings`` definition to enable backward compatibility. Old names given there are capable of +being used with patterned lookups. 
-A database setting with an i18n attribute using *lang* will have available language codes appended to the name. -Example: ``dataverse.settings.fromdb.ExampleI18nSetting.en``, ``dataverse.settings.fromdb.ExampleI18nSetting.de`` +Another option is to add the alias in ``src/main/resources/META-INF/microprofile-aliases.properties``. The format is +always like ``dataverse..newname...=old.property.name``. Note this doesn't provide support for patterned +aliases. -More details in ``edu.harvard.iq.dataverse.settings.source.DbSettingConfigSource`` +Details can be found in ``edu.harvard.iq.dataverse.settings.source.AliasConfigSource`` diff --git a/doc/sphinx-guides/source/developers/dev-environment.rst b/doc/sphinx-guides/source/developers/dev-environment.rst index 38549637f8a..e44a70a405f 100755 --- a/doc/sphinx-guides/source/developers/dev-environment.rst +++ b/doc/sphinx-guides/source/developers/dev-environment.rst @@ -79,15 +79,15 @@ On Linux, install ``jq`` from your package manager or download a binary from htt Install Payara ~~~~~~~~~~~~~~ -Payara 5.2021.6 or higher is required. +Payara 5.2022.3 or higher is required. To install Payara, run the following commands: ``cd /usr/local`` -``sudo curl -O -L https://s3-eu-west-1.amazonaws.com/payara.fish/Payara+Downloads/5.2021.6/payara-5.2021.6.zip`` +``sudo curl -O -L https://s3-eu-west-1.amazonaws.com/payara.fish/Payara+Downloads/5.2022.3/payara-5.2022.3.zip`` -``sudo unzip payara-5.2021.6.zip`` +``sudo unzip payara-5.2022.3.zip`` ``sudo chown -R $USER /usr/local/payara5`` diff --git a/doc/sphinx-guides/source/developers/making-releases.rst b/doc/sphinx-guides/source/developers/making-releases.rst index 064ed6f1b78..53fc11a5915 100755 --- a/doc/sphinx-guides/source/developers/making-releases.rst +++ b/doc/sphinx-guides/source/developers/making-releases.rst @@ -5,61 +5,72 @@ Making Releases .. contents:: |toctitle| :local: -Use the number of the milestone with a "v" in front for the release tag. For example: ``v4.6.2``. 
+Introduction +------------ -Create the release GitHub issue and branch ------------------------------------------- +See :doc:`version-control` for background on our branching strategy. + +The steps below describe making both normal releases and hotfix releases. + +Write Release Notes +------------------- + +Developers express the need for an addition to release notes by creating a file in ``/doc/release-notes`` containing the name of the issue they're working on. The name of the branch could be used for the filename with ".md" appended (release notes are written in Markdown) such as ``5053-apis-custom-homepage.md``. + +The task at or near release time is to collect these notes into a single doc. + +- Create an issue in GitHub to track the work of creating release notes for the upcoming release. +- Create a branch, add a .md file for the release (ex. 5.10.1 Release Notes) in ``/doc/release-notes`` and write the release notes, making sure to pull content from the issue-specific release notes mentioned above. +- Delete the previously-created, issue-specific release notes as the content is added to the main release notes file. +- Take the release notes .md through the regular Code Review and QA process. -Use the GitHub issue number and the release tag for the name of the branch. -For example: 4734-update-v-4.8.6-to-4.9 +Create a GitHub Issue and Branch for the Release +------------------------------------------------ + +Usually we branch from the "develop" branch to create the release branch. If we are creating a hotfix for a particular version (5.11, for example), we branch from the tag (e.g. ``v5.11``). + +Use the GitHub issue number and the release tag for the name of the branch (e.g. ``8583-update-version-to-v5.10.1``). **Note:** the changes below must be the very last commits merged into the develop branch before it is merged into master and tagged for the release! 
-Make the following changes in the release branch: +Make the following changes in the release branch. -1. Bump Version Numbers -======================= +Bump Version Numbers +-------------------- -Increment the version number to the milestone (e.g. 4.6.2) in the following two files: +Increment the version number to the milestone (e.g. 5.10.1) in the following two files: -- modules/dataverse-parent/pom.xml -> ```` -> ```` -- doc/sphinx-guides/source/conf.py (two places) +- modules/dataverse-parent/pom.xml -> ```` -> ```` (e.g. `pom.xml commit `_) +- doc/sphinx-guides/source/conf.py (two places, e.g. `conf.py commit `_) Add the version being released to the lists in the following two files: -- doc/sphinx-guides/source/versions.rst -- scripts/database/releases.txt +- doc/sphinx-guides/source/versions.rst (e.g. `versions.rst commit `_) -Here's an example commit where three of the four files above were updated at once: https://github.com/IQSS/dataverse/commit/99e23f96ec362ac2f524cb5cd80ca375fa13f196 -(Note: the version has been moved to a property in parent module since this commit was created.) +Check in the Changes Above into a Release Branch and Merge It +------------------------------------------------------------- -2. Check in the Changes Above... -================================ +For any ordinary release, make the changes above in the release branch you created, make a pull request, and merge it into the "develop" branch. Like usual, you can safely delete the branch after the merge is complete. -... into the release branch, make a pull request and merge the release branch into develop. +If you are making a hotfix release, make the pull request against the "master" branch. Do not delete the branch after merging because we will later merge it into the "develop" branch to pick up the hotfix. More on this later. +Either way, as usual, you should ensure that all tests are passing. 
Please note that you might need to bump the version in `jenkins.yml `_ in dataverse-ansible to get the tests to run. Merge "develop" into "master" ----------------------------- -The "develop" branch should be merged into "master" before tagging. See also the branching strategy described in the :doc:`version-control` section. +Note: If you are making a hotfix release, the "develop" branch is not involved so you can skip this step. -Write Release Notes -------------------- +The "develop" branch should be merged into "master" before tagging. -Developers should express the need for an addition to release notes by creating a file in ``/doc/release-notes`` containing the name of the issue they're working on. The name of the branch could be used for the filename with ".md" appended (release notes are written in Markdown) such as ``5053-apis-custom-homepage.md``. +Create a Draft Release on GitHub +-------------------------------- -At or near release time: +Create a draft release at https://github.com/IQSS/dataverse/releases/new -- Create an issue in Github to track the work of creating release notes for the upcoming release -- Create a branch, add a .md file for the release (ex. 4.16 Release Notes) in ``/doc/release-notes`` and write the release notes, making sure to pull content from the issue-specific release notes mentioned above -- Delete the previously-created, issue-specific release notes as the content is added to the main release notes file -- Take the release notes .md through the regular Code Review and QA process -- Create a draft release at https://github.com/IQSS/dataverse/releases/new -- The "tag version" and "title" should be the number of the milestone with a "v" in front (i.e. v4.16). -- Copy in the content from the .md file -- For the description, follow post-4.16 examples at https://github.com/IQSS/dataverse/releases +The "tag version" and "title" should be the number of the milestone with a "v" in front (i.e. v5.10.1). 
+Copy in the content from the .md file created in the "Write Release Notes" steps above. Make Artifacts Available for Download ------------------------------------- @@ -70,11 +81,46 @@ Upload the following artifacts to the draft release you created: - installer (``cd scripts/installer && make``) - other files as needed, such as updated Solr schema and config files -Publish Release ---------------- +Publish the Release +------------------- Click the "Publish release" button. +Close Milestone on GitHub and Create a New One +---------------------------------------------- + +You can find our milestones at https://github.com/IQSS/dataverse/milestones + +Now that we've published the release, close the milestone and create a new one. + +Note that for milestones we use just the number without the "v" (e.g. "5.10.1"). + +Add the Release to the Dataverse Roadmap +---------------------------------------- + +Add an entry to the list of releases at https://www.iq.harvard.edu/roadmap-dataverse-project + +Announce the Release on the Dataverse Blog +------------------------------------------ + +Make a blog post at https://dataverse.org/blog + +Announce the Release on the Mailing List +---------------------------------------- + +Post a message at https://groups.google.com/g/dataverse-community + +For Hotfixes, Merge Hotfix Branch into "develop" and Rename SQL Scripts +----------------------------------------------------------------------- + +Note: this only applies to hotfixes! + +We've merged the hotfix into the "master" branch but now we need the fixes (and version bump) in the "develop" branch. Make a new branch off the hotfix branch and create a pull request against develop. Merge conflicts are possible and this pull request should go through review and QA like normal. Afterwards it's fine to delete this branch and the hotfix branch that was merged into master. 
+ +Because of the hotfix version, any SQL scripts in "develop" should be renamed (from "5.11.0" to "5.11.1" for example). To read more about our naming conventions for SQL scripts, see :doc:`sql-upgrade-scripts`. + +Please note that version bumps and SQL script renaming both require all open pull requests to be updated with the latest from the "develop" branch so you might want to add any SQL script renaming to the hotfix branch before you put it through QA to be merged with develop. This way, open pull requests only need to be updated once. + ---- Previous: :doc:`containers` | Next: :doc:`tools` diff --git a/doc/sphinx-guides/source/developers/s3-direct-upload-api.rst b/doc/sphinx-guides/source/developers/s3-direct-upload-api.rst index d1a71c313ca..3dc73ce6a0c 100644 --- a/doc/sphinx-guides/source/developers/s3-direct-upload-api.rst +++ b/doc/sphinx-guides/source/developers/s3-direct-upload-api.rst @@ -88,6 +88,8 @@ If the client is unable to complete the multipart upload, it should call the abo curl -X DELETE "$SERVER_URL/api/datasets/mpload?..." +.. _direct-add-to-dataset-api: + Adding the Uploaded file to the Dataset --------------------------------------- @@ -117,7 +119,7 @@ Note that this API call can be used independently of the others, e.g. supporting With current S3 stores the object identifier must be in the correct bucket for the store, include the PID authority/identifier of the parent dataset, and be guaranteed unique, and the supplied storage identifer must be prefaced with the store identifier used in the Dataverse installation, as with the internally generated examples above. To add multiple Uploaded Files to the Dataset -------------------------------------------------- +--------------------------------------------- Once the files exists in the s3 bucket, a final API call is needed to add all the files to the Dataset. In this API call, additional metadata is added using the "jsonData" parameter. 
jsonData normally includes information such as a file description, tags, provenance, whether the file is restricted, etc. For direct uploads, the jsonData object must also include values for: diff --git a/doc/sphinx-guides/source/developers/testing.rst b/doc/sphinx-guides/source/developers/testing.rst index 7bde4055e33..4b3d5fd0a55 100755 --- a/doc/sphinx-guides/source/developers/testing.rst +++ b/doc/sphinx-guides/source/developers/testing.rst @@ -79,6 +79,22 @@ greatly extended parameterized testing. Some guidance how to write those: - https://blog.codefx.org/libraries/junit-5-parameterized-tests/ - See also some examples in our codebase. +JUnit 5 Test Helper Extensions +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Our codebase provides little helpers to ease dealing with state during tests. +Some tests might need to change something which should be restored after the test ran. + +For unit tests, the most interesting part is to set a JVM setting just for the current test. +Please use the ``@JvmSetting(key = JvmSettings.XXX, value = "")`` annotation on a test method or +a test class to set and clear the property automatically. + +To set arbitrary system properties for the current test, a similar extension +``@SystemProperty(key = "", value = "")`` has been added. + +Both extensions will ensure the global state of system properties is non-interfering for +test executions. Tests using these extensions will be executed in serial. + Observing Changes to Code Coverage ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/doc/sphinx-guides/source/developers/version-control.rst b/doc/sphinx-guides/source/developers/version-control.rst index c41d958fdea..aacc245af5a 100644 --- a/doc/sphinx-guides/source/developers/version-control.rst +++ b/doc/sphinx-guides/source/developers/version-control.rst @@ -46,6 +46,11 @@ Feature branches are used for both developing features and fixing bugs. They are "3728-doc-apipolicy-fix" is an example of a fine name for your feature branch. 
It tells us that you are addressing https://github.com/IQSS/dataverse/issues/3728 and the "slug" is short, descriptive, and starts with the issue number. +Hotfix Branches +*************** + +Hotfix branches are described under :doc:`making-releases`. + .. _how-to-make-a-pull-request: How to Make a Pull Request diff --git a/doc/sphinx-guides/source/developers/workflows.rst b/doc/sphinx-guides/source/developers/workflows.rst index df63bf239fe..38ca6f4e141 100644 --- a/doc/sphinx-guides/source/developers/workflows.rst +++ b/doc/sphinx-guides/source/developers/workflows.rst @@ -201,3 +201,31 @@ Note - the example step includes two settings required for any archiver, three ( } } + +ldnannounce ++++++++++++ + +An experimental step that sends a Linked Data Notification (LDN) message to a specific LDN Inbox announcing the publication/availability of a dataset meeting certain criteria. + +The two parameters are: +* ``:LDNAnnounceRequiredFields`` - a list of metadata fields that must exist to trigger the message. Currently, the message also includes the values for these fields but future versions may only send the dataset's persistent identifier (making the receiver responsible for making a call-back to get any metadata). +* ``:LDNTarget`` - a JSON object containing an ``inbox`` key whose value is the URL of the target LDN inbox to which messages should be sent, e.g. ``{"id": "https://dashv7-dev.lib.harvard.edu","inbox": "https://dashv7-api-dev.lib.harvard.edu/server/ldn/inbox","type": "Service"}``. + +The supported message format is described by `our preliminary specification `_. The format is expected to change in the near future to match the standard for relationship announcements being developed as part of `the COAR Notify Project `_. + + +.. 
code:: json + + + { + "provider":":internal", + "stepType":"ldnannounce", + "parameters": { + "stepName":"LDN Announce" + }, + "requiredSettings": { + ":LDNAnnounceRequiredFields": "string", + ":LDNTarget": "string" + } + } + diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 5c227417271..f2de9d5702f 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -157,7 +157,7 @@ and restart Payara. The prefix can be configured via the API (where it is referr Once this is done, you will be able to publish datasets and files, but the persistent identifiers will not be citable, and they will only resolve from the DataCite test environment (and then only if the Dataverse installation from which you published them is accessible - DOIs minted from your laptop will not resolve). Note that any datasets or files created using the test configuration cannot be directly migrated and would need to be created again once a valid DOI namespace is configured. -To properly configure persistent identifiers for a production installation, an account and associated namespace must be acquired for a fee from a DOI or HDL provider. **DataCite** (https://www.datacite.org) is the recommended DOI provider (see https://dataverse.org/global-dataverse-community-consortium for more on joining DataCite) but **EZID** (http://ezid.cdlib.org) is an option for the University of California according to https://www.cdlib.org/cdlinfo/2017/08/04/ezid-doi-service-is-evolving/ . **Handle.Net** (https://www.handle.net) is the HDL provider. +To properly configure persistent identifiers for a production installation, an account and associated namespace must be acquired for a fee from a DOI or HDL provider. 
**DataCite** (https://www.datacite.org) is the recommended DOI provider (see https://dataversecommunity.global for more on joining DataCite) but **EZID** (http://ezid.cdlib.org) is an option for the University of California according to https://www.cdlib.org/cdlinfo/2017/08/04/ezid-doi-service-is-evolving/ . **Handle.Net** (https://www.handle.net) is the HDL provider. Once you have your DOI or Handle account credentials and a namespace, configure your Dataverse installation to use them using the JVM options and database settings below. @@ -238,13 +238,17 @@ As for the "Remote only" authentication mode, it means that: - ``:DefaultAuthProvider`` has been set to use the desired authentication provider - The "builtin" authentication provider has been disabled (:ref:`api-toggle-auth-provider`). Note that disabling the "builtin" authentication provider means that the API endpoint for converting an account from a remote auth provider will not work. Converting directly from one remote authentication provider to another (i.e. from GitHub to Google) is not supported. Conversion from remote is always to "builtin". Then the user initiates a conversion from "builtin" to remote. Note that longer term, the plan is to permit multiple login options to the same Dataverse installation account per https://github.com/IQSS/dataverse/issues/3487 (so all this talk of conversion will be moot) but for now users can only use a single login option, as explained in the :doc:`/user/account` section of the User Guide. In short, "remote only" might work for you if you only plan to use a single remote authentication provider such that no conversion between remote authentication providers will be necessary. -File Storage: Using a Local Filesystem and/or Swift and/or object stores ------------------------------------------------------------------------- +.. 
_file-storage: + +File Storage: Using a Local Filesystem and/or Swift and/or Object Stores and/or Trusted Remote Stores +----------------------------------------------------------------------------------------------------- By default, a Dataverse installation stores all data files (files uploaded by end users) on the filesystem at ``/usr/local/payara5/glassfish/domains/domain1/files``. This path can vary based on answers you gave to the installer (see the :ref:`dataverse-installer` section of the Installation Guide) or afterward by reconfiguring the ``dataverse.files.\.directory`` JVM option described below. A Dataverse installation can alternately store files in a Swift or S3-compatible object store, and can now be configured to support multiple stores at once. With a multi-store configuration, the location for new files can be controlled on a per-Dataverse collection basis. +A Dataverse installation may also be configured to reference some files (e.g. large and/or sensitive data) stored in a web-accessible trusted remote store. + The following sections describe how to set up various types of stores and how to configure for multiple stores. Multi-store Basics @@ -600,7 +604,7 @@ Reported Working S3-Compatible Storage possibly slow) https://play.minio.io:9000 service. `StorJ Object Store `_ - StorJ is a distributed object store that can be configured with an S3 gateway. Per the S3 Storage instructions above, you'll first set up the StorJ S3 store by defining the id, type, and label. After following the general installation, set the following configurations to use a StorJ object store: ``dataverse.files..payload-signing=true`` and ``dataverse.files..chunked-encoding=false``. + StorJ is a distributed object store that can be configured with an S3 gateway. Per the S3 Storage instructions above, you'll first set up the StorJ S3 store by defining the id, type, and label. 
After following the general installation, set the following configurations to use a StorJ object store: ``dataverse.files..payload-signing=true`` and ``dataverse.files..chunked-encoding=false``. For step-by-step instructions see https://docs.storj.io/dcs/how-tos/dataverse-integration-guide/ Note that for direct uploads and downloads, Dataverse redirects to the proxy-url but presigns the urls based on the ``dataverse.files..custom-endpoint-url``. Also, note that if you choose to enable ``dataverse.files..download-redirect`` the S3 URLs expire after 60 minutes by default. You can change that minute value to reflect a timeout value that’s more appropriate by using ``dataverse.files..url-expiration-minutes``. @@ -663,6 +667,40 @@ Migrating from Local Storage to S3 Is currently documented on the :doc:`/developers/deployment` page. +.. _trusted-remote-storage: + +Trusted Remote Storage +++++++++++++++++++++++ + +In addition to having the type "remote" and requiring a label, Trusted Remote Stores are defined in terms of a baseURL - all files managed by this store must be at a path starting with this URL, and a baseStore - a file, s3, or swift store that can be used to store additional ancillary dataset files (e.g. metadata exports, thumbnails, auxiliary files, etc.). +These and other available options are described in the table below. + +Trusted remote stores can range from being a static trusted website to a sophisticated service managing access requests and logging activity +and/or managing access to a secure enclave. See :doc:`/developers/big-data-support` for additional information on how to use a trusted remote store. For specific remote stores, consult their documentation when configuring the remote store in your Dataverse installation. + +Note that in the current implementation, activities where Dataverse needs access to data bytes, e.g. to create thumbnails or validate hash values at publication, will fail if a remote store does not allow Dataverse access. 
Implementers of such trusted remote stores should consider using Dataverse's settings to disable ingest, validation of files at publication, etc. as needed. + +Once you have configured a trusted remote store, you can point your users to the :ref:`add-remote-file-api` section of the API Guide. + +.. table:: + :align: left + + =========================================== ================== ========================================================================== =================== + JVM Option Value Description Default value + =========================================== ================== ========================================================================== =================== + dataverse.files..type ``remote`` **Required** to mark this storage as remote. (none) + dataverse.files..label **Required** label to be shown in the UI for this storage. (none) + dataverse.files..base-url **Required** All files must have URLs of the form /* . (none) + dataverse.files..base-store **Optional** The id of a base store (of type file, s3, or swift). (the default store) + dataverse.files..download-redirect ``true``/``false`` Enable direct download (should usually be true). ``false`` + dataverse.files..secret-key A key used to sign download requests sent to the remote store. Optional. (none) + dataverse.files..url-expiration-minutes If direct downloads and using signing: time until links expire. Optional. 60 + dataverse.files..remote-store-name A short name used in the UI to indicate where a file is located. Optional. (none) + dataverse.files..remote-store-url A url to an info page about the remote store used in the UI. Optional. (none) + + =========================================== ================== ========================================================================== =================== + + .. _Branding Your Installation: @@ -941,6 +979,16 @@ Some external tools are also ready to be translated, especially if they are usin .. 
_dataverse-internationalization-wg: https://groups.google.com/forum/#!forum/dataverse-internationalization-wg + +Tools for Translators ++++++++++++++++++++++ + +The list below depicts a set of tools that can be used to ease the amount of work necessary for translating the Dataverse software by facilitating this collaborative effort and enabling the reuse of previous work: + +- `Weblate for the Dataverse Software `_, made available in the scope of the `SSHOC `_ project. + +- `easyTranslationHelper `_, a tool developed by `University of Aveiro `_. + .. _Web-Analytics-Code: Web Analytics Code @@ -988,12 +1036,14 @@ Once this script is running, you can look in the Google Analytics console (Realt Configuring Licenses -------------------- -Out of the box, users select from the following licenses or terms: +On a new Dataverse installation, users may select from the following licenses or terms: - CC0 1.0 (default) - CC BY 4.0 - Custom Dataset Terms +(Note that existing Dataverse installations which are upgraded from 5.9 or previous will only offer CC0 1.0, added automatically during the upgrade to version 5.10.) + You have a lot of control over which licenses and terms are available. You can remove licenses and add new ones. You can decide which license is the default. You can remove "Custom Dataset Terms" as a option. You can remove all licenses and make "Custom Dataset Terms" the only option. Before making changes, you are encouraged to read the :ref:`license-terms` section of the User Guide about why CC0 is the default and what the "Custom Dataset Terms" option allows. @@ -1063,18 +1113,22 @@ BagIt file handler configuration settings: BagIt Export ------------ -Your Dataverse installation may be configured to submit a copy of published Datasets, packaged as `Research Data Alliance conformant `_ zipped `BagIt `_ bags to `Chronopolis `_ via `DuraCloud `_ or alternately to any folder on the local filesystem. 
+Your Dataverse installation may be configured to submit a copy of published Datasets, packaged as `Research Data Alliance conformant `_ zipped `BagIt `_ archival Bags (sometimes called BagPacks) to `Chronopolis `_ via `DuraCloud `_ or alternately to any folder on the local filesystem. + +These archival Bags include all of the files and metadata in a given dataset version and are sufficient to recreate the dataset, e.g. in a new Dataverse instance, or potentially in another RDA-conformant repository. The Dataverse Software offers an internal archive workflow which may be configured as a PostPublication workflow via an admin API call to manually submit previously published Datasets and prior versions to a configured archive such as Chronopolis. The workflow creates a `JSON-LD `_ serialized `OAI-ORE `_ map file, which is also available as a metadata export format in the Dataverse Software web interface. -At present, the DPNSubmitToArchiveCommand, LocalSubmitToArchiveCommand, and GoogleCloudSubmitToArchive are the only implementations extending the AbstractSubmitToArchiveCommand and using the configurable mechanisms discussed below. +At present, archiving classes include the DuraCloudSubmitToArchiveCommand, LocalSubmitToArchiveCommand, GoogleCloudSubmitToArchiveCommand, and S3SubmitToArchiveCommand, which all extend the AbstractSubmitToArchiveCommand and use the configurable mechanisms discussed below. (A DRSSubmitToArchiveCommand, which works with Harvard's DRS also exists and, while specific to DRS, is a useful example of how Archivers can support single-version-only semantics and support archiving only from specified collections (with collection specific parameters)). + +All current options support the archival status APIs and the same status is available in the dataset page version table (for contributors/those who could view the unpublished dataset, with more detail available to superusers). .. 
_Duracloud Configuration: Duracloud Configuration +++++++++++++++++++++++ -Also note that while the current Chronopolis implementation generates the bag and submits it to the archive's DuraCloud interface, the step to make a 'snapshot' of the space containing the Bag (and verify it's successful submission) are actions a curator must take in the DuraCloud interface. +Also note that while the current Chronopolis implementation generates the archival Bag and submits it to the archive's DuraCloud interface, the steps to make a 'snapshot' of the space containing the archival Bag (and verify its successful submission) are actions a curator must take in the DuraCloud interface. The minimal configuration to support an archiver integration involves adding a minimum of two Dataverse Software Keys and any required Payara jvm options. The example instructions here are specific to the DuraCloud Archiver\: @@ -1098,7 +1152,7 @@ It also can use one setting that is common to all Archivers: :BagGeneratorThread ``curl http://localhost:8080/api/admin/settings/:BagGeneratorThreads -X PUT -d '8'`` -By default, the Bag generator zips two datafiles at a time when creating the Bag. This setting can be used to lower that to 1, i.e. to decrease system load, or to increase it, e.g. to 4 or 8, to speed processing of many small files. +By default, the Bag generator zips two datafiles at a time when creating the archival Bag. This setting can be used to lower that to 1, i.e. to decrease system load, or to increase it, e.g. to 4 or 8, to speed processing of many small files. Archivers may require JVM options as well. For the Chronopolis archiver, the username and password associated with your organization's Chronopolis/DuraCloud account should be configured in Payara: @@ -1115,7 +1169,7 @@ ArchiverClassName - the fully qualified class to be used for archiving. 
For exam ``curl -X PUT -d "edu.harvard.iq.dataverse.engine.command.impl.LocalSubmitToArchiveCommand" http://localhost:8080/api/admin/settings/:ArchiverClassName`` -\:BagItLocalPath - the path to where you want to store BagIt. For example\: +\:BagItLocalPath - the path to where you want to store the archival Bags. For example\: ``curl -X PUT -d /home/path/to/storage http://localhost:8080/api/admin/settings/:BagItLocalPath`` @@ -1130,7 +1184,7 @@ ArchiverClassName - the fully qualified class to be used for archiving. For exam Google Cloud Configuration ++++++++++++++++++++++++++ -The Google Cloud Archiver can send Dataverse Project Bags to a bucket in Google's cloud, including those in the 'Coldline' storage class (cheaper, with slower access) +The Google Cloud Archiver can send Dataverse Archival Bags to a bucket in Google's cloud, including those in the 'Coldline' storage class (cheaper, with slower access) ``curl http://localhost:8080/api/admin/settings/:ArchiverClassName -X PUT -d "edu.harvard.iq.dataverse.engine.command.impl.GoogleCloudSubmitToArchiveCommand"`` @@ -1154,14 +1208,39 @@ For example: ``cp /usr/local/payara5/glassfish/domains/domain1/files/googlecloudkey.json`` +.. _S3 Archiver Configuration: + +S3 Configuration +++++++++++++++++ + +The S3 Archiver can send Dataverse Archival Bags to a bucket at any S3 endpoint. The configuration for the S3 Archiver is independent of any S3 store that may be configured in Dataverse and may, for example, leverage colder (cheaper, slower access) storage. + +``curl http://localhost:8080/api/admin/settings/:ArchiverClassName -X PUT -d "edu.harvard.iq.dataverse.engine.command.impl.S3SubmitToArchiveCommand"`` + +``curl http://localhost:8080/api/admin/settings/:ArchiverSettings -X PUT -d ":S3ArchiverConfig, :BagGeneratorThreads"`` + +The S3 Archiver defines one custom setting, a required :S3ArchiverConfig. It can also use the :BagGeneratorThreads setting as described in the DuraCloud Configuration section above. 
+ +The credentials for your S3 account can be stored in a profile in a standard credentials file (e.g. ~/.aws/credentials) referenced via the "profile" key in the :S3ArchiverConfig setting (will default to the default entry), or can be provided via MicroProfile settings as described for S3 stores (dataverse.s3archiver.access-key and dataverse.s3archiver.secret-key). + +The :S3ArchiverConfig setting is a JSON object that must include an "s3_bucket_name" and may include additional S3-related parameters as described for S3 Stores, including "profile", "connection-pool-size", "custom-endpoint-url", "custom-endpoint-region", "path-style-access", "payload-signing", and "chunked-encoding". + +\:S3ArchiverConfig - minimally includes the name of the bucket to use. For example: + +``curl http://localhost:8080/api/admin/settings/:S3ArchiverConfig -X PUT -d '{"s3_bucket_name":"archival-bucket"}'`` + +\:S3ArchiverConfig - example to also set the name of an S3 profile to use. For example: + +``curl http://localhost:8080/api/admin/settings/:S3ArchiverConfig -X PUT -d '{"s3_bucket_name":"archival-bucket", "profile":"archiver"}'`` + .. _Archiving API Call: -API Call -++++++++ +API Calls ++++++++++ -Once this configuration is complete, you, as a user with the *PublishDataset* permission, should be able to use the API call to manually submit a DatasetVersion for processing: +Once this configuration is complete, you, as a user with the *PublishDataset* permission, should be able to use the admin API call to manually submit a DatasetVersion for processing: -``curl -H "X-Dataverse-key: " http://localhost:8080/api/admin/submitDataVersionToArchive/{id}/{version}`` +``curl -X POST -H "X-Dataverse-key: " http://localhost:8080/api/admin/submitDatasetVersionToArchive/{id}/{version}`` where: @@ -1169,10 +1248,22 @@ where: ``{version}`` is the friendly version number, e.g. "1.2". 
-The submitDataVersionToArchive API (and the workflow discussed below) attempt to archive the dataset version via an archive specific method. For Chronopolis, a DuraCloud space named for the dataset (it's DOI with ':' and '.' replaced with '-') is created and two files are uploaded to it: a version-specific datacite.xml metadata file and a BagIt bag containing the data and an OAI-ORE map file. (The datacite.xml file, stored outside the Bag as well as inside is intended to aid in discovery while the ORE map file is 'complete', containing all user-entered metadata and is intended as an archival record.) +The submitDatasetVersionToArchive API (and the workflow discussed below) attempt to archive the dataset version via an archive specific method. For Chronopolis, a DuraCloud space named for the dataset (its DOI with ':' and '.' replaced with '-') is created and two files are uploaded to it: a version-specific datacite.xml metadata file and a BagIt bag containing the data and an OAI-ORE map file. (The datacite.xml file, stored outside the Bag as well as inside, is intended to aid in discovery while the ORE map file is 'complete', containing all user-entered metadata and is intended as an archival record.) In the Chronopolis case, since the transfer from the DuraCloud front-end to archival storage in Chronopolis can take significant time, it is currently up to the admin/curator to submit a 'snap-shot' of the space within DuraCloud and to monitor its successful transfer. Once transfer is complete the space should be deleted, at which point the Dataverse Software API call can be used to submit a Bag for other versions of the same Dataset. (The space is reused, so that archival copies of different Dataset versions correspond to different snapshots of the same DuraCloud space.). 
+A batch version of this admin api call is also available: + +``curl -X POST -H "X-Dataverse-key: " 'http://localhost:8080/api/admin/archiveAllUnarchivedDatasetVersions?listonly=true&limit=10&latestonly=true'`` + +The archiveAllUnarchivedDatasetVersions call takes 3 optional configuration parameters. +* listonly=true will cause the API to list dataset versions that would be archived but will not take any action. +* limit=<n> will limit the number of dataset versions archived in one api call to <= <n>. +* latestonly=true will limit archiving to only the latest published versions of datasets instead of archiving all unarchived versions. + +Note that because archiving is done asynchronously, the calls above will return OK even if the user does not have the *PublishDataset* permission on the dataset(s) involved. Failures are indicated in the log and the archivalStatus calls in the native api can be used to check the status as well. + + PostPublication Workflow ++++++++++++++++++++++++ @@ -1427,10 +1518,6 @@ doi.baseurlstring As of this writing, "https://mds.datacite.org" (DataCite) and "https://ezid.cdlib.org" (EZID) are the main valid values. -While the above two options are recommended because they have been tested by the Dataverse Project Team, it is also possible to use a DataCite Client API as a proxy to DataCite. In this case, requests made to the Client API are captured and passed on to DataCite for processing. The application will interact with the DataCite Client API exactly as if it were interacting directly with the DataCite API, with the only difference being the change to the base endpoint URL. 
- -For example, the Australian Data Archive (ADA) successfully uses the Australian National Data Service (ANDS) API (a proxy for DataCite) to mint their DOIs through their Dataverse installation using a ``doi.baseurlstring`` value of "https://researchdata.ands.org.au/api/doi/datacite" as documented at https://documentation.ands.org.au/display/DOC/ANDS+DataCite+Client+API . As ADA did for ANDS DOI minting, any DOI provider (and their corresponding DOI configuration parameters) other than DataCite must be tested with the Dataverse Software to establish whether or not it will function properly. - Out of the box, the Dataverse Software is configured to use a test MDS DataCite base URL string. You can delete it like this: ``./asadmin delete-jvm-options '-Ddoi.baseurlstring=https\://mds.test.datacite.org'`` @@ -2369,6 +2456,30 @@ In the case you get garbled characters in Shibboleth-supplied fields (e.g. given If you managed to get correct accented characters from shibboleth while this setting is _false_, please contact us with your application server and Shibboleth configuration! +:ShibAffiliationOrder ++++++++++++++++++++++ + +Will select the last or first value of an array in affiliation, the array separator can be set using ``:ShibAffiliationSeparator`` . + +To select the last value : + +``curl -X PUT -d "lastAffiliation" http://localhost:8080/api/admin/settings/:ShibAffiliationOrder`` + +To select the first value : + +``curl -X PUT -d "firstAffiliation" http://localhost:8080/api/admin/settings/:ShibAffiliationOrder`` + + +:ShibAffiliationSeparator ++++++++++++++++++++++++++ + +Set the separator to be used for ``:ShibAffiliationOrder``. +Default separator : ";" + +To change the separator : + +``curl -X PUT -d ";" http://localhost:8080/api/admin/settings/:ShibAffiliationSeparator`` + .. _:ComputeBaseUrl: :ComputeBaseUrl @@ -2566,7 +2677,7 @@ Number of errors to display to the user when creating DataFiles from a file uplo .. 
_:BagItHandlerEnabled: :BagItHandlerEnabled -+++++++++++++++++++++ +++++++++++++++++++++ Part of the database settings to configure the BagIt file handler. Enables the BagIt file handler. By default, the handler is disabled. @@ -2643,6 +2754,12 @@ This is the local file system path to be used with the LocalSubmitToArchiveComma These are the bucket and project names to be used with the GoogleCloudSubmitToArchiveCommand class. Further information is in the :ref:`Google Cloud Configuration` section above. +:S3ArchiverConfig ++++++++++++++++++ + +This is the JSON configuration object setting to be used with the S3SubmitToArchiveCommand class. Further information is in the :ref:`S3 Archiver Configuration` section above. + + .. _:InstallationName: :InstallationName @@ -2715,6 +2832,23 @@ Scripts that implement this association for specific service protocols are maint ``curl -X PUT --upload-file cvoc-conf.json http://localhost:8080/api/admin/settings/:CVocConf`` +.. _:ControlledVocabularyCustomJavaScript: + +:ControlledVocabularyCustomJavaScript ++++++++++++++++++++++++++++++++++++++ + +``:ControlledVocabularyCustomJavaScript`` allows a JavaScript file to be loaded into the dataset page for the purpose of showing controlled vocabulary as a list (with optionally translated values) such as author names. + +To specify the URL for a custom script ``covoc.js`` to be loaded from an external site: + +``curl -X PUT -d 'https://example.com/js/covoc.js' http://localhost:8080/api/admin/settings/:ControlledVocabularyCustomJavaScript`` + +To remove the custom script URL: + +``curl -X DELETE http://localhost:8080/api/admin/settings/:ControlledVocabularyCustomJavaScript`` + +Please note that :ref:`:CVocConf` is a better option if the list is large or needs to be searchable from an external service using protocols such as SKOSMOS. + .. _:AllowedCurationLabels: :AllowedCurationLabels @@ -2849,3 +2983,51 @@ For configuration details, see :ref:`mute-notifications`. 
Overrides the default empty list of never muted notifications. Never muted notifications cannot be muted by the users. Always muted notifications are grayed out and are not adjustable by the user. For configuration details, see :ref:`mute-notifications`. + +:LDNMessageHosts +++++++++++++++++ + +The comma-separated list of hosts allowed to send Dataverse Linked Data Notification messages. See :doc:`/api/linkeddatanotification` for details. ``*`` allows messages from anywhere (not recommended for production). By default, messages are not accepted from anywhere. + + +:LDNTarget ++++++++++++ + +A JSON object whose ``inbox`` key gives the URL of the LDN Inbox to which the LDN Announce workflow step will send messages. See :doc:`/developers/workflows` for details. + +:LDNAnnounceRequiredFields +++++++++++++++++++++++++++ + +The list of parent dataset field names for which the LDN Announce workflow step should send messages. See :doc:`/developers/workflows` for details. + +.. _:GlobusBasicToken: + +:GlobusBasicToken ++++++++++++++++++ + +GlobusBasicToken encodes credentials for Globus integration. See :ref:`globus-support` for details. + +:GlobusEndpoint ++++++++++++++++ + +GlobusEndpoint is the Globus endpoint id used with Globus integration. See :ref:`globus-support` for details. + +:GlobusStores ++++++++++++++ + +A comma-separated list of the S3 stores that are configured to support Globus integration. See :ref:`globus-support` for details. + +:GlobusAppURL ++++++++++++++ + +The URL where the `dataverse-globus `_ "transfer" app has been deployed to support Globus integration. See :ref:`globus-support` for details. + +:GlobusPollingInterval +++++++++++++++++++++++ + +The interval in seconds between Dataverse calls to Globus to check on upload progress. Defaults to 50 seconds. See :ref:`globus-support` for details. + +:GlobusSingleFileTransfer ++++++++++++++++++++++++++ + +A true/false option to add a Globus transfer option to the file download menu which is not yet fully supported in the dataverse-globus app. 
See :ref:`globus-support` for details. diff --git a/doc/sphinx-guides/source/installation/installation-main.rst b/doc/sphinx-guides/source/installation/installation-main.rst index 430010261b1..4b000f1ef9e 100755 --- a/doc/sphinx-guides/source/installation/installation-main.rst +++ b/doc/sphinx-guides/source/installation/installation-main.rst @@ -207,7 +207,7 @@ Fresh Reinstall Early on when you're installing the Dataverse Software, you may think, "I just want to blow away what I've installed and start over." That's fine. You don't have to uninstall the various components like Payara, PostgreSQL and Solr, but you should be conscious of how to clear out their data. For Payara, a common helpful process is to: - Stop Payara; -- Remove the ``generated`` and ``osgi-cache`` directories; +- Remove the ``generated``, ``lib/databases`` and ``osgi-cache`` directories from the ``domain1`` directory; - Start Payara Drop database diff --git a/doc/sphinx-guides/source/installation/prep.rst b/doc/sphinx-guides/source/installation/prep.rst index c841cd55fb3..c491659cd56 100644 --- a/doc/sphinx-guides/source/installation/prep.rst +++ b/doc/sphinx-guides/source/installation/prep.rst @@ -27,11 +27,12 @@ Advanced Installation There are some community-lead projects to use configuration management tools such as Ansible and Puppet to automate the installation and configuration of the Dataverse Software, but support for these solutions is limited to what the Dataverse Community can offer as described in each project's webpage: - https://github.com/GlobalDataverseCommunityConsortium/dataverse-ansible +- https://gitlab.com/lip-computing/dataverse - https://github.com/IQSS/dataverse-puppet (Please note that the "dataverse-ansible" repo is used in a script that allows the Dataverse Software to be installed on Amazon Web Services (AWS) from arbitrary GitHub branches as described in the :doc:`/developers/deployment` section of the Developer Guide.) 
-The Dataverse Project team is happy to "bless" additional community efforts along these lines (i.e. Docker, Chef, Salt, etc.) by creating a repo under https://github.com/GlobalDataverseCommunityConsortium and managing team access. +The Dataverse Project team is happy to "bless" additional community efforts along these lines (i.e. Docker, Chef, Salt, etc.) by creating a repo under https://github.com/gdcc and managing team access. The Dataverse Software permits a fair amount of flexibility in where you choose to install the various components. The diagram below shows a load balancer, multiple proxies and web servers, redundant database servers, and offloading of potentially resource intensive work to a separate server. (Glassfish is shown rather than Payara.) diff --git a/doc/sphinx-guides/source/installation/prerequisites.rst b/doc/sphinx-guides/source/installation/prerequisites.rst index 0ad3bf600c9..3cf876a2251 100644 --- a/doc/sphinx-guides/source/installation/prerequisites.rst +++ b/doc/sphinx-guides/source/installation/prerequisites.rst @@ -44,7 +44,7 @@ On RHEL/derivative you can make Java 11 the default with the ``alternatives`` co Payara ------ -Payara 5.2021.6 is recommended. Newer versions might work fine, regular updates are recommended. +Payara 5.2022.3 is recommended. Newer versions might work fine, regular updates are recommended. 
Installing Payara ================= @@ -55,8 +55,8 @@ Installing Payara - Download and install Payara (installed in ``/usr/local/payara5`` in the example commands below):: - # wget https://s3-eu-west-1.amazonaws.com/payara.fish/Payara+Downloads/5.2021.6/payara-5.2021.6.zip - # unzip payara-5.2021.6.zip + # wget https://s3-eu-west-1.amazonaws.com/payara.fish/Payara+Downloads/5.2022.3/payara-5.2022.3.zip + # unzip payara-5.2022.3.zip # mv payara5 /usr/local If you intend to install and run Payara under a service account (and we hope you do), chown -R the Payara hierarchy to root to protect it but give the service account access to the below directories: diff --git a/doc/sphinx-guides/source/style/index.rst b/doc/sphinx-guides/source/style/index.rst index ba6995e1b53..0e93716e146 100755 --- a/doc/sphinx-guides/source/style/index.rst +++ b/doc/sphinx-guides/source/style/index.rst @@ -14,3 +14,4 @@ This style guide is meant to help developers implement clear and appropriate UI foundations patterns + text diff --git a/doc/sphinx-guides/source/style/text.rst b/doc/sphinx-guides/source/style/text.rst new file mode 100644 index 00000000000..4fb2352300c --- /dev/null +++ b/doc/sphinx-guides/source/style/text.rst @@ -0,0 +1,12 @@ +Text +++++ + +Here we describe the guidelines that help us provide helpful, clear and consistent textual information to users. + +.. contents:: |toctitle| + :local: + +Metadata Text Guidelines +======================== + +These guidelines are maintained in `a Google Doc `__ as we expect to make frequent changes to them. We welcome comments in the Google Doc. \ No newline at end of file diff --git a/doc/sphinx-guides/source/user/appendix.rst b/doc/sphinx-guides/source/user/appendix.rst index 003f02cdd61..b05459b6aaf 100755 --- a/doc/sphinx-guides/source/user/appendix.rst +++ b/doc/sphinx-guides/source/user/appendix.rst @@ -8,6 +8,8 @@ Additional documentation complementary to the User Guide. .. contents:: |toctitle| :local: +.. 
_metadata-references: + Metadata References ====================== @@ -15,16 +17,28 @@ The Dataverse Project is committed to using standard-compliant metadata to ensur metadata can be mapped easily to standard metadata schemas and be exported into JSON format (XML for tabular file metadata) for preservation and interoperability. +Supported Metadata +~~~~~~~~~~~~~~~~~~ + Detailed below are what metadata schemas we support for Citation and Domain Specific Metadata in the Dataverse Project: -- `Citation Metadata `__: compliant with `DDI Lite `_, `DDI 2.5 Codebook `__, `DataCite 3.1 `__, and Dublin Core's `DCMI Metadata Terms `__ (`see .tsv version `__). Language field uses `ISO 639-1 `__ controlled vocabulary. -- `Geospatial Metadata `__: compliant with DDI Lite, DDI 2.5 Codebook, DataCite, and Dublin Core (`see .tsv version `__). Country / Nation field uses `ISO 3166-1 `_ controlled vocabulary. -- `Social Science & Humanities Metadata `__: compliant with DDI Lite, DDI 2.5 Codebook, and Dublin Core (`see .tsv version `__). -- `Astronomy and Astrophysics Metadata `__ - : These metadata elements can be mapped/exported to the International Virtual Observatory Alliance’s (IVOA) +- `Citation Metadata `__ (`see .tsv version `__): compliant with `DDI Lite `_, `DDI 2.5 Codebook `__, `DataCite 3.1 `__, and Dublin Core's `DCMI Metadata Terms `__ . Language field uses `ISO 639-1 `__ controlled vocabulary. +- `Geospatial Metadata `__ (`see .tsv version `__): compliant with DDI Lite, DDI 2.5 Codebook, DataCite, and Dublin Core. Country / Nation field uses `ISO 3166-1 `_ controlled vocabulary. +- `Social Science & Humanities Metadata `__ (`see .tsv version `__): compliant with DDI Lite, DDI 2.5 Codebook, and Dublin Core. 
+- `Astronomy and Astrophysics Metadata `__ (`see .tsv version `__): These metadata elements can be mapped/exported to the International Virtual Observatory Alliance’s (IVOA) `VOResource Schema format `__ and are based on - `Virtual Observatory (VO) Discovery and Provenance Metadata `__ (`see .tsv version `__). -- `Life Sciences Metadata `__: based on `ISA-Tab Specification `__, along with controlled vocabulary from subsets of the `OBI Ontology `__ and the `NCBI Taxonomy for Organisms `__ (`see .tsv version `__). -- `Journal Metadata `__: based on the `Journal Archiving and Interchange Tag Set, version 1.2 `__ (`see .tsv version `__). + `Virtual Observatory (VO) Discovery and Provenance Metadata `__. +- `Life Sciences Metadata `__ (`see .tsv version `__): based on `ISA-Tab Specification `__, along with controlled vocabulary from subsets of the `OBI Ontology `__ and the `NCBI Taxonomy for Organisms `__. +- `Journal Metadata `__ (`see .tsv version `__): based on the `Journal Archiving and Interchange Tag Set, version 1.2 `__. + +Experimental Metadata +~~~~~~~~~~~~~~~~~~~~~ + +Unlike supported metadata, experimental metadata is not enabled by default in a new Dataverse installation. Feedback via any `channel `_ is welcome! + +- `Computational Workflow Metadata `__ (`see .tsv version `__): adapted from `Bioschemas Computational Workflow Profile, version 1.0 `__ and `Codemeta `__. + +See Also +~~~~~~~~ See also the `Dataverse Software 4.0 Metadata Crosswalk: DDI, DataCite, DC, DCTerms, VO, ISA-Tab `__ document and the :doc:`/admin/metadatacustomization` section of the Admin Guide. 
diff --git a/doc/sphinx-guides/source/user/dataset-management.rst b/doc/sphinx-guides/source/user/dataset-management.rst index 10b770bd09b..77a760ef838 100755 --- a/doc/sphinx-guides/source/user/dataset-management.rst +++ b/doc/sphinx-guides/source/user/dataset-management.rst @@ -38,13 +38,15 @@ Once a dataset has been published, its metadata can be exported in a variety of Each of these metadata exports contains the metadata of the most recently published version of the dataset. +.. _adding-new-dataset: + Adding a New Dataset ==================== #. Navigate to the Dataverse collection in which you want to add a dataset. #. Click on the "Add Data" button and select "New Dataset" in the dropdown menu. **Note:** If you are on the root Dataverse collection, your My Data page or click the "Add Data" link in the navbar, the dataset you create will be hosted in the root Dataverse collection. You can change this by selecting another Dataverse collection you have proper permissions to create datasets in, from the Host Dataverse collection dropdown in the create dataset form. This option to choose will not be available after you create the dataset. -#. To quickly get started, enter at minimum all the required fields with an asterisk (e.g., the Dataset Title, Author, - Description, Contact Email and Subject) to get a Data Citation with a DOI. +#. To quickly get started, enter at minimum all the required fields with an asterisk (e.g., the Dataset Title, Author Name, + Description Text, Point of Contact Email, and Subject) to get a Data Citation with a DOI. #. Scroll down to the "Files" section and click on "Select Files to Add" to add all the relevant files to your Dataset. You can also upload your files directly from your Dropbox. **Tip:** You can drag and drop or select multiple files at a time from your desktop directly into the upload widget. 
Your files will appear below the "Select Files to Add" button where you can add a @@ -62,6 +64,8 @@ We currently only support the following HTML tags for any of our textbox metadat
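<br>, <code>, <del>, <dd>, <dl>, <dt>, <em>, <hr>, <h1>-<h3>, <i>, <img>, <kbd>, <li>, <ol>, <p>, <pre>, <s>, <sup>, <sub>, <strong>, <strike>, <u>, <ul>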
        . +.. _dataset-file-upload: + File Upload =========== @@ -129,15 +133,15 @@ The open-source DVUploader tool is a stand-alone command-line Java application t Usage ~~~~~ -The DVUploader is open source and is available as source, as a Java jar, and with documentation at https://github.com/IQSS/dataverse-uploader. The DVUploader requires Java 1.8+. Users will need to install Java if they don't already have it and then download the DVUploader-v1.0.0.jar file. Users will need to know the URL of the Dataverse installation, the DOI of their existing dataset, and have generated an API Key for the Dataverse installation (an option in the user's profile menu). +The DVUploader is open source and is available as source, as a Java jar, and with documentation at https://github.com/GlobalDataverseCommunityConsortium/dataverse-uploader. The DVUploader requires Java 1.8+. Users will need to install Java if they don't already have it and then download the latest release of the DVUploader - jar file. Users will need to know the URL of the Dataverse installation, the DOI of their existing dataset, and have generated an API Key for the Dataverse installation (an option in the user's profile menu). Basic usage is to run the command: :: - java -jar DVUploader-v1.0.0.jar -server= -did= -key= + java -jar DVUploader-*.jar -server= -did= -key= Additional command line arguments are available to make the DVUploader list what it would do without uploading, limit the number of files it uploads, recurse through sub-directories, verify fixity, exclude files with specific extensions or name patterns, and/or wait longer than 60 seconds for any Dataverse installation ingest lock to clear (e.g. while the previously uploaded file is processed, as discussed in the :ref:`File Handling ` section below). -DVUploader is a community-developed tool, and its creation was primarily supported by the Texas Digital Library. 
Further information and support for DVUploader can be sought at `the project's GitHub repository `_ . +DVUploader is a community-developed tool, and its creation was primarily supported by the Texas Digital Library. Further information and support for DVUploader can be sought at `the project's GitHub repository `_ . .. _duplicate-files: @@ -153,6 +157,19 @@ Beginning with Dataverse Software 5.0, the way a Dataverse installation handles - If a user attempts to replace a file with another file that has the same checksum, an error message will be displayed and the file will not be able to be replaced. - If a user attempts to replace a file with a file that has the same checksum as a different file in the dataset, a warning will be displayed. +BagIt Support +------------- + +BagIt is a set of hierarchical file system conventions designed to support disk-based storage and network transfer of arbitrary digital content. It offers several benefits such as integration with digital libraries, easy implementation, and transfer validation. See `the Wikipedia article `__ for more information. + +If the Dataverse installation you are using has enabled BagIt file handling, when uploading BagIt files the repository will validate the checksum values listed in each BagIt’s manifest file against the uploaded files and generate errors about any mismatches. The repository will identify a certain number of errors, such as the first five errors in each BagIt file, before reporting the errors. + +|bagit-image1| + +You can fix the errors and reupload the BagIt files. + +More information on how your admin can enable and configure the BagIt file handler can be found in the :ref:`Installation Guide `. + .. _file-handling: File Handling @@ -211,6 +228,72 @@ Finally, automating your code can be immensely helpful to the code and research **Note:** Capturing code dependencies and automating your code will create new files in your directory. Make sure to include them when depositing your dataset. 
+Computational Workflow +---------------------- + +Computational Workflow Definition +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Computational workflows precisely describe a multi-step process to coordinate multiple computational tasks and their data dependencies that lead to data products in a scientific application. The computational tasks take different forms, such as running code (e.g. Python, C++, MATLAB, R, Julia), invoking a service, calling a command-line tool, accessing a database (e.g. SQL, NoSQL), submitting a job to a compute cloud (e.g. on-premises cloud, AWS, GCP, Azure), and executing data processing scripts or workflows. The following diagram shows an example of a computational workflow with multiple computational tasks. + +|cw-image1| + + +FAIR Computational Workflow +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The FAIR Principles (Findable, Accessible, Interoperable, Reusable) apply to computational workflows (https://doi.org/10.1162/dint_a_00033) in two areas: as FAIR data and as FAIR criteria for workflows as digital objects. In the FAIR data area, "*properly designed workflows contribute to FAIR data principles since they provide the metadata and provenance necessary to describe their data products, and they describe the involved data in a formalized, completely traceable way*" (https://doi.org/10.1162/dint_a_00033). Regarding the FAIR criteria for workflows as digital objects, "*workflows are research products in their own right, encapsulating methodological know-how that is to be found and published, accessed and cited, exchanged and combined with others, and reused as well as adapted*" (https://doi.org/10.1162/dint_a_00033). + +How to Create a Computational Workflow +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +There are multiple approaches to creating computational workflows. You may consider standard frameworks and tools such as Common Workflow Language (CWL), Snakemake, Galaxy, Nextflow, Ruffus or *ad hoc* methods using different programming languages (e.g. 
Python, C++, MATLAB, Julia, R), notebooks (e.g. Jupyter Notebook, R Notebook, and MATLAB Live Script) and command-line interpreters (e.g. Bash). Each computational task is defined differently, but all meet the definition of a computational workflow and all result in data products. You can find a few examples of computational workflows in the following GitHub repositories, where each follows several aspects of FAIR principles: + +- Common Workflow Language (`GitHub Repository URL `__) +- R Notebook (`GitHub Repository URL `__) +- Jupyter Notebook (`GitHub Repository URL `__) +- MATLAB Script (`GitHub Repository URL `__) + +You are encouraged to review these examples when creating a computational workflow and publishing in a Dataverse repository. + +At https://workflows.community, the Workflows Community Initiative offers resources for computational workflows, such as a list of workflow systems (https://workflows.community/systems) and other workflow registries (https://workflows.community/registries). The initiative also helps organize working groups related to workflows research, development and application. + +How to Upload Your Computational Workflow +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +After you :ref:`upload your files `, you can apply a "Workflow" tag to your workflow files, such as your Snakemake or R Notebooks files, so that you and others can find them more easily among your deposit’s other files. + +|cw-image3| + +|cw-image4| + +How to Describe Your Computational Workflow +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The Dataverse installation you are using may have enabled Computational Workflow metadata fields for your use. If so, when :ref:`editing your dataset metadata `, you will see the fields described below. 
+ +|cw-image2| + +As described in the :ref:`metadata-references` section of the :doc:`/user/appendix`, the three fields are adapted from `Bioschemas Computational Workflow Profile, version 1.0 `__ and `Codemeta `__: + +- **Workflow Type**: The kind of Computational Workflow, which is designed to compose and execute a series of computational or data manipulation steps in a scientific application +- **External Code Repository URL**: A link to another public repository where the un-compiled, human-readable code and related code is also located (e.g., GitHub, GitLab, SVN) +- **Documentation**: A link (URL) to the documentation or text describing the Computational Workflow and its use + + +How to Search for Computational Workflows +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If the search page of the Dataverse repository you are using includes a "Dataset Feature" facet with a Computational Workflows link, you can follow that link to find only datasets that contain computational workflows. + +You can also search on the "Workflow Type" facet, if the Dataverse installation has the field enabled, to find datasets that contain certain types of computational workflows, such as workflows written in Common Workflow Language files or Jupyter Notebooks. + +|cw-image5| + +You can also search for files within datasets that have been tagged as "Workflow" files by clicking the Files checkbox to show only files and using the File Tag facet to show only files tagged as "Workflow". + +|cw-image6| + Astronomy (FITS) ---------------- @@ -622,6 +705,20 @@ If you deaccession the most recently published version of the dataset but not al :class: img-responsive .. |image-file-tree-view| image:: ./img/file-tree-view.png :class: img-responsive +.. |cw-image1| image:: ./img/computational-workflow-diagram.png + :class: img-responsive +.. |cw-image2| image:: ./img/computational-workflow-metadata.png + :class: img-responsive +.. 
|cw-image3| image:: ./img/file-tags-link.png + :class: img-responsive +.. |cw-image4| image:: ./img/file-tags-options.png + :class: img-responsive +.. |cw-image5| image:: ./img/computational-workflow-facets.png + :class: img-responsive +.. |cw-image6| image:: ./img/file-tags-facets.png + :class: img-responsive +.. |bagit-image1| image:: ./img/bagit-handler-errors.png + :class: img-responsive .. _Make Data Count: https://makedatacount.org .. _Crossref: https://crossref.org diff --git a/doc/sphinx-guides/source/user/dataverse-management.rst b/doc/sphinx-guides/source/user/dataverse-management.rst index efe98e8327c..ed90497da8c 100755 --- a/doc/sphinx-guides/source/user/dataverse-management.rst +++ b/doc/sphinx-guides/source/user/dataverse-management.rst @@ -44,7 +44,7 @@ To edit your Dataverse collection, navigate to your Dataverse collection's landi - :ref:`Theme `: upload a logo for your Dataverse collection, add a link to your department or personal website, add a custom footer image, and select colors for your Dataverse collection in order to brand it - :ref:`Widgets `: get code to add to your website to have your Dataverse collection display on it - :ref:`Permissions `: give other users permissions to your Dataverse collection, i.e.-can edit datasets, and see which users already have which permissions for your Dataverse collection -- :ref:`Dataset Templates `: these are useful when you have several datasets that have the same information in multiple metadata fields that you would prefer not to have to keep manually typing in +- :ref:`Dataset Templates `: these are useful when you want to provide custom instructions on how to fill out fields or have several datasets that have the same information in multiple metadata fields that you would prefer not to have to keep manually typing in - :ref:`Dataset Guestbooks `: allows you to collect data about who is downloading the files from your datasets - :ref:`Featured Dataverse collections `: if you have one or more 
Dataverse collection, you can use this option to show them at the top of your Dataverse collection page to help others easily find interesting or important Dataverse collections - **Delete Dataverse**: you are able to delete your Dataverse collection as long as it is not published and does not have any draft datasets @@ -52,7 +52,7 @@ To edit your Dataverse collection, navigate to your Dataverse collection's landi .. _general-information: General Information ---------------------- +------------------- The General Information page is how you edit the information you filled in while creating your Dataverse collection. If you need to change or add a contact email address, this is the place to do it. Additionally, you can update the metadata elements used for datasets within the Dataverse collection, change which metadata fields are hidden, required, or optional, and update the facets you would like displayed for browsing the Dataverse collection. If you plan on using templates, you need to select the metadata fields on the General Information page. @@ -60,8 +60,8 @@ Tip: The metadata fields you select as required will appear on the Create Datase .. _theme: -Theme ---------- +Theme +----- The Theme features provides you with a way to customize the look of your Dataverse collection. You can: @@ -77,7 +77,7 @@ Supported image types for logo images and footer images are JPEG, TIFF, or PNG a .. _dataverse-widgets: Widgets --------------- +------- The Widgets feature provides you with code for you to put on your personal website to have your Dataverse collection displayed there. There are two types of Widgets for a Dataverse collection, a Dataverse collection Search Box widget and a Dataverse collection Listing widget. Once a Dataverse collection has been published, from the Widgets tab on the Dataverse collection's Theme + Widgets page, it is possible to copy the code snippets for the widget(s) you would like to add to your website. 
If you need to adjust the height of the widget on your website, you may do so by editing the `heightPx=500` parameter in the code snippet. @@ -94,7 +94,7 @@ The Dataverse Collection Listing Widget provides a listing of all your Dataverse .. _openscholar-dataverse-level: Adding Widgets to an OpenScholar Website -****************************************** +**************************************** #. Log in to your OpenScholar website #. Either build a new page or navigate to the page you would like to use to show the Dataverse collection widgets. #. Click on the Settings Cog and select Layout @@ -102,8 +102,8 @@ Adding Widgets to an OpenScholar Website .. _dataverse-permissions: -Roles & Permissions ---------------------- +Roles & Permissions +------------------- Dataverse installation user accounts can be granted roles that define which actions they are allowed to take on specific Dataverse collections, datasets, and/or files. Each role comes with a set of permissions, which define the specific actions that users may take. Roles and permissions may also be granted to groups. Groups can be defined as a collection of Dataverse installation user accounts, a collection of IP addresses (e.g. all users of a library's computers), or a collection of all users who log in using a particular institutional login (e.g. everyone who logs in with a particular university's account credentials). @@ -127,7 +127,7 @@ When you access a Dataverse collection's permissions page, you will see three se Please note that even on a newly created Dataverse collection, you may see user and groups have already been granted role(s) if your installation has ``:InheritParentRoleAssignments`` set. For more on this setting, see the :doc:`/installation/config` section of the Installation Guide. 
Setting Access Configurations -******************************* +***************************** Under the Permissions tab, you can click the "Edit Access" button to open a box where you can add to your Dataverse collection and what permissions are granted to those who add to your Dataverse collection. @@ -140,7 +140,7 @@ The second question on this page allows you to choose the role (and thus the per Both of these settings can be changed at any time. Assigning Roles to Users and Groups -************************************* +*********************************** Under the Users/Groups tab, you can add, edit, or remove the roles granted to users and groups on your Dataverse collection. A role is a set of permissions granted to a user or group when they're using your Dataverse collection. For example, giving your research assistant the "Contributor" role would give them the following self-explanatory permissions on your Dataverse collection and all datasets within your Dataverse collection: "ViewUnpublishedDataset", "DownloadFile", "EditDataset", and "DeleteDatasetDraft". They would, however, lack the "PublishDataset" permission, and thus would be unable to publish datasets on your Dataverse collection. If you wanted to give them that permission, you would give them a role with that permission, like the Curator role. Users and groups can hold multiple roles at the same time if needed. Roles can be removed at any time. All roles and their associated permissions are listed under the "Roles" tab of the same page. @@ -155,15 +155,16 @@ Note: If you need to assign a role to ALL user accounts in a Dataverse installat .. _dataset-templates: Dataset Templates -------------------- +----------------- -Templates are useful when you have several datasets that have the same information in multiple metadata fields that you would prefer not to have to keep manually typing in, or if you want to use a custom set of Terms of Use and Access for multiple datasets in a Dataverse collection. 
In Dataverse Software 4.0+, templates are created at the Dataverse collection level, can be deleted (so it does not show for future datasets), set to default (not required), or can be copied so you do not have to start over when creating a new template with similar metadata from another template. When a template is deleted, it does not impact the datasets that have used the template already. +Templates are useful when you want to provide custom instructions on how to fill out a field, have several datasets that have the same information in multiple metadata fields that you would prefer not to have to keep manually typing in, or if you want to use a custom set of Terms of Use and Access for multiple datasets in a Dataverse collection. In Dataverse Software 4.0+, templates are created at the Dataverse collection level, can be deleted (so it does not show for future datasets), set to default (not required), or can be copied so you do not have to start over when creating a new template with similar metadata from another template. When a template is deleted, it does not impact the datasets that have used the template already. How do you create a template? #. Navigate to your Dataverse collection, click on the Edit Dataverse button and select Dataset Templates. #. Once you have clicked on Dataset Templates, you will be brought to the Dataset Templates page. On this page, you can 1) decide to use the dataset templates from your parent Dataverse collection 2) create a new dataset template or 3) do both. #. Click on the Create Dataset Template to get started. You will see that the template is the same as the create dataset page with an additional field at the top of the page to add a name for the template. +#. To add custom instructions, click on ''(None - click to add)'' and enter the instructions you wish users to see. If you wish to edit existing instructions, click on them to make the text editable. #. 
After adding information into the metadata fields you have information for and clicking Save and Add Terms, you will be brought to the page where you can add custom Terms of Use and Access. If you do not need custom Terms of Use and Access, click the Save Dataset Template, and only the metadata fields will be saved. #. After clicking Save Dataset Template, you will be brought back to the Manage Dataset Templates page and should see your template listed there now with the make default, edit, view, or delete options. #. A Dataverse collection does not have to have a default template and users can select which template they would like to use while on the Create Dataset page. @@ -174,7 +175,7 @@ How do you create a template? .. _dataset-guestbooks: Dataset Guestbooks ------------------------------ +------------------ Guestbooks allow you to collect data about who is downloading the files from your datasets. You can decide to collect account information (username, given name & last name, affiliation, etc.) as well as create custom questions (e.g., What do you plan to use this data for?). You are also able to download the data collected from the enabled guestbooks as CSV files to store and use outside of the Dataverse installation. @@ -227,7 +228,7 @@ Similarly to dataset linking, Dataverse collection linking allows a Dataverse co If you need to have a Dataverse collection linked to your Dataverse collection, please contact the support team for the Dataverse installation you are using. Publish Your Dataverse Collection -================================================================= +================================= Once your Dataverse collection is ready to go public, go to your Dataverse collection page, click on the "Publish" button on the right hand side of the page. 
A pop-up will appear to confirm that you are ready to actually Publish, since once a Dataverse collection diff --git a/doc/sphinx-guides/source/user/img/DatasetDiagram.png b/doc/sphinx-guides/source/user/img/DatasetDiagram.png old mode 100755 new mode 100644 index 45a21456a08..471a54c2d83 Binary files a/doc/sphinx-guides/source/user/img/DatasetDiagram.png and b/doc/sphinx-guides/source/user/img/DatasetDiagram.png differ diff --git a/doc/sphinx-guides/source/user/img/bagit-handler-errors.png b/doc/sphinx-guides/source/user/img/bagit-handler-errors.png new file mode 100644 index 00000000000..d4059ca53c9 Binary files /dev/null and b/doc/sphinx-guides/source/user/img/bagit-handler-errors.png differ diff --git a/doc/sphinx-guides/source/user/img/computational-workflow-diagram.png b/doc/sphinx-guides/source/user/img/computational-workflow-diagram.png new file mode 100644 index 00000000000..efb073737dd Binary files /dev/null and b/doc/sphinx-guides/source/user/img/computational-workflow-diagram.png differ diff --git a/doc/sphinx-guides/source/user/img/computational-workflow-facets.png b/doc/sphinx-guides/source/user/img/computational-workflow-facets.png new file mode 100644 index 00000000000..c790e1d5ffb Binary files /dev/null and b/doc/sphinx-guides/source/user/img/computational-workflow-facets.png differ diff --git a/doc/sphinx-guides/source/user/img/computational-workflow-metadata.png b/doc/sphinx-guides/source/user/img/computational-workflow-metadata.png new file mode 100644 index 00000000000..2c477e75b1e Binary files /dev/null and b/doc/sphinx-guides/source/user/img/computational-workflow-metadata.png differ diff --git a/doc/sphinx-guides/source/user/img/file-tags-facets.png b/doc/sphinx-guides/source/user/img/file-tags-facets.png new file mode 100644 index 00000000000..ce2a9bd72a8 Binary files /dev/null and b/doc/sphinx-guides/source/user/img/file-tags-facets.png differ diff --git a/doc/sphinx-guides/source/user/img/file-tags-link.png 
b/doc/sphinx-guides/source/user/img/file-tags-link.png new file mode 100644 index 00000000000..c0496a4e1ba Binary files /dev/null and b/doc/sphinx-guides/source/user/img/file-tags-link.png differ diff --git a/doc/sphinx-guides/source/user/img/file-tags-options.png b/doc/sphinx-guides/source/user/img/file-tags-options.png new file mode 100644 index 00000000000..4af196c690e Binary files /dev/null and b/doc/sphinx-guides/source/user/img/file-tags-options.png differ diff --git a/doc/sphinx-guides/source/versions.rst b/doc/sphinx-guides/source/versions.rst index f46b9477d92..1cbd785b5dd 100755 --- a/doc/sphinx-guides/source/versions.rst +++ b/doc/sphinx-guides/source/versions.rst @@ -6,7 +6,8 @@ Dataverse Software Documentation Versions This list provides a way to refer to the documentation for previous versions of the Dataverse Software. In order to learn more about the updates delivered from one version to another, visit the `Releases `__ page in our GitHub repo. -- 5.11.1 +- 5.12 +- `5.11.1 `__ - `5.11 `__ - `5.10.1 `__ - `5.10 `__ diff --git a/downloads/download.sh b/downloads/download.sh index 3d37d9f0940..7b9de0397cb 100755 --- a/downloads/download.sh +++ b/downloads/download.sh @@ -1,5 +1,5 @@ #!/bin/sh -curl -L -O https://s3-eu-west-1.amazonaws.com/payara.fish/Payara+Downloads/5.2021.6/payara-5.2021.6.zip +curl -L -O https://s3-eu-west-1.amazonaws.com/payara.fish/Payara+Downloads/5.2022.3/payara-5.2022.3.zip curl -L -O https://archive.apache.org/dist/lucene/solr/8.11.1/solr-8.11.1.tgz curl -L -O https://search.maven.org/remotecontent?filepath=org/jboss/weld/weld-osgi-bundle/2.2.10.Final/weld-osgi-bundle-2.2.10.Final-glassfish4.jar curl -s -L http://sourceforge.net/projects/schemaspy/files/schemaspy/SchemaSpy%205.0.0/schemaSpy_5.0.0.jar/download > schemaSpy_5.0.0.jar diff --git a/local_lib/com/apicatalog/titanium-json-ld/1.3.0-SNAPSHOT/titanium-json-ld-1.3.0-SNAPSHOT.jar 
b/local_lib/com/apicatalog/titanium-json-ld/1.3.0-SNAPSHOT/titanium-json-ld-1.3.0-SNAPSHOT.jar new file mode 100644 index 00000000000..ee499ae4b76 Binary files /dev/null and b/local_lib/com/apicatalog/titanium-json-ld/1.3.0-SNAPSHOT/titanium-json-ld-1.3.0-SNAPSHOT.jar differ diff --git a/modules/dataverse-parent/pom.xml b/modules/dataverse-parent/pom.xml index 22ea30795ba..8fe611d7716 100644 --- a/modules/dataverse-parent/pom.xml +++ b/modules/dataverse-parent/pom.xml @@ -129,7 +129,7 @@ - 5.11.1 + 5.12 11 UTF-8 @@ -146,11 +146,11 @@ -Duser.timezone=${project.timezone} -Dfile.encoding=${project.build.sourceEncoding} -Duser.language=${project.language} -Duser.region=${project.region} - 5.2021.6 - 42.3.5 + 5.2022.3 + 42.5.0 8.11.1 - 1.11.762 - 0.157.0 + 1.12.290 + 0.177.0 8.0.0 @@ -164,7 +164,7 @@ 1.15.0 - 0.4.1 + 2.10.1 4.13.1 5.7.0 diff --git a/pom.xml b/pom.xml index ce9f1c4b63d..6faba5086be 100644 --- a/pom.xml +++ b/pom.xml @@ -24,7 +24,7 @@ 1.20.1 0.8.7 5.2.1 - 2.3.0 + 2.4.1 - + @@ -112,12 +112,12 @@ com.apicatalog titanium-json-ld - 0.8.6 + 1.3.0-SNAPSHOT com.google.code.gson gson - 2.2.4 + 2.8.9 compile @@ -142,7 +142,7 @@ org.mindrot jbcrypt - 0.3m + 0.4 org.postgresql @@ -347,7 +347,7 @@ org.jsoup jsoup - 1.14.2 + 1.15.3 io.searchbox @@ -357,7 +357,7 @@ commons-codec commons-codec - 1.9 + 1.15 @@ -380,7 +380,7 @@ com.nimbusds oauth2-oidc-sdk - 9.9.1 + 9.41.1 @@ -463,7 +463,7 @@ org.duracloud common - 7.1.0 + 7.1.1 org.slf4j @@ -478,7 +478,7 @@ org.duracloud storeclient - 7.1.0 + 7.1.1 org.slf4j @@ -516,7 +516,19 @@ google-cloud-storage - + + + + com.auth0 + java-jwt + 3.19.1 + + + + io.github.erdtman + java-json-canonicalization + 1.1 + @@ -601,9 +613,9 @@ test - org.microbean - microbean-microprofile-config - ${microbean-mpconfig.version} + io.smallrye.config + smallrye-config + ${smallrye-mpconfig.version} test @@ -641,10 +653,17 @@ **/*.xml **/firstNames/*.* **/*.xsl - **/*.properties **/services/* + + src/main/resources + + true + + **/*.properties + + 
diff --git a/scripts/api/data/metadatablocks/citation.tsv b/scripts/api/data/metadatablocks/citation.tsv index 1b14f9d0c14..29d121aae16 100644 --- a/scripts/api/data/metadatablocks/citation.tsv +++ b/scripts/api/data/metadatablocks/citation.tsv @@ -1,84 +1,84 @@ -#metadataBlock name dataverseAlias displayName blockURI - citation Citation Metadata https://dataverse.org/schema/citation/ +#metadataBlock name dataverseAlias displayName blockURI + citation Citation Metadata https://dataverse.org/schema/citation/ #datasetField name title description watermark fieldType displayOrder displayFormat advancedSearchField allowControlledVocabulary allowmultiples facetable displayoncreate required parent metadatablock_id termURI - title Title Full title by which the Dataset is known. Enter title... text 0 TRUE FALSE FALSE FALSE TRUE TRUE citation http://purl.org/dc/terms/title - subtitle Subtitle A secondary title used to amplify or state certain limitations on the main title. text 1 FALSE FALSE FALSE FALSE FALSE FALSE citation - alternativeTitle Alternative Title A title by which the work is commonly referred, or an abbreviation of the title. text 2 FALSE FALSE FALSE FALSE FALSE FALSE citation http://purl.org/dc/terms/alternative - alternativeURL Alternative URL A URL where the dataset can be viewed, such as a personal or project website. Enter full URL, starting with http:// url 3 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE citation https://schema.org/distribution - otherId Other ID Another unique identifier that identifies this Dataset (e.g., producer's or another repository's number). none 4 : FALSE FALSE TRUE FALSE FALSE FALSE citation - otherIdAgency Agency Name of agency which generated this identifier. text 5 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE otherId citation - otherIdValue Identifier Other identifier that corresponds to this Dataset. 
text 6 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE otherId citation - author Author The person(s), corporate body(ies), or agency(ies) responsible for creating the work. none 7 FALSE FALSE TRUE FALSE TRUE TRUE citation http://purl.org/dc/terms/creator - authorName Name The author's Family Name, Given Name or the name of the organization responsible for this Dataset. FamilyName, GivenName or Organization text 8 #VALUE TRUE FALSE FALSE TRUE TRUE TRUE author citation - authorAffiliation Affiliation The organization with which the author is affiliated. text 9 (#VALUE) TRUE FALSE FALSE TRUE TRUE FALSE author citation - authorIdentifierScheme Identifier Scheme Name of the identifier scheme (ORCID, ISNI). text 10 - #VALUE: FALSE TRUE FALSE FALSE TRUE FALSE author citation http://purl.org/spar/datacite/AgentIdentifierScheme - authorIdentifier Identifier Uniquely identifies an individual author or organization, according to various schemes. text 11 #VALUE FALSE FALSE FALSE FALSE TRUE FALSE author citation http://purl.org/spar/datacite/AgentIdentifier - datasetContact Contact The contact(s) for this Dataset. none 12 FALSE FALSE TRUE FALSE TRUE TRUE citation - datasetContactName Name The contact's Family Name, Given Name or the name of the organization. FamilyName, GivenName or Organization text 13 #VALUE FALSE FALSE FALSE FALSE TRUE FALSE datasetContact citation - datasetContactAffiliation Affiliation The organization with which the contact is affiliated. text 14 (#VALUE) FALSE FALSE FALSE FALSE TRUE FALSE datasetContact citation - datasetContactEmail E-mail The e-mail address(es) of the contact(s) for the Dataset. This will not be displayed. email 15 #EMAIL FALSE FALSE FALSE FALSE TRUE TRUE datasetContact citation - dsDescription Description A summary describing the purpose, nature, and scope of the Dataset. none 16 FALSE FALSE TRUE FALSE TRUE TRUE citation - dsDescriptionValue Text A summary describing the purpose, nature, and scope of the Dataset. 
textbox 17 #VALUE TRUE FALSE FALSE FALSE TRUE TRUE dsDescription citation - dsDescriptionDate Date In cases where a Dataset contains more than one description (for example, one might be supplied by the data producer and another prepared by the data repository where the data are deposited), the date attribute is used to distinguish between the two descriptions. The date attribute follows the ISO convention of YYYY-MM-DD. YYYY-MM-DD date 18 (#VALUE) FALSE FALSE FALSE FALSE TRUE FALSE dsDescription citation - subject Subject Domain-specific Subject Categories that are topically relevant to the Dataset. text 19 TRUE TRUE TRUE TRUE TRUE TRUE citation http://purl.org/dc/terms/subject - keyword Keyword Key terms that describe important aspects of the Dataset. none 20 FALSE FALSE TRUE FALSE TRUE FALSE citation - keywordValue Term Key terms that describe important aspects of the Dataset. Can be used for building keyword indexes and for classification and retrieval purposes. A controlled vocabulary can be employed. The vocab attribute is provided for specification of the controlled vocabulary in use, such as LCSH, MeSH, or others. The vocabURI attribute specifies the location for the full controlled vocabulary. text 21 #VALUE TRUE FALSE FALSE TRUE TRUE FALSE keyword citation - keywordVocabulary Vocabulary For the specification of the keyword controlled vocabulary in use, such as LCSH, MeSH, or others. text 22 (#VALUE) FALSE FALSE FALSE FALSE TRUE FALSE keyword citation - keywordVocabularyURI Vocabulary URL Keyword vocabulary URL points to the web presence that describes the keyword vocabulary, if appropriate. Enter an absolute URL where the keyword vocabulary web site is found, such as http://www.my.org. Enter full URL, starting with http:// url 23 #VALUE FALSE FALSE FALSE FALSE TRUE FALSE keyword citation - topicClassification Topic Classification The classification field indicates the broad important topic(s) and subjects that the data cover. 
Library of Congress subject terms may be used here. none 24 FALSE FALSE TRUE FALSE FALSE FALSE citation - topicClassValue Term Topic or Subject term that is relevant to this Dataset. text 25 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE topicClassification citation - topicClassVocab Vocabulary Provided for specification of the controlled vocabulary in use, e.g., LCSH, MeSH, etc. text 26 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE topicClassification citation - topicClassVocabURI Vocabulary URL Specifies the URL location for the full controlled vocabulary. Enter full URL, starting with http:// url 27 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE topicClassification citation - publication Related Publication Publications that use the data from this Dataset. The full list of Related Publications will be displayed on the metadata tab. none 28 FALSE FALSE TRUE FALSE TRUE FALSE citation http://purl.org/dc/terms/isReferencedBy - publicationCitation Citation The full bibliographic citation for this related publication. textbox 29 #VALUE TRUE FALSE FALSE FALSE TRUE FALSE publication citation http://purl.org/dc/terms/bibliographicCitation - publicationIDType ID Type The type of digital identifier used for this publication (e.g., Digital Object Identifier (DOI)). text 30 #VALUE: TRUE TRUE FALSE FALSE TRUE FALSE publication citation http://purl.org/spar/datacite/ResourceIdentifierScheme - publicationIDNumber ID Number The identifier for the selected ID type. text 31 #VALUE TRUE FALSE FALSE FALSE TRUE FALSE publication citation http://purl.org/spar/datacite/ResourceIdentifier - publicationURL URL Link to the publication web page (e.g., journal article page, archive record page, or other). Enter full URL, starting with http:// url 32 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE publication citation https://schema.org/distribution - notesText Notes Additional important information about the Dataset. 
textbox 33 FALSE FALSE FALSE FALSE TRUE FALSE citation - language Language Language of the Dataset text 34 TRUE TRUE TRUE TRUE FALSE FALSE citation http://purl.org/dc/terms/language - producer Producer Person or organization with the financial or administrative responsibility over this Dataset none 35 FALSE FALSE TRUE FALSE FALSE FALSE citation - producerName Name Producer name FamilyName, GivenName or Organization text 36 #VALUE TRUE FALSE FALSE TRUE FALSE TRUE producer citation - producerAffiliation Affiliation The organization with which the producer is affiliated. text 37 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE producer citation - producerAbbreviation Abbreviation The abbreviation by which the producer is commonly known. (ex. IQSS, ICPSR) text 38 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE producer citation - producerURL URL Producer URL points to the producer's web presence, if appropriate. Enter an absolute URL where the producer's web site is found, such as http://www.my.org. Enter full URL, starting with http:// url 39 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE producer citation - producerLogoURL Logo URL URL for the producer's logo, which points to this producer's web-accessible logo image. Enter an absolute URL where the producer's logo image is found, such as http://www.my.org/images/logo.gif. Enter full URL for image, starting with http:// url 40
        FALSE FALSE FALSE FALSE FALSE FALSE producer citation - productionDate Production Date Date when the data collection or other materials were produced (not distributed, published or archived). YYYY-MM-DD date 41 TRUE FALSE FALSE TRUE FALSE FALSE citation - productionPlace Production Place The location where the data collection and any other related materials were produced. text 42 FALSE FALSE FALSE FALSE FALSE FALSE citation - contributor Contributor The organization or person responsible for either collecting, managing, or otherwise contributing in some form to the development of the resource. none 43 : FALSE FALSE TRUE FALSE FALSE FALSE citation http://purl.org/dc/terms/contributor - contributorType Type The type of contributor of the resource. text 44 #VALUE TRUE TRUE FALSE TRUE FALSE FALSE contributor citation - contributorName Name The Family Name, Given Name or organization name of the contributor. FamilyName, GivenName or Organization text 45 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE contributor citation - grantNumber Grant Information Grant Information none 46 : FALSE FALSE TRUE FALSE FALSE FALSE citation https://schema.org/sponsor - grantNumberAgency Grant Agency Grant Number Agency text 47 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE grantNumber citation - grantNumberValue Grant Number The grant or contract number of the project that sponsored the effort. text 48 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE grantNumber citation - distributor Distributor The organization designated by the author or producer to generate copies of the particular work including any necessary editions or revisions. none 49 FALSE FALSE TRUE FALSE FALSE FALSE citation - distributorName Name Distributor name FamilyName, GivenName or Organization text 50 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE distributor citation - distributorAffiliation Affiliation The organization with which the distributor contact is affiliated. 
text 51 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE distributor citation - distributorAbbreviation Abbreviation The abbreviation by which this distributor is commonly known (e.g., IQSS, ICPSR). text 52 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE distributor citation - distributorURL URL Distributor URL points to the distributor's web presence, if appropriate. Enter an absolute URL where the distributor's web site is found, such as http://www.my.org. Enter full URL, starting with http:// url 53 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE distributor citation - distributorLogoURL Logo URL URL of the distributor's logo, which points to this distributor's web-accessible logo image. Enter an absolute URL where the distributor's logo image is found, such as http://www.my.org/images/logo.gif. Enter full URL for image, starting with http:// url 54
        FALSE FALSE FALSE FALSE FALSE FALSE distributor citation - distributionDate Distribution Date Date that the work was made available for distribution/presentation. YYYY-MM-DD date 55 TRUE FALSE FALSE TRUE FALSE FALSE citation - depositor Depositor The person (Family Name, Given Name) or the name of the organization that deposited this Dataset to the repository. text 56 FALSE FALSE FALSE FALSE FALSE FALSE citation - dateOfDeposit Deposit Date Date that the Dataset was deposited into the repository. YYYY-MM-DD date 57 FALSE FALSE FALSE TRUE FALSE FALSE citation http://purl.org/dc/terms/dateSubmitted - timePeriodCovered Time Period Covered Time period to which the data refer. This item reflects the time period covered by the data, not the dates of coding or making documents machine-readable or the dates the data were collected. Also known as span. none 58 ; FALSE FALSE TRUE FALSE FALSE FALSE citation https://schema.org/temporalCoverage - timePeriodCoveredStart Start Start date which reflects the time period covered by the data, not the dates of coding or making documents machine-readable or the dates the data were collected. YYYY-MM-DD date 59 #NAME: #VALUE TRUE FALSE FALSE TRUE FALSE FALSE timePeriodCovered citation - timePeriodCoveredEnd End End date which reflects the time period covered by the data, not the dates of coding or making documents machine-readable or the dates the data were collected. YYYY-MM-DD date 60 #NAME: #VALUE TRUE FALSE FALSE TRUE FALSE FALSE timePeriodCovered citation - dateOfCollection Date of Collection Contains the date(s) when the data were collected. none 61 ; FALSE FALSE TRUE FALSE FALSE FALSE citation - dateOfCollectionStart Start Date when the data collection started. YYYY-MM-DD date 62 #NAME: #VALUE FALSE FALSE FALSE FALSE FALSE FALSE dateOfCollection citation - dateOfCollectionEnd End Date when the data collection ended. 
YYYY-MM-DD date 63 #NAME: #VALUE FALSE FALSE FALSE FALSE FALSE FALSE dateOfCollection citation - kindOfData Kind of Data Type of data included in the file: survey data, census/enumeration data, aggregate data, clinical data, event/transaction data, program source code, machine-readable text, administrative records data, experimental data, psychological test, textual data, coded textual, coded documents, time budget diaries, observation data/ratings, process-produced data, or other. text 64 TRUE FALSE TRUE TRUE FALSE FALSE citation http://rdf-vocabulary.ddialliance.org/discovery#kindOfData - series Series Information about the Dataset series. none 65 : FALSE FALSE FALSE FALSE FALSE FALSE citation - seriesName Name Name of the dataset series to which the Dataset belongs. text 66 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE series citation - seriesInformation Information History of the series and summary of those features that apply to the series as a whole. textbox 67 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE series citation - software Software Information about the software used to generate the Dataset. none 68 , FALSE FALSE TRUE FALSE FALSE FALSE citation https://www.w3.org/TR/prov-o/#wasGeneratedBy - softwareName Name Name of software used to generate the Dataset. text 69 #VALUE FALSE TRUE FALSE FALSE FALSE FALSE software citation - softwareVersion Version Version of the software used to generate the Dataset. text 70 #NAME: #VALUE FALSE FALSE FALSE FALSE FALSE FALSE software citation - relatedMaterial Related Material Any material related to this Dataset. textbox 71 FALSE FALSE TRUE FALSE FALSE FALSE citation - relatedDatasets Related Datasets Any Datasets that are related to this Dataset, such as previous research on this subject. textbox 72 FALSE FALSE TRUE FALSE FALSE FALSE citation http://purl.org/dc/terms/relation - otherReferences Other References Any references that would serve as background or supporting material to this Dataset. 
text 73 FALSE FALSE TRUE FALSE FALSE FALSE citation http://purl.org/dc/terms/references - dataSources Data Sources List of books, articles, serials, or machine-readable data files that served as the sources of the data collection. textbox 74 FALSE FALSE TRUE FALSE FALSE FALSE citation https://www.w3.org/TR/prov-o/#wasDerivedFrom - originOfSources Origin of Sources For historical materials, information about the origin of the sources and the rules followed in establishing the sources should be specified. textbox 75 FALSE FALSE FALSE FALSE FALSE FALSE citation - characteristicOfSources Characteristic of Sources Noted Assessment of characteristics and source material. textbox 76 FALSE FALSE FALSE FALSE FALSE FALSE citation - accessToSources Documentation and Access to Sources Level of documentation of the original sources. textbox 77 FALSE FALSE FALSE FALSE FALSE FALSE citation + title Title The main title of the Dataset text 0 TRUE FALSE FALSE FALSE TRUE TRUE citation http://purl.org/dc/terms/title + subtitle Subtitle A secondary title that amplifies or states certain limitations on the main title text 1 FALSE FALSE FALSE FALSE FALSE FALSE citation + alternativeTitle Alternative Title Either 1) a title commonly used to refer to the Dataset or 2) an abbreviation of the main title text 2 FALSE FALSE FALSE FALSE FALSE FALSE citation http://purl.org/dc/terms/alternative + alternativeURL Alternative URL Another URL where one can view or access the data in the Dataset, e.g. a project or personal webpage https:// url 3 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE citation https://schema.org/distribution + otherId Other Identifier Another unique identifier for the Dataset (e.g. 
producer's or another repository's identifier) none 4 : FALSE FALSE TRUE FALSE FALSE FALSE citation + otherIdAgency Agency The name of the agency that generated the other identifier text 5 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE otherId citation + otherIdValue Identifier Another identifier uniquely identifies the Dataset text 6 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE otherId citation + author Author The entity, e.g. a person or organization, that created the Dataset none 7 FALSE FALSE TRUE FALSE TRUE TRUE citation http://purl.org/dc/terms/creator + authorName Name The name of the author, such as the person's name or the name of an organization 1) Family Name, Given Name or 2) Organization XYZ text 8 #VALUE TRUE FALSE FALSE TRUE TRUE TRUE author citation + authorAffiliation Affiliation The name of the entity affiliated with the author, e.g. an organization's name Organization XYZ text 9 (#VALUE) TRUE FALSE FALSE TRUE TRUE FALSE author citation + authorIdentifierScheme Identifier Type The type of identifier that uniquely identifies the author (e.g. ORCID, ISNI) text 10 - #VALUE: FALSE TRUE FALSE FALSE TRUE FALSE author citation http://purl.org/spar/datacite/AgentIdentifierScheme + authorIdentifier Identifier Uniquely identifies the author when paired with an identifier type text 11 #VALUE FALSE FALSE FALSE FALSE TRUE FALSE author citation http://purl.org/spar/datacite/AgentIdentifier + datasetContact Point of Contact The entity, e.g. a person or organization, that users of the Dataset can contact with questions none 12 FALSE FALSE TRUE FALSE TRUE TRUE citation + datasetContactName Name The name of the point of contact, e.g. the person's name or the name of an organization 1) FamilyName, GivenName or 2) Organization text 13 #VALUE FALSE FALSE FALSE FALSE TRUE FALSE datasetContact citation + datasetContactAffiliation Affiliation The name of the entity affiliated with the point of contact, e.g. 
an organization's name Organization XYZ text 14 (#VALUE) FALSE FALSE FALSE FALSE TRUE FALSE datasetContact citation + datasetContactEmail E-mail The point of contact's email address name@email.xyz email 15 #EMAIL FALSE FALSE FALSE FALSE TRUE TRUE datasetContact citation + dsDescription Description A summary describing the purpose, nature, and scope of the Dataset none 16 FALSE FALSE TRUE FALSE TRUE TRUE citation + dsDescriptionValue Text A summary describing the purpose, nature, and scope of the Dataset textbox 17 #VALUE TRUE FALSE FALSE FALSE TRUE TRUE dsDescription citation + dsDescriptionDate Date The date when the description was added to the Dataset. If the Dataset contains more than one description, e.g. the data producer supplied one description and the data repository supplied another, this date is used to distinguish between the descriptions YYYY-MM-DD date 18 (#VALUE) FALSE FALSE FALSE FALSE TRUE FALSE dsDescription citation + subject Subject The area of study relevant to the Dataset text 19 TRUE TRUE TRUE TRUE TRUE TRUE citation http://purl.org/dc/terms/subject + keyword Keyword A key term that describes an important aspect of the Dataset and information about any controlled vocabulary used none 20 FALSE FALSE TRUE FALSE TRUE FALSE citation + keywordValue Term A key term that describes important aspects of the Dataset text 21 #VALUE TRUE FALSE FALSE TRUE TRUE FALSE keyword citation + keywordVocabulary Controlled Vocabulary Name The controlled vocabulary used for the keyword term (e.g. 
LCSH, MeSH) text 22 (#VALUE) FALSE FALSE FALSE FALSE TRUE FALSE keyword citation + keywordVocabularyURI Controlled Vocabulary URL The URL where one can access information about the term's controlled vocabulary https:// url 23 #VALUE FALSE FALSE FALSE FALSE TRUE FALSE keyword citation + topicClassification Topic Classification Indicates a broad, important topic or subject that the Dataset covers and information about any controlled vocabulary used none 24 FALSE FALSE TRUE FALSE FALSE FALSE citation + topicClassValue Term A topic or subject term text 25 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE topicClassification citation + topicClassVocab Controlled Vocabulary Name The controlled vocabulary used for the keyword term (e.g. LCSH, MeSH) text 26 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE topicClassification citation + topicClassVocabURI Controlled Vocabulary URL The URL where one can access information about the term's controlled vocabulary https:// url 27 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE topicClassification citation + publication Related Publication The article or report that uses the data in the Dataset. 
The full list of related publications will be displayed on the metadata tab none 28 FALSE FALSE TRUE FALSE TRUE FALSE citation http://purl.org/dc/terms/isReferencedBy + publicationCitation Citation The full bibliographic citation for the related publication textbox 29 #VALUE TRUE FALSE FALSE FALSE TRUE FALSE publication citation http://purl.org/dc/terms/bibliographicCitation + publicationIDType Identifier Type The type of identifier that uniquely identifies a related publication text 30 #VALUE: TRUE TRUE FALSE FALSE TRUE FALSE publication citation http://purl.org/spar/datacite/ResourceIdentifierScheme + publicationIDNumber Identifier The identifier for a related publication text 31 #VALUE TRUE FALSE FALSE FALSE TRUE FALSE publication citation http://purl.org/spar/datacite/ResourceIdentifier + publicationURL URL The URL form of the identifier entered in the Identifier field, e.g. the DOI URL if a DOI was entered in the Identifier field. Used to display what was entered in the ID Type and ID Number fields as a link. If what was entered in the Identifier field has no URL form, the URL of the publication webpage is used, e.g. a journal article webpage https:// url 32 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE publication citation https://schema.org/distribution + notesText Notes Additional information about the Dataset textbox 33 FALSE FALSE FALSE FALSE TRUE FALSE citation + language Language A language that the Dataset's files is written in text 34 TRUE TRUE TRUE TRUE FALSE FALSE citation http://purl.org/dc/terms/language + producer Producer The entity, such a person or organization, managing the finances or other administrative processes involved in the creation of the Dataset none 35 FALSE FALSE TRUE FALSE FALSE FALSE citation + producerName Name The name of the entity, e.g. 
the person's name or the name of an organization 1) FamilyName, GivenName or 2) Organization text 36 #VALUE TRUE FALSE FALSE TRUE FALSE TRUE producer citation + producerAffiliation Affiliation The name of the entity affiliated with the producer, e.g. an organization's name Organization XYZ text 37 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE producer citation + producerAbbreviation Abbreviated Name The producer's abbreviated name (e.g. IQSS, ICPSR) text 38 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE producer citation + producerURL URL The URL of the producer's website https:// url 39 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE producer citation + producerLogoURL Logo URL The URL of the producer's logo https:// url 40
        FALSE FALSE FALSE FALSE FALSE FALSE producer citation + productionDate Production Date The date when the data were produced (not distributed, published, or archived) YYYY-MM-DD date 41 TRUE FALSE FALSE TRUE FALSE FALSE citation + productionPlace Production Location The location where the data and any related materials were produced or collected text 42 FALSE FALSE FALSE FALSE FALSE FALSE citation + contributor Contributor The entity, such as a person or organization, responsible for collecting, managing, or otherwise contributing to the development of the Dataset none 43 : FALSE FALSE TRUE FALSE FALSE FALSE citation http://purl.org/dc/terms/contributor + contributorType Type Indicates the type of contribution made to the dataset text 44 #VALUE TRUE TRUE FALSE TRUE FALSE FALSE contributor citation + contributorName Name The name of the contributor, e.g. the person's name or the name of an organization 1) FamilyName, GivenName or 2) Organization text 45 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE contributor citation + grantNumber Funding Information Information about the Dataset's financial support none 46 : FALSE FALSE TRUE FALSE FALSE FALSE citation https://schema.org/sponsor + grantNumberAgency Agency The agency that provided financial support for the Dataset Organization XYZ text 47 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE grantNumber citation + grantNumberValue Identifier The grant identifier or contract identifier of the agency that provided financial support for the Dataset text 48 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE grantNumber citation + distributor Distributor The entity, such as a person or organization, designated to generate copies of the Dataset, including any editions or revisions none 49 FALSE FALSE TRUE FALSE FALSE FALSE citation + distributorName Name The name of the entity, e.g. 
the person's name or the name of an organization 1) FamilyName, GivenName or 2) Organization text 50 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE distributor citation + distributorAffiliation Affiliation The name of the entity affiliated with the distributor, e.g. an organization's name Organization XYZ text 51 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE distributor citation + distributorAbbreviation Abbreviated Name The distributor's abbreviated name (e.g. IQSS, ICPSR) text 52 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE distributor citation + distributorURL URL The URL of the distributor's webpage https:// url 53 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE distributor citation + distributorLogoURL Logo URL The URL of the distributor's logo image, used to show the image on the Dataset's page https:// url 54
        FALSE FALSE FALSE FALSE FALSE FALSE distributor citation + distributionDate Distribution Date The date when the Dataset was made available for distribution/presentation YYYY-MM-DD date 55 TRUE FALSE FALSE TRUE FALSE FALSE citation + depositor Depositor The entity, such as a person or organization, that deposited the Dataset in the repository 1) FamilyName, GivenName or 2) Organization text 56 FALSE FALSE FALSE FALSE FALSE FALSE citation + dateOfDeposit Deposit Date The date when the Dataset was deposited into the repository YYYY-MM-DD date 57 FALSE FALSE FALSE TRUE FALSE FALSE citation http://purl.org/dc/terms/dateSubmitted + timePeriodCovered Time Period The time period that the data refer to. Also known as span. This is the time period covered by the data, not the dates of coding, collecting data, or making documents machine-readable none 58 ; FALSE FALSE TRUE FALSE FALSE FALSE citation https://schema.org/temporalCoverage + timePeriodCoveredStart Start Date The start date of the time period that the data refer to YYYY-MM-DD date 59 #NAME: #VALUE TRUE FALSE FALSE TRUE FALSE FALSE timePeriodCovered citation + timePeriodCoveredEnd End Date The end date of the time period that the data refer to YYYY-MM-DD date 60 #NAME: #VALUE TRUE FALSE FALSE TRUE FALSE FALSE timePeriodCovered citation + dateOfCollection Date of Collection The dates when the data were collected or generated none 61 ; FALSE FALSE TRUE FALSE FALSE FALSE citation + dateOfCollectionStart Start Date The date when the data collection started YYYY-MM-DD date 62 #NAME: #VALUE FALSE FALSE FALSE FALSE FALSE FALSE dateOfCollection citation + dateOfCollectionEnd End Date The date when the data collection ended YYYY-MM-DD date 63 #NAME: #VALUE FALSE FALSE FALSE FALSE FALSE FALSE dateOfCollection citation + kindOfData Data Type The type of data included in the files (e.g. 
survey data, clinical data, or machine-readable text) text 64 TRUE FALSE TRUE TRUE FALSE FALSE citation http://rdf-vocabulary.ddialliance.org/discovery#kindOfData + series Series Information about the dataset series to which the Dataset belong none 65 : FALSE FALSE FALSE FALSE FALSE FALSE citation + seriesName Name The name of the dataset series text 66 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE series citation + seriesInformation Information Can include 1) a history of the series and 2) a summary of features that apply to the series textbox 67 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE series citation + software Software Information about the software used to generate the Dataset none 68 , FALSE FALSE TRUE FALSE FALSE FALSE citation https://www.w3.org/TR/prov-o/#wasGeneratedBy + softwareName Name The name of software used to generate the Dataset text 69 #VALUE FALSE TRUE FALSE FALSE FALSE FALSE software citation + softwareVersion Version The version of the software used to generate the Dataset, e.g. 4.11 text 70 #NAME: #VALUE FALSE FALSE FALSE FALSE FALSE FALSE software citation + relatedMaterial Related Material Information, such as a persistent ID or citation, about the material related to the Dataset, such as appendices or sampling information available outside of the Dataset textbox 71 FALSE FALSE TRUE FALSE FALSE FALSE citation + relatedDatasets Related Dataset Information, such as a persistent ID or citation, about a related dataset, such as previous research on the Dataset's subject textbox 72 FALSE FALSE TRUE FALSE FALSE FALSE citation http://purl.org/dc/terms/relation + otherReferences Other Reference Information, such as a persistent ID or citation, about another type of resource that provides background or supporting material to the Dataset text 73 FALSE FALSE TRUE FALSE FALSE FALSE citation http://purl.org/dc/terms/references + dataSources Data Source Information, such as a persistent ID or citation, about sources of the Dataset (e.g. 
a book, article, serial, or machine-readable data file) textbox 74 FALSE FALSE TRUE FALSE FALSE FALSE citation https://www.w3.org/TR/prov-o/#wasDerivedFrom + originOfSources Origin of Historical Sources For historical sources, the origin and any rules followed in establishing them as sources textbox 75 FALSE FALSE FALSE FALSE FALSE FALSE citation + characteristicOfSources Characteristic of Sources Characteristics not already noted elsewhere textbox 76 FALSE FALSE FALSE FALSE FALSE FALSE citation + accessToSources Documentation and Access to Sources 1) Methods or procedures for accessing data sources and 2) any special permissions needed for access textbox 77 FALSE FALSE FALSE FALSE FALSE FALSE citation #controlledVocabulary DatasetField Value identifier displayOrder subject Agricultural Sciences D01 0 subject Arts and Humanities D0 1 @@ -111,6 +111,7 @@ publicationIDType upc 14 publicationIDType url 15 publicationIDType urn 16 + publicationIDType DASH-NRS 17 contributorType Data Collector 0 contributorType Data Curator 1 contributorType Data Manager 2 diff --git a/scripts/api/data/metadatablocks/computational_workflow.tsv b/scripts/api/data/metadatablocks/computational_workflow.tsv new file mode 100644 index 00000000000..51b69cfdb80 --- /dev/null +++ b/scripts/api/data/metadatablocks/computational_workflow.tsv @@ -0,0 +1,21 @@ +#metadataBlock name dataverseAlias displayName + computationalworkflow Computational Workflow Metadata +#datasetField name title description watermark fieldType displayOrder displayFormat advancedSearchField allowControlledVocabulary allowmultiples facetable displayoncreate required parent metadatablock_id termURI + workflowType Computational Workflow Type The kind of Computational Workflow, which is designed to compose and execute a series of computational or data manipulation steps in a scientific application text 0 TRUE TRUE TRUE TRUE TRUE FALSE computationalworkflow + workflowCodeRepository External Code Repository URL A link to the 
repository where the un-compiled, human readable code and related code is located (e.g. GitHub, GitLab, SVN) https://... url 1 FALSE FALSE TRUE FALSE TRUE FALSE computationalworkflow + workflowDocumentation Documentation A link (URL) to the documentation or text describing the Computational Workflow and its use textbox 2 FALSE FALSE TRUE FALSE TRUE FALSE computationalworkflow +#controlledVocabulary DatasetField Value identifier displayOrder + workflowType Common Workflow Language (CWL) workflowtype_cwl 1 + workflowType Workflow Description Language (WDL) workflowtype_wdl 2 + workflowType Nextflow workflowtype_nextflow 3 + workflowType Snakemake workflowtype_snakemake 4 + workflowType Ruffus workflowtype_ruffus 5 + workflowType DAGMan workflowtype_dagman 6 + workflowType Jupyter Notebook workflowtype_jupyter 7 + workflowType R Notebook workflowtype_rstudio 8 + workflowType MATLAB Script workflowtype_matlab 9 + workflowType Bash Script workflowtype_bash 10 + workflowType Makefile workflowtype_makefile 11 + workflowType Other Python-based workflow workflowtype_otherpython 12 + workflowType Other R-based workflow workflowtype_otherrbased 13 + workflowType Other workflowtype_other 100 diff --git a/scripts/api/data/workflows/internal-ldnannounce-workflow.json b/scripts/api/data/workflows/internal-ldnannounce-workflow.json new file mode 100644 index 00000000000..9cf058b68a1 --- /dev/null +++ b/scripts/api/data/workflows/internal-ldnannounce-workflow.json @@ -0,0 +1,16 @@ +{ + "name": "LDN Announce workflow", + "steps": [ + { + "provider":":internal", + "stepType":"ldnannounce", + "parameters": { + "stepName":"LDN Announce" + }, + "requiredSettings": { + ":LDNAnnounceRequiredFields": "string", + ":LDNTarget": "string" + } + } + ] +} diff --git a/scripts/api/setup-datasetfields.sh b/scripts/api/setup-datasetfields.sh index 0d2d60b9538..0d79176c099 100755 --- a/scripts/api/setup-datasetfields.sh +++ b/scripts/api/setup-datasetfields.sh @@ -7,3 +7,4 @@ curl 
http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @da curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @data/metadatablocks/astrophysics.tsv -H "Content-type: text/tab-separated-values" curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @data/metadatablocks/biomedical.tsv -H "Content-type: text/tab-separated-values" curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @data/metadatablocks/journals.tsv -H "Content-type: text/tab-separated-values" + diff --git a/scripts/vagrant/setup.sh b/scripts/vagrant/setup.sh index e4915ae9ffa..0af4afb22af 100644 --- a/scripts/vagrant/setup.sh +++ b/scripts/vagrant/setup.sh @@ -51,7 +51,7 @@ SOLR_USER=solr echo "Ensuring Unix user '$SOLR_USER' exists" useradd $SOLR_USER || : DOWNLOAD_DIR='/dataverse/downloads' -PAYARA_ZIP="$DOWNLOAD_DIR/payara-5.2021.6.zip" +PAYARA_ZIP="$DOWNLOAD_DIR/payara-5.2022.3.zip" SOLR_TGZ="$DOWNLOAD_DIR/solr-8.11.1.tgz" if [ ! -f $PAYARA_ZIP ] || [ ! -f $SOLR_TGZ ]; then echo "Couldn't find $PAYARA_ZIP or $SOLR_TGZ! Running download script...." diff --git a/src/main/java/edu/harvard/iq/dataverse/ControlledVocabularyValue.java b/src/main/java/edu/harvard/iq/dataverse/ControlledVocabularyValue.java index 213d648da71..181d939f4a1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ControlledVocabularyValue.java +++ b/src/main/java/edu/harvard/iq/dataverse/ControlledVocabularyValue.java @@ -148,7 +148,7 @@ public static String getLocaleStrValue(String strValue, String fieldTypeName, St return sendDefault ? strValue : null; } } catch (MissingResourceException | NullPointerException e) { - logger.warning("Error finding" + "controlledvocabulary." + fieldTypeName + "." + key + " in " + ((locale==null)? "defaultLang" : locale.getLanguage()) + " : " + e.getLocalizedMessage()); + logger.warning("Error finding " + "controlledvocabulary." + fieldTypeName + "." + key + " in " + ((locale==null)? 
"defaultLang" : locale.getLanguage()) + " : " + e.getLocalizedMessage()); return sendDefault ? strValue : null; } } diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFile.java b/src/main/java/edu/harvard/iq/dataverse/DataFile.java index b21ab5fb7ba..cb43dff0e20 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFile.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFile.java @@ -605,7 +605,11 @@ public void setFilesize(long filesize) { * @return */ public String getFriendlySize() { - return FileSizeChecker.bytesToHumanReadable(filesize); + if (filesize != null) { + return FileSizeChecker.bytesToHumanReadable(filesize); + } else { + return BundleUtil.getStringFromBundle("file.sizeNotAvailable"); + } } public boolean isRestricted() { diff --git a/src/main/java/edu/harvard/iq/dataverse/Dataset.java b/src/main/java/edu/harvard/iq/dataverse/Dataset.java index c60ea7020bd..a4f82d41bac 100644 --- a/src/main/java/edu/harvard/iq/dataverse/Dataset.java +++ b/src/main/java/edu/harvard/iq/dataverse/Dataset.java @@ -33,8 +33,8 @@ import javax.persistence.Table; import javax.persistence.Temporal; import javax.persistence.TemporalType; -import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.StringUtil; +import edu.harvard.iq.dataverse.util.SystemConfig; /** * @@ -152,6 +152,19 @@ public void setCitationDateDatasetFieldType(DatasetFieldType citationDateDataset this.citationDateDatasetFieldType = citationDateDatasetFieldType; } + + @ManyToOne + @JoinColumn(name="template_id",nullable = true) + private Template template; + + public Template getTemplate() { + return template; + } + + public void setTemplate(Template template) { + this.template = template; + } + public Dataset() { DatasetVersion datasetVersion = new DatasetVersion(); datasetVersion.setDataset(this); @@ -743,6 +756,11 @@ public void setHarvestIdentifier(String harvestIdentifier) { this.harvestIdentifier = harvestIdentifier; } + public String getLocalURL() { + //Assumes 
GlobalId != null + return SystemConfig.getDataverseSiteUrlStatic() + "/dataset.xhtml?persistentId=" + this.getGlobalId().asString(); + } + public String getRemoteArchiveURL() { if (isHarvested()) { if (HarvestingClient.HARVEST_STYLE_DATAVERSE.equals(this.getHarvestedFrom().getHarvestStyle())) { diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldServiceBean.java index 192052c68c5..9bc5a5c09a7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldServiceBean.java @@ -672,6 +672,10 @@ public List getVocabScripts( Map cvocConf) { for(JsonObject jo: cvocConf.values()) { scripts.add(jo.getString("js-url")); } + String customScript = settingsService.getValueForKey(SettingsServiceBean.Key.ControlledVocabularyCustomJavaScript); + if (customScript != null && !customScript.isEmpty()) { + scripts.add(customScript); + } return Arrays.asList(scripts.toArray(new String[0])); } diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetLock.java b/src/main/java/edu/harvard/iq/dataverse/DatasetLock.java index d0ba86ab68e..7b857545c20 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetLock.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetLock.java @@ -77,7 +77,10 @@ public enum Reason { /** DCM (rsync) upload in progress */ DcmUpload, - + + /** Globus upload in progress */ + GlobusUpload, + /** Tasks handled by FinalizeDatasetPublicationCommand: Registering PIDs for DS and DFs and/or file validation */ finalizePublication, diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 1a2bcee4b12..0a8db69bf5b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -63,6 +63,8 @@ import java.io.FileOutputStream; import java.io.IOException; import 
java.io.InputStream; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; import java.sql.Timestamp; import java.text.SimpleDateFormat; import java.util.ArrayList; @@ -111,6 +113,7 @@ import edu.harvard.iq.dataverse.engine.command.impl.SubmitDatasetForReviewCommand; import edu.harvard.iq.dataverse.externaltools.ExternalTool; import edu.harvard.iq.dataverse.externaltools.ExternalToolServiceBean; +import edu.harvard.iq.dataverse.globus.GlobusServiceBean; import edu.harvard.iq.dataverse.export.SchemaDotOrgExporter; import edu.harvard.iq.dataverse.externaltools.ExternalToolHandler; import edu.harvard.iq.dataverse.makedatacount.MakeDataCountLoggingServiceBean; @@ -249,6 +252,8 @@ public enum DisplayMode { LicenseServiceBean licenseServiceBean; @Inject DataFileCategoryServiceBean dataFileCategoryService; + @Inject + GlobusServiceBean globusService; private Dataset dataset = new Dataset(); @@ -332,7 +337,7 @@ public void setSelectedHostDataverse(Dataverse selectedHostDataverse) { private Boolean hasRsyncScript = false; private Boolean hasTabular = false; - + /** * If the dataset version has at least one tabular file. 
The "hasTabular" @@ -343,6 +348,10 @@ public void setSelectedHostDataverse(Dataverse selectedHostDataverse) { private boolean versionHasTabular = false; private boolean showIngestSuccess; + + private Boolean archivable = null; + private Boolean versionArchivable = null; + private Boolean someVersionArchived = null; public boolean isShowIngestSuccess() { return showIngestSuccess; @@ -412,7 +421,7 @@ public Boolean isHasValidTermsOfAccess() { private Boolean hasRestrictedFiles = null; - public Boolean isHasRestrictedFiles(){ + public boolean isHasRestrictedFiles(){ //cache in page to limit processing if (hasRestrictedFiles != null){ return hasRestrictedFiles; @@ -1185,7 +1194,7 @@ public String getComputeUrl(FileMetadata metadata) { } catch (IOException e) { logger.info("DatasetPage: Failed to get storageIO"); } - if (settingsWrapper.isTrueForKey(SettingsServiceBean.Key.PublicInstall, false)) { + if (isHasPublicStore()) { return settingsWrapper.getValueForKey(SettingsServiceBean.Key.ComputeBaseUrl) + "?" + this.getPersistentId() + "=" + swiftObject.getSwiftFileName(); } @@ -1762,6 +1771,7 @@ public void handleChangeButton() { workingVersion.initDefaultValues(licenseServiceBean.getDefault()); updateDatasetFieldInputLevels(); } + dataset.setTemplate(selectedTemplate); /* Issue 8646: necessary for the access popup which is shared by the dataset page and the file page */ @@ -1821,15 +1831,21 @@ public void updateOwnerDataverse() { // initiate from scratch: (isolate the creation of a new dataset in its own method?) 
init(true); - // rebuild the bred crumbs display: + // rebuild the bread crumbs display: dataverseHeaderFragment.initBreadcrumbs(dataset); } } public boolean rsyncUploadSupported() { - return settingsWrapper.isRsyncUpload() && DatasetUtil.isAppropriateStorageDriver(dataset); + return settingsWrapper.isRsyncUpload() && DatasetUtil.isRsyncAppropriateStorageDriver(dataset); } + + public boolean globusUploadSupported() { + return settingsWrapper.isGlobusUpload() && settingsWrapper.isGlobusEnabledStorageDriver(dataset.getEffectiveStorageDriverId()); + } + + private String init(boolean initFull) { @@ -1999,10 +2015,10 @@ private String init(boolean initFull) { } } catch (RuntimeException ex) { logger.warning("Problem getting rsync script(RuntimeException): " + ex.getLocalizedMessage()); - FacesContext.getCurrentInstance().addMessage(null, new FacesMessage(FacesMessage.SEVERITY_ERROR, "Problem getting rsync script:", ex.getLocalizedMessage())); + FacesContext.getCurrentInstance().addMessage(null, new FacesMessage(FacesMessage.SEVERITY_ERROR, "Problem getting rsync script:", ex.getLocalizedMessage())); } catch (CommandException cex) { logger.warning("Problem getting rsync script (Command Exception): " + cex.getLocalizedMessage()); - FacesContext.getCurrentInstance().addMessage(null, new FacesMessage(FacesMessage.SEVERITY_ERROR, "Problem getting rsync script:", cex.getLocalizedMessage())); + FacesContext.getCurrentInstance().addMessage(null, new FacesMessage(FacesMessage.SEVERITY_ERROR, "Problem getting rsync script:", cex.getLocalizedMessage())); } } @@ -2049,6 +2065,8 @@ private String init(boolean initFull) { selectedTemplate = testT; } } + //Initalize with the default if there is one + dataset.setTemplate(selectedTemplate); workingVersion = dataset.getEditVersion(selectedTemplate, null); updateDatasetFieldInputLevels(); } else { @@ -2056,7 +2074,7 @@ private String init(boolean initFull) { updateDatasetFieldInputLevels(); } - if 
(settingsWrapper.isTrueForKey(SettingsServiceBean.Key.PublicInstall, false)){ + if (isHasPublicStore()){ JH.addMessage(FacesMessage.SEVERITY_WARN, BundleUtil.getStringFromBundle("dataset.message.label.fileAccess"), BundleUtil.getStringFromBundle("dataset.message.publicInstall")); } @@ -2169,6 +2187,10 @@ private void displayLockInfo(Dataset dataset) { BundleUtil.getStringFromBundle("file.rsyncUpload.inProgressMessage.details")); lockedDueToDcmUpload = true; } + if (dataset.isLockedFor(DatasetLock.Reason.GlobusUpload)) { + JH.addMessage(FacesMessage.SEVERITY_WARN, BundleUtil.getStringFromBundle("file.globusUpload.inProgressMessage.summary"), + BundleUtil.getStringFromBundle("file.globusUpload.inProgressMessage.details")); + } //This is a hack to remove dataset locks for File PID registration if //the dataset is released //in testing we had cases where datasets with 1000 files were remaining locked after being published successfully @@ -2748,7 +2770,7 @@ public String updateCurrentVersion() { */ try { updateVersion = commandEngine.submit(archiveCommand); - if (updateVersion.getArchivalCopyLocation() != null) { + if (!updateVersion.getArchivalCopyLocationStatus().equals(DatasetVersion.ARCHIVAL_STATUS_FAILURE)) { successMsg = BundleUtil.getStringFromBundle("datasetversion.update.archive.success"); } else { errorMsg = BundleUtil.getStringFromBundle("datasetversion.update.archive.failure"); @@ -2890,7 +2912,7 @@ public String editFileMetadata(){ public String deleteDatasetVersion() { DeleteDatasetVersionCommand cmd; - + Map deleteStorageLocations = datafileService.getPhysicalFilesToDelete(dataset.getLatestVersion()); boolean deleteCommandSuccess = false; try { @@ -2902,7 +2924,7 @@ public String deleteDatasetVersion() { JH.addMessage(FacesMessage.SEVERITY_FATAL, BundleUtil.getStringFromBundle("dataset.message.deleteFailure")); logger.severe(ex.getMessage()); } - + if (deleteCommandSuccess && !deleteStorageLocations.isEmpty()) { 
datafileService.finalizeFileDeletes(deleteStorageLocations); } @@ -3566,6 +3588,7 @@ public String save() { if (editMode == EditMode.CREATE) { //Lock the metadataLanguage once created dataset.setMetadataLanguage(getEffectiveMetadataLanguage()); + //ToDo - could drop use of selectedTemplate and just use the persistent dataset.getTemplate() if ( selectedTemplate != null ) { if ( isSessionUserAuthenticated() ) { cmd = new CreateNewDatasetCommand(dataset, dvRequestService.getDataverseRequest(), false, selectedTemplate); @@ -5016,7 +5039,7 @@ public boolean isFileAccessRequestMultiButtonRequired(){ } for (FileMetadata fmd : workingVersion.getFileMetadatas()){ //Change here so that if all restricted files have pending requests there's no Request Button - if ((!this.fileDownloadHelper.canDownloadFile(fmd) && (fmd.getDataFile().getFileAccessRequesters() == null + if ((!this.fileDownloadHelper.canDownloadFile(fmd) && (fmd.getDataFile().getFileAccessRequesters() == null || ( fmd.getDataFile().getFileAccessRequesters() != null && !fmd.getDataFile().getFileAccessRequesters().contains((AuthenticatedUser)session.getUser()))))){ return true; @@ -5550,17 +5573,20 @@ public void refreshPaginator() { */ public void archiveVersion(Long id) { if (session.getUser() instanceof AuthenticatedUser) { - AuthenticatedUser au = ((AuthenticatedUser) session.getUser()); - DatasetVersion dv = datasetVersionService.retrieveDatasetVersionByVersionId(id).getDatasetVersion(); - String className = settingsService.getValueForKey(SettingsServiceBean.Key.ArchiverClassName); + String className = settingsWrapper.getValueForKey(SettingsServiceBean.Key.ArchiverClassName, null); AbstractSubmitToArchiveCommand cmd = ArchiverUtil.createSubmitToArchiveCommand(className, dvRequestService.getDataverseRequest(), dv); if (cmd != null) { try { DatasetVersion version = commandEngine.submit(cmd); - logger.info("Archived to " + version.getArchivalCopyLocation()); + if 
(!version.getArchivalCopyLocationStatus().equals(DatasetVersion.ARCHIVAL_STATUS_FAILURE)) { + logger.info( + "DatasetVersion id=" + version.getId() + " submitted to Archive, status: " + dv.getArchivalCopyLocationStatus()); + } else { + logger.severe("Error submitting version " + version.getId() + " due to conflict/error at Archive"); + } if (version.getArchivalCopyLocation() != null) { - resetVersionTabList(); + setVersionTabList(resetVersionTabList()); this.setVersionTabListForPostLoad(getVersionTabList()); JsfHelper.addSuccessMessage(BundleUtil.getStringFromBundle("datasetversion.archive.success")); } else { @@ -5577,6 +5603,70 @@ public void archiveVersion(Long id) { } } } + + public boolean isArchivable() { + if (archivable == null) { + archivable = false; + String className = settingsWrapper.getValueForKey(SettingsServiceBean.Key.ArchiverClassName, null); + if (className != null) { + try { + Class clazz = Class.forName(className); + Method m = clazz.getMethod("isArchivable", Dataset.class, SettingsWrapper.class); + Object[] params = { dataset, settingsWrapper }; + archivable = ((Boolean) m.invoke(null, params) == true); + } catch (ClassNotFoundException | IllegalAccessException | IllegalArgumentException + | InvocationTargetException | NoSuchMethodException | SecurityException e) { + logger.warning("Failed to call isArchivable on configured archiver class: " + className); + e.printStackTrace(); + } + } + } + return archivable; + } + + public boolean isVersionArchivable() { + if (versionArchivable == null) { + // If this dataset isn't in an archivable collection return false + versionArchivable = false; + if (isArchivable()) { + boolean checkForArchivalCopy = false; + // Otherwise, we need to know if the archiver is single-version-only + // If it is, we have to check for an existing archived version to answer the + // question + String className = settingsWrapper.getValueForKey(SettingsServiceBean.Key.ArchiverClassName, null); + if (className != null) { + try { 
+ Class clazz = Class.forName(className); + Method m = clazz.getMethod("isSingleVersion", SettingsWrapper.class); + Object[] params = { settingsWrapper }; + checkForArchivalCopy = (Boolean) m.invoke(null, params); + + if (checkForArchivalCopy) { + // If we have to check (single version archiving), we can't allow archiving if + // one version is already archived (or attempted - any non-null status) + versionArchivable = !isSomeVersionArchived(); + } else { + // If we allow multiple versions or didn't find one that has had archiving run + // on it, we can archive, so return true + versionArchivable = true; + } + } catch (ClassNotFoundException | IllegalAccessException | IllegalArgumentException + | InvocationTargetException | NoSuchMethodException | SecurityException e) { + logger.warning("Failed to call isSingleVersion on configured archiver class: " + className); + e.printStackTrace(); + } + } + } + } + return versionArchivable; + } + + public boolean isSomeVersionArchived() { + if (someVersionArchived == null) { + someVersionArchived = ArchiverUtil.isSomeVersionArchived(dataset); + } + return someVersionArchived; + } private static Date getFileDateToCompare(FileMetadata fileMetadata) { DataFile datafile = fileMetadata.getDataFile(); @@ -5637,9 +5727,7 @@ public void explore(ExternalTool externalTool) { apiToken.setTokenString(privUrl.getToken()); } ExternalToolHandler externalToolHandler = new ExternalToolHandler(externalTool, dataset, apiToken, session.getLocaleCode()); - String toolUrl = externalToolHandler.getToolUrlWithQueryParams(); - logger.fine("Exploring with " + toolUrl); - PrimeFaces.current().executeScript("window.open('"+toolUrl + "', target='_blank');"); + PrimeFaces.current().executeScript(externalToolHandler.getExploreScript()); } private FileMetadata fileMetadataForAction; @@ -5679,7 +5767,7 @@ public boolean isFileDeleted (DataFile dataFile) { return dataFile.getDeleted(); } - + public String getEffectiveMetadataLanguage() { return 
getEffectiveMetadataLanguage(false); } @@ -5690,16 +5778,16 @@ public String getEffectiveMetadataLanguage(boolean ofParent) { } return mdLang; } - + public String getLocaleDisplayName(String code) { String displayName = settingsWrapper.getBaseMetadataLanguageMap(false).get(code); if(displayName==null && !code.equals(DvObjectContainer.UNDEFINED_METADATA_LANGUAGE_CODE)) { //Default (for cases such as :when a Dataset has a metadatalanguage code but :MetadataLanguages is no longer defined). - displayName = new Locale(code).getDisplayName(); + displayName = new Locale(code).getDisplayName(); } - return displayName; + return displayName; } - + public Set> getMetadataLanguages() { return settingsWrapper.getBaseMetadataLanguageMap(false).entrySet(); } @@ -5711,7 +5799,7 @@ public List getVocabScripts() { public String getFieldLanguage(String languages) { return fieldService.getFieldLanguage(languages,session.getLocaleCode()); } - + public void setExternalStatus(String status) { try { dataset = commandEngine.submit(new SetCurationStatusCommand(dvRequestService.getDataverseRequest(), dataset, status)); @@ -5942,7 +6030,7 @@ public void validateTerms(FacesContext context, UIComponent component, Object va } } } - + public boolean downloadingRestrictedFiles() { if (fileMetadataForAction != null) { return fileMetadataForAction.isRestricted(); @@ -5954,4 +6042,24 @@ public boolean downloadingRestrictedFiles() { } return false; } + + + //Determines whether this Dataset uses a public store and therefore doesn't support embargoed or restricted files + public boolean isHasPublicStore() { + return settingsWrapper.isTrueForKey(SettingsServiceBean.Key.PublicInstall, StorageIO.isPublicStore(dataset.getEffectiveStorageDriverId())); + } + + public void startGlobusTransfer() { + ApiToken apiToken = null; + User user = session.getUser(); + if (user instanceof AuthenticatedUser) { + apiToken = authService.findApiTokenByUser((AuthenticatedUser) user); + } else if (user instanceof 
PrivateUrlUser) { + PrivateUrlUser privateUrlUser = (PrivateUrlUser) user; + PrivateUrl privUrl = privateUrlService.getPrivateUrlFromDatasetId(privateUrlUser.getDatasetId()); + apiToken = new ApiToken(); + apiToken.setTokenString(privUrl.getToken()); + } + PrimeFaces.current().executeScript(globusService.getGlobusDownloadScript(dataset, apiToken)); + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index b9b54fb6216..91ec050fe5c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -16,23 +16,17 @@ import edu.harvard.iq.dataverse.engine.command.impl.FinalizeDatasetPublicationCommand; import edu.harvard.iq.dataverse.engine.command.impl.GetDatasetStorageSizeCommand; import edu.harvard.iq.dataverse.export.ExportService; +import edu.harvard.iq.dataverse.globus.GlobusServiceBean; import edu.harvard.iq.dataverse.harvest.server.OAIRecordServiceBean; import edu.harvard.iq.dataverse.search.IndexServiceBean; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.SystemConfig; import edu.harvard.iq.dataverse.workflows.WorkflowComment; -import java.io.File; -import java.io.IOException; -import java.io.InputStream; + +import java.io.*; import java.text.SimpleDateFormat; -import java.util.ArrayList; -import java.util.Date; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; +import java.util.*; import java.util.logging.FileHandler; import java.util.logging.Level; import java.util.logging.Logger; @@ -96,6 +90,12 @@ public class DatasetServiceBean implements java.io.Serializable { @EJB SystemConfig systemConfig; + @EJB + GlobusServiceBean globusServiceBean; + + @EJB + UserNotificationServiceBean userNotificationService; + private 
static final SimpleDateFormat logFormatter = new SimpleDateFormat("yyyy-MM-dd'T'HH-mm-ss"); @PersistenceContext(unitName = "VDCNet-ejbPU") @@ -802,6 +802,35 @@ public void exportAllDatasets(boolean forceReExport) { } + + @Asynchronous + public void reExportDatasetAsync(Dataset dataset) { + exportDataset(dataset, true); + } + + public void exportDataset(Dataset dataset, boolean forceReExport) { + if (dataset != null) { + // Note that the logic for handling a dataset is similar to what is implemented in exportAllDatasets, + // but when only one dataset is exported we do not log in a separate export logging file + if (dataset.isReleased() && dataset.getReleasedVersion() != null && !dataset.isDeaccessioned()) { + + // can't trust dataset.getPublicationDate(), no. + Date publicationDate = dataset.getReleasedVersion().getReleaseTime(); // we know this dataset has a non-null released version! Maybe not - SEK 8/19 (We do now! :) + if (forceReExport || (publicationDate != null + && (dataset.getLastExportTime() == null + || dataset.getLastExportTime().before(publicationDate)))) { + try { + recordService.exportAllFormatsInNewTransaction(dataset); + logger.info("Success exporting dataset: " + dataset.getDisplayName() + " " + dataset.getGlobalIdString()); + } catch (Exception ex) { + logger.info("Error exporting dataset: " + dataset.getDisplayName() + " " + dataset.getGlobalIdString() + "; " + ex.getMessage()); + } + } + } + } + + } + public String getReminderString(Dataset dataset, boolean canPublishDataset) { return getReminderString( dataset, canPublishDataset, false); } @@ -842,9 +871,11 @@ public String getReminderString(Dataset dataset, boolean canPublishDataset, bool } } - public void updateLastExportTimeStamp(Long datasetId) { - Date now = new Date(); - em.createNativeQuery("UPDATE Dataset SET lastExportTime='"+now.toString()+"' WHERE id="+datasetId).executeUpdate(); + @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) + public int clearAllExportTimes() { + 
Query clearExportTimes = em.createQuery("UPDATE Dataset SET lastExportTime = NULL"); + int numRowsUpdated = clearExportTimes.executeUpdate(); + return numRowsUpdated; } public Dataset setNonDatasetFileAsThumbnail(Dataset dataset, InputStream inputStream) { @@ -1135,4 +1166,5 @@ public void deleteHarvestedDataset(Dataset dataset, DataverseRequest request, Lo hdLogger.warning("Failed to destroy the dataset"); } } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index faa91b87e12..30815c43381 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -6,11 +6,11 @@ import edu.harvard.iq.dataverse.branding.BrandingUtil; import edu.harvard.iq.dataverse.dataset.DatasetUtil; import edu.harvard.iq.dataverse.license.License; -import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.StringUtil; import edu.harvard.iq.dataverse.util.SystemConfig; import edu.harvard.iq.dataverse.util.DateUtil; +import edu.harvard.iq.dataverse.util.json.JsonUtil; import edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder; import edu.harvard.iq.dataverse.workflows.WorkflowComment; import java.io.Serializable; @@ -27,6 +27,7 @@ import javax.json.Json; import javax.json.JsonArray; import javax.json.JsonArrayBuilder; +import javax.json.JsonObject; import javax.json.JsonObjectBuilder; import javax.persistence.CascadeType; import javax.persistence.Column; @@ -39,6 +40,8 @@ import javax.persistence.Index; import javax.persistence.JoinColumn; import javax.persistence.ManyToOne; +import javax.persistence.NamedQueries; +import javax.persistence.NamedQuery; import javax.persistence.OneToMany; import javax.persistence.OneToOne; import javax.persistence.OrderBy; @@ -59,6 +62,13 @@ * * @author skraffmiller */ + +@NamedQueries({ + @NamedQuery(name = 
"DatasetVersion.findUnarchivedReleasedVersion", + query = "SELECT OBJECT(o) FROM DatasetVersion AS o WHERE o.dataset.harvestedFrom IS NULL and o.releaseTime IS NOT NULL and o.archivalCopyLocation IS NULL" + )}) + + @Entity @Table(indexes = {@Index(columnList="dataset_id")}, uniqueConstraints = @UniqueConstraint(columnNames = {"dataset_id,versionnumber,minorversionnumber"})) @@ -94,6 +104,14 @@ public enum VersionState { public static final int ARCHIVE_NOTE_MAX_LENGTH = 1000; public static final int VERSION_NOTE_MAX_LENGTH = 1000; + //Archival copies: Status message required components + public static final String ARCHIVAL_STATUS = "status"; + public static final String ARCHIVAL_STATUS_MESSAGE = "message"; + //Archival Copies: Allowed Statuses + public static final String ARCHIVAL_STATUS_PENDING = "pending"; + public static final String ARCHIVAL_STATUS_SUCCESS = "success"; + public static final String ARCHIVAL_STATUS_FAILURE = "failure"; + @Id @GeneratedValue(strategy = GenerationType.IDENTITY) private Long id; @@ -152,6 +170,11 @@ public enum VersionState { // removed pending further investigation (v4.13) private String archiveNote; + // Originally a simple string indicating the location of the archival copy. As + // of v5.12, repurposed to provide a more general json archival status (failure, + // pending, success) and message (serialized as a string). The archival copy + // location is now expected as the contents of the message for the status + // 'success'. 
See the /api/datasets/{id}/{version}/archivalStatus API calls for more details @Column(nullable=true, columnDefinition = "TEXT") private String archivalCopyLocation; @@ -180,6 +203,8 @@ public enum VersionState { @Transient private DatasetVersionDifference dvd; + @Transient + private JsonObject archivalStatus; public Long getId() { return this.id; @@ -319,9 +344,39 @@ public void setArchiveNote(String note) { public String getArchivalCopyLocation() { return archivalCopyLocation; } + + public String getArchivalCopyLocationStatus() { + populateArchivalStatus(false); + + if(archivalStatus!=null) { + return archivalStatus.getString(ARCHIVAL_STATUS); + } + return null; + } + public String getArchivalCopyLocationMessage() { + populateArchivalStatus(false); + if(archivalStatus!=null) { + return archivalStatus.getString(ARCHIVAL_STATUS_MESSAGE); + } + return null; + } + + private void populateArchivalStatus(boolean force) { + if(archivalStatus ==null || force) { + if(archivalCopyLocation!=null) { + try { + archivalStatus = JsonUtil.getJsonObject(archivalCopyLocation); + } catch(Exception e) { + logger.warning("DatasetVersion id: " + id + "has a non-JsonObject value, parsing error: " + e.getMessage()); + logger.fine(archivalCopyLocation); + } + } + } + } public void setArchivalCopyLocation(String location) { this.archivalCopyLocation = location; + populateArchivalStatus(true); } public String getDeaccessionLink() { diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java index 580d95b4b1d..23fc1961b7d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java @@ -1187,4 +1187,32 @@ private DatasetVersion getPreviousVersionWithUnf(DatasetVersion datasetVersion) return null; } + /** + * Merges the passed datasetversion to the persistence context. 
+ * @param ver the DatasetVersion whose new state we want to persist. + * @return The managed entity representing {@code ver}. + */ + public DatasetVersion merge( DatasetVersion ver ) { + return em.merge(ver); + } + + /** + * Execute a query to return DatasetVersion + * + * @param queryString + * @return + */ + public List getUnarchivedDatasetVersions(){ + + try { + List dsl = em.createNamedQuery("DatasetVersion.findUnarchivedReleasedVersion", DatasetVersion.class).getResultList(); + return dsl; + } catch (javax.persistence.NoResultException e) { + logger.log(Level.FINE, "No unarchived DatasetVersions found: {0}"); + return null; + } catch (EJBException e) { + logger.log(Level.WARNING, "EJBException exception: {0}", e.getMessage()); + return null; + } + } // end getUnarchivedDatasetVersions } // end class diff --git a/src/main/java/edu/harvard/iq/dataverse/Dataverse.java b/src/main/java/edu/harvard/iq/dataverse/Dataverse.java index 342aaec187a..bc8716b6129 100644 --- a/src/main/java/edu/harvard/iq/dataverse/Dataverse.java +++ b/src/main/java/edu/harvard/iq/dataverse/Dataverse.java @@ -5,6 +5,8 @@ import edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.search.savedsearch.SavedSearch; import edu.harvard.iq.dataverse.util.BundleUtil; +import edu.harvard.iq.dataverse.util.SystemConfig; + import java.util.ArrayList; import java.util.HashSet; import java.util.Iterator; @@ -322,8 +324,31 @@ public boolean isHarvested() { return harvestingClient != null; } */ - - + private boolean metadataBlockFacetRoot; + + public boolean isMetadataBlockFacetRoot() { + return metadataBlockFacetRoot; + } + + public void setMetadataBlockFacetRoot(boolean metadataBlockFacetRoot) { + this.metadataBlockFacetRoot = metadataBlockFacetRoot; + } + + @OneToMany(mappedBy = "dataverse",cascade={ CascadeType.REMOVE, CascadeType.MERGE,CascadeType.PERSIST }, orphanRemoval=true) + private List metadataBlockFacets = new ArrayList<>(); + + public List getMetadataBlockFacets() 
{ + if (isMetadataBlockFacetRoot() || getOwner() == null) { + return metadataBlockFacets; + } else { + return getOwner().getMetadataBlockFacets(); + } + } + + public void setMetadataBlockFacets(List metadataBlockFacets) { + this.metadataBlockFacets = metadataBlockFacets; + } + public List getParentGuestbooks() { List retList = new ArrayList<>(); Dataverse testDV = this; @@ -765,4 +790,8 @@ public boolean isAncestorOf( DvObject other ) { } return false; } + + public String getLocalURL() { + return SystemConfig.getDataverseSiteUrlStatic() + "/dataverse/" + this.getAlias(); + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseMetadataBlockFacet.java b/src/main/java/edu/harvard/iq/dataverse/DataverseMetadataBlockFacet.java new file mode 100644 index 00000000000..a2659b81974 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/DataverseMetadataBlockFacet.java @@ -0,0 +1,82 @@ +package edu.harvard.iq.dataverse; + +import javax.persistence.Entity; +import javax.persistence.GeneratedValue; +import javax.persistence.GenerationType; +import javax.persistence.Id; +import javax.persistence.Index; +import javax.persistence.JoinColumn; +import javax.persistence.ManyToOne; +import javax.persistence.Table; +import java.io.Serializable; +import java.util.Objects; + +/** + * + * @author adaybujeda + */ +@Entity +@Table(indexes = {@Index(columnList="dataverse_id") + , @Index(columnList="metadatablock_id")}) +public class DataverseMetadataBlockFacet implements Serializable { + private static final long serialVersionUID = 1L; + + @Id + @GeneratedValue(strategy = GenerationType.IDENTITY) + private Long id; + + @ManyToOne + @JoinColumn(name = "dataverse_id") + private Dataverse dataverse; + + @ManyToOne + @JoinColumn(name = "metadatablock_id") + private MetadataBlock metadataBlock; + + public Long getId() { + return id; + } + + public void setId(Long id) { + this.id = id; + } + + public Dataverse getDataverse() { + return dataverse; + } + + public void 
setDataverse(Dataverse dataverse) { + this.dataverse = dataverse; + } + + public MetadataBlock getMetadataBlock() { + return metadataBlock; + } + + public void setMetadataBlock(MetadataBlock metadataBlock) { + this.metadataBlock = metadataBlock; + } + + @Override + public int hashCode() { + int hash = 0; + hash += (this.id != null ? this.id.hashCode() : 0); + return hash; + } + + @Override + public boolean equals(Object object) { + if (!(object instanceof DataverseMetadataBlockFacet)) { + return false; + } + DataverseMetadataBlockFacet other = (DataverseMetadataBlockFacet) object; + return !(!Objects.equals(this.id, other.id) && (this.id == null || !this.id.equals(other.id))); + } + + @Override + public String toString() { + return String.format("edu.harvard.iq.dataverse.DataverseMetadataBlockFacet[ id=%s ]", id); + } + +} + diff --git a/src/main/java/edu/harvard/iq/dataverse/DvObjectContainer.java b/src/main/java/edu/harvard/iq/dataverse/DvObjectContainer.java index 746efded48b..6ff01ef3ea8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DvObjectContainer.java +++ b/src/main/java/edu/harvard/iq/dataverse/DvObjectContainer.java @@ -15,7 +15,6 @@ public abstract class DvObjectContainer extends DvObject { - //Default to "file" is for tests only public static final String UNDEFINED_METADATA_LANGUAGE_CODE = "undefined"; //Used in dataverse.xhtml as a non-null selection option value (indicating inheriting the default) @@ -93,6 +92,9 @@ public void setMetadataLanguage(String ml) { } } + public static boolean isMetadataLanguageSet(String mdLang) { + return mdLang!=null && !mdLang.equals(UNDEFINED_METADATA_LANGUAGE_CODE); + } /* Dataverse collections can be configured to allow use of Curation labels and have this inheritable value to decide which set of labels to use. 
diff --git a/src/main/java/edu/harvard/iq/dataverse/EditDataFilesPageHelper.java b/src/main/java/edu/harvard/iq/dataverse/EditDataFilesPageHelper.java index c708c2e28e2..1bf6bee82eb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/EditDataFilesPageHelper.java +++ b/src/main/java/edu/harvard/iq/dataverse/EditDataFilesPageHelper.java @@ -2,9 +2,11 @@ import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.file.CreateDataFileResult; +import org.apache.commons.text.StringEscapeUtils; import javax.ejb.Stateless; import javax.inject.Inject; +import java.util.Arrays; import java.util.List; import java.util.Optional; import java.util.stream.Collectors; @@ -22,6 +24,14 @@ public class EditDataFilesPageHelper { @Inject private SettingsWrapper settingsWrapper; + public String consolidateHtmlErrorMessages(List errorMessages) { + if(errorMessages == null || errorMessages.isEmpty()) { + return null; + } + + return String.join("
        ", errorMessages); + } + public String getHtmlErrorMessage(CreateDataFileResult createDataFileResult) { List errors = createDataFileResult.getErrors(); if(errors == null || errors.isEmpty()) { @@ -33,8 +43,8 @@ public String getHtmlErrorMessage(CreateDataFileResult createDataFileResult) { return null; } - String typeMessage = Optional.ofNullable(BundleUtil.getStringFromBundle(createDataFileResult.getBundleKey())).orElse("Error processing file"); - String errorsMessage = errors.stream().limit(maxErrorsToShow).map(text -> String.format("
      • %s
      • ", text)).collect(Collectors.joining()); - return String.format("%s:
          %s
        ", typeMessage, errorsMessage); + String typeMessage = Optional.ofNullable(BundleUtil.getStringFromBundle(createDataFileResult.getBundleKey(), Arrays.asList(createDataFileResult.getFilename()))).orElse("Error processing file"); + String errorsMessage = errors.stream().limit(maxErrorsToShow).map(text -> String.format("
      • %s
      • ", StringEscapeUtils.escapeHtml4(text))).collect(Collectors.joining()); + return String.format("%s
          %s
        ", typeMessage, errorsMessage); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java index b1d178f51d9..6cf294ffd6d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java @@ -51,6 +51,7 @@ import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.logging.Logger; import javax.ejb.EJB; import javax.ejb.EJBException; @@ -649,8 +650,8 @@ public String init() { setUpRsync(); } - if (settingsService.isTrueForKey(SettingsServiceBean.Key.PublicInstall, false)){ - JH.addMessage(FacesMessage.SEVERITY_WARN, getBundleString("dataset.message.publicInstall")); + if (isHasPublicStore()){ + JH.addMessage(FacesMessage.SEVERITY_WARN, getBundleString("dataset.message.label.fileAccess"), getBundleString("dataset.message.publicInstall")); } return null; @@ -1491,7 +1492,7 @@ public void handleDropBoxUpload(ActionEvent event) { //datafiles = ingestService.createDataFiles(workingVersion, dropBoxStream, fileName, "application/octet-stream"); CreateDataFileResult createDataFilesResult = FileUtil.createDataFiles(workingVersion, dropBoxStream, fileName, "application/octet-stream", null, null, systemConfig); datafiles = createDataFilesResult.getDataFiles(); - errorMessage = editDataFilesPageHelper.getHtmlErrorMessage(createDataFilesResult); + Optional.ofNullable(editDataFilesPageHelper.getHtmlErrorMessage(createDataFilesResult)).ifPresent(errorMessage -> errorMessages.add(errorMessage)); } catch (IOException ex) { this.logger.log(Level.SEVERE, "Error during ingest of DropBox file {0} from link {1}", new Object[]{fileName, fileLink}); @@ -1745,12 +1746,13 @@ public void uploadFinished() { uploadedFiles.clear(); uploadInProgress.setValue(false); } - if(errorMessage != null) { - FacesContext.getCurrentInstance().addMessage(null, new 
FacesMessage(FacesMessage.SEVERITY_ERROR, BundleUtil.getStringFromBundle("dataset.file.uploadFailure"), errorMessage)); - PrimeFaces.current().ajax().update(":messagePanel"); - } + // refresh the warning message below the upload component, if exists: if (uploadComponentId != null) { + if(!errorMessages.isEmpty()) { + FacesContext.getCurrentInstance().addMessage(uploadComponentId, new FacesMessage(FacesMessage.SEVERITY_ERROR, BundleUtil.getStringFromBundle("dataset.file.uploadFailure"), editDataFilesPageHelper.consolidateHtmlErrorMessages(errorMessages))); + } + if (uploadWarningMessage != null) { if (existingFilesWithDupeContent != null || newlyUploadedFilesWithDupeContent != null) { setWarningMessageForAlreadyExistsPopUp(uploadWarningMessage); @@ -1797,7 +1799,7 @@ public void uploadFinished() { multipleDupesNew = false; uploadWarningMessage = null; uploadSuccessMessage = null; - errorMessage = null; + errorMessages = new ArrayList<>(); } private String warningMessageForFileTypeDifferentPopUp; @@ -1931,7 +1933,7 @@ private void handleReplaceFileUpload(String fullStorageLocation, fileReplacePageHelper.resetReplaceFileHelper(); saveEnabled = false; - String storageIdentifier = DataAccess.getStorarageIdFromLocation(fullStorageLocation); + String storageIdentifier = DataAccess.getStorageIdFromLocation(fullStorageLocation); if (fileReplacePageHelper.handleNativeFileUpload(null, storageIdentifier, fileName, contentType, checkSumValue, checkSumType)) { saveEnabled = true; @@ -1948,7 +1950,7 @@ private void handleReplaceFileUpload(String fullStorageLocation, } private String uploadWarningMessage = null; - private String errorMessage = null; + private List errorMessages = new ArrayList<>(); private String uploadSuccessMessage = null; private String uploadComponentId = null; @@ -2020,7 +2022,11 @@ public void handleFileUpload(FileUploadEvent event) throws IOException { // zip file. 
CreateDataFileResult createDataFilesResult = FileUtil.createDataFiles(workingVersion, uFile.getInputStream(), uFile.getFileName(), uFile.getContentType(), null, null, systemConfig); dFileList = createDataFilesResult.getDataFiles(); - errorMessage = editDataFilesPageHelper.getHtmlErrorMessage(createDataFilesResult); + String createDataFilesError = editDataFilesPageHelper.getHtmlErrorMessage(createDataFilesResult); + if(createDataFilesError != null) { + errorMessages.add(createDataFilesError); + uploadComponentId = event.getComponent().getClientId(); + } } catch (IOException ioex) { logger.warning("Failed to process and/or save the file " + uFile.getFileName() + "; " + ioex.getMessage()); @@ -2072,8 +2078,12 @@ public void handleExternalUpload() { if (!checksumTypeString.isBlank()) { checksumType = ChecksumType.fromString(checksumTypeString); } + + //Should only be one colon with current design int lastColon = fullStorageIdentifier.lastIndexOf(':'); - String storageLocation = fullStorageIdentifier.substring(0, lastColon) + "/" + dataset.getAuthorityForFileStorage() + "/" + dataset.getIdentifierForFileStorage() + "/" + fullStorageIdentifier.substring(lastColon + 1); + String storageLocation = fullStorageIdentifier.substring(0,lastColon) + "/" + dataset.getAuthorityForFileStorage() + "/" + dataset.getIdentifierForFileStorage() + "/" + fullStorageIdentifier.substring(lastColon+1); + storageLocation = DataAccess.expandStorageIdentifierIfNeeded(storageLocation); + if (uploadInProgress.isFalse()) { uploadInProgress.setValue(true); } @@ -2127,7 +2137,7 @@ public void handleExternalUpload() { //datafiles = ingestService.createDataFiles(workingVersion, dropBoxStream, fileName, "application/octet-stream"); CreateDataFileResult createDataFilesResult = FileUtil.createDataFiles(workingVersion, null, fileName, contentType, fullStorageIdentifier, checksumValue, checksumType, systemConfig); datafiles = createDataFilesResult.getDataFiles(); - errorMessage = 
editDataFilesPageHelper.getHtmlErrorMessage(createDataFilesResult); + Optional.ofNullable(editDataFilesPageHelper.getHtmlErrorMessage(createDataFilesResult)).ifPresent(errorMessage -> errorMessages.add(errorMessage)); } catch (IOException ex) { logger.log(Level.SEVERE, "Error during ingest of file {0}", new Object[]{fileName}); } @@ -3038,16 +3048,24 @@ public void saveAdvancedOptions() { } public boolean rsyncUploadSupported() { - // ToDo - rsync was written before multiple store support and currently is hardcoded to use the "s3" store. + // ToDo - rsync was written before multiple store support and currently is hardcoded to use the DataAccess.S3 store. // When those restrictions are lifted/rsync can be configured per store, the test in the // Dataset Util method should be updated - if (settingsWrapper.isRsyncUpload() && !DatasetUtil.isAppropriateStorageDriver(dataset)) { + if (settingsWrapper.isRsyncUpload() && !DatasetUtil.isRsyncAppropriateStorageDriver(dataset)) { //dataset.file.upload.setUp.rsync.failed.detail FacesMessage message = new FacesMessage(FacesMessage.SEVERITY_ERROR, BundleUtil.getStringFromBundle("dataset.file.upload.setUp.rsync.failed"), BundleUtil.getStringFromBundle("dataset.file.upload.setUp.rsync.failed.detail")); FacesContext.getCurrentInstance().addMessage(null, message); } - return settingsWrapper.isRsyncUpload() && DatasetUtil.isAppropriateStorageDriver(dataset); + return settingsWrapper.isRsyncUpload() && DatasetUtil.isRsyncAppropriateStorageDriver(dataset); + } + + // Globus must be one of the upload methods listed in the :UploadMethods setting + // and the dataset's store must be in the list allowed by the GlobusStores + // setting + public boolean globusUploadSupported() { + return settingsWrapper.isGlobusUpload() + && settingsWrapper.isGlobusEnabledStorageDriver(dataset.getEffectiveStorageDriverId()); } private void populateFileMetadatas() { @@ -3083,4 +3101,9 @@ public boolean isFileAccessRequest() { public void 
setFileAccessRequest(boolean fileAccessRequest) { this.fileAccessRequest = fileAccessRequest; } + + //Determines whether this Dataset uses a public store and therefore doesn't support embargoed or restricted files + public boolean isHasPublicStore() { + return settingsWrapper.isTrueForKey(SettingsServiceBean.Key.PublicInstall, StorageIO.isPublicStore(dataset.getEffectiveStorageDriverId())); + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java index 6d3929a55e2..65e6b259bf4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java @@ -18,6 +18,7 @@ import edu.harvard.iq.dataverse.privateurl.PrivateUrl; import edu.harvard.iq.dataverse.privateurl.PrivateUrlServiceBean; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.StringUtil; import java.io.IOException; @@ -313,9 +314,7 @@ public void explore(GuestbookResponse guestbookResponse, FileMetadata fmd, Exter ExternalToolHandler externalToolHandler = new ExternalToolHandler(externalTool, dataFile, apiToken, fmd, localeCode); // Persist the name of the tool (i.e. "Data Explorer", etc.) guestbookResponse.setDownloadtype(externalTool.getDisplayName()); - String toolUrl = externalToolHandler.getToolUrlWithQueryParams(); - logger.fine("Exploring with " + toolUrl); - PrimeFaces.current().executeScript("window.open('"+toolUrl + "', target='_blank');"); + PrimeFaces.current().executeScript(externalToolHandler.getExploreScript()); // This is the old logic from TwoRavens, null checks and all. 
if (guestbookResponse != null && guestbookResponse.isWriteResponse() && ((fmd != null && fmd.getDataFile() != null) || guestbookResponse.getDataFile() != null)) { @@ -561,12 +560,12 @@ public void addFileToCustomZipJob(String key, DataFile dataFile, Timestamp times public String getDirectStorageLocatrion(String storageLocation) { String storageDriverId; - int separatorIndex = storageLocation.indexOf("://"); + int separatorIndex = storageLocation.indexOf(DataAccess.SEPARATOR); if ( separatorIndex > 0 ) { storageDriverId = storageLocation.substring(0,separatorIndex); String storageType = DataAccess.getDriverType(storageDriverId); - if ("file".equals(storageType) || "s3".equals(storageType)) { + if (DataAccess.FILE.equals(storageType) || DataAccess.S3.equals(storageType)) { return storageType.concat(storageLocation.substring(separatorIndex)); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/FilePage.java b/src/main/java/edu/harvard/iq/dataverse/FilePage.java index 3fa6d4fdfff..7f2c6dfca5c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/FilePage.java +++ b/src/main/java/edu/harvard/iq/dataverse/FilePage.java @@ -13,6 +13,7 @@ import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.authorization.users.PrivateUrlUser; import edu.harvard.iq.dataverse.authorization.users.User; +import edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.dataaccess.StorageIO; import edu.harvard.iq.dataverse.engine.command.Command; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; @@ -843,7 +844,7 @@ public String getComputeUrl() throws IOException { if (swiftObject != null) { swiftObject.open(); //generate a temp url for a file - if (settingsService.isTrueForKey(SettingsServiceBean.Key.PublicInstall, false)) { + if (isHasPublicStore()) { return settingsService.getValueForKey(SettingsServiceBean.Key.ComputeBaseUrl) + "?" 
+ this.getFile().getOwner().getGlobalIdString() + "=" + swiftObject.getSwiftFileName(); } return settingsService.getValueForKey(SettingsServiceBean.Key.ComputeBaseUrl) + "?" + this.getFile().getOwner().getGlobalIdString() + "=" + swiftObject.getSwiftFileName() + "&temp_url_sig=" + swiftObject.getTempUrlSignature() + "&temp_url_expires=" + swiftObject.getTempUrlExpiry(); @@ -935,8 +936,8 @@ public String getPublicDownloadUrl() { try { SwiftAccessIO swiftIO = (SwiftAccessIO) storageIO; swiftIO.open(); - //if its a public install, lets just give users the permanent URL! - if (systemConfig.isPublicInstall()){ + //if its a public store, lets just give users the permanent URL! + if (isHasPublicStore()){ fileDownloadUrl = swiftIO.getRemoteUrl(); } else { //TODO: if a user has access to this file, they should be given the swift url @@ -1165,5 +1166,10 @@ public String getEmbargoPhrase() { public String getIngestMessage() { return BundleUtil.getStringFromBundle("file.ingestFailed.message", Arrays.asList(settingsWrapper.getGuidesBaseUrl(), settingsWrapper.getGuidesVersion())); } + + //Determines whether this File uses a public store and therefore doesn't support embargoed or restricted files + public boolean isHasPublicStore() { + return settingsWrapper.isTrueForKey(SettingsServiceBean.Key.PublicInstall, StorageIO.isPublicStore(DataAccess.getStorageDriverFromIdentifier(file.getStorageIdentifier()))); + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/GlobalId.java b/src/main/java/edu/harvard/iq/dataverse/GlobalId.java index 98112170d25..20b280771fc 100644 --- a/src/main/java/edu/harvard/iq/dataverse/GlobalId.java +++ b/src/main/java/edu/harvard/iq/dataverse/GlobalId.java @@ -26,9 +26,13 @@ public class GlobalId implements java.io.Serializable { public static final String DOI_PROTOCOL = "doi"; public static final String HDL_PROTOCOL = "hdl"; - public static final String HDL_RESOLVER_URL = "https://hdl.handle.net/"; public static final String DOI_RESOLVER_URL = 
"https://doi.org/"; - + public static final String DXDOI_RESOLVER_URL = "https://dx.doi.org/"; + public static final String HDL_RESOLVER_URL = "https://hdl.handle.net/"; + public static final String HTTP_DOI_RESOLVER_URL = "http://doi.org/"; + public static final String HTTP_DXDOI_RESOLVER_URL = "http://dx.doi.org/"; + public static final String HTTP_HDL_RESOLVER_URL = "http://hdl.handle.net/"; + public static Optional parse(String identifierString) { try { return Optional.of(new GlobalId(identifierString)); @@ -252,4 +256,27 @@ public static boolean verifyImportCharacters(String pidParam) { return m.matches(); } + + /** + * Convenience method to get the internal form of a PID string when it may be in + * the https:// or http:// form ToDo -refactor class to allow creating a + * GlobalID from any form (which assures it has valid syntax) and then have methods to get + * the form you want. + * + * @param pidUrlString - a string assumed to be a valid PID in some form + * @return the internal form as a String + */ + public static String getInternalFormOfPID(String pidUrlString) { + String pidString = pidUrlString; + if(pidUrlString.startsWith(GlobalId.DOI_RESOLVER_URL)) { + pidString = pidUrlString.replace(GlobalId.DOI_RESOLVER_URL, (GlobalId.DOI_PROTOCOL + ":")); + } else if(pidUrlString.startsWith(GlobalId.HDL_RESOLVER_URL)) { + pidString = pidUrlString.replace(GlobalId.HDL_RESOLVER_URL, (GlobalId.HDL_PROTOCOL + ":")); + } else if(pidUrlString.startsWith(GlobalId.HTTP_DOI_RESOLVER_URL)) { + pidString = pidUrlString.replace(GlobalId.HTTP_DOI_RESOLVER_URL, (GlobalId.DOI_PROTOCOL + ":")); + } else if(pidUrlString.startsWith(GlobalId.HTTP_HDL_RESOLVER_URL)) { + pidString = pidUrlString.replace(GlobalId.HTTP_HDL_RESOLVER_URL, (GlobalId.HDL_PROTOCOL + ":")); + } + return pidString; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/MailServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/MailServiceBean.java index f39fb8b0a32..2bfd342d899 100644 --- 
a/src/main/java/edu/harvard/iq/dataverse/MailServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/MailServiceBean.java @@ -16,6 +16,8 @@ import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.MailUtil; import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.json.JsonUtil; + import java.io.UnsupportedEncodingException; import java.text.MessageFormat; import java.util.ArrayList; @@ -169,7 +171,7 @@ public boolean sendSystemEmail(String to, String subject, String messageText, bo return sent; } - private InternetAddress getSystemAddress() { + public InternetAddress getSystemAddress() { String systemEmail = settingsService.getValueForKey(Key.SystemEmail); return MailUtil.parseSystemAddress(systemEmail); } @@ -568,6 +570,49 @@ public String getMessageTextBasedOnNotification(UserNotification userNotificatio logger.fine("fileImportMsg: " + fileImportMsg); return messageText += fileImportMsg; + case GLOBUSUPLOADCOMPLETED: + dataset = (Dataset) targetObject; + messageText = BundleUtil.getStringFromBundle("notification.email.greeting.html"); + String uploadCompletedMessage = messageText + BundleUtil.getStringFromBundle("notification.mail.globus.upload.completed", Arrays.asList( + systemConfig.getDataverseSiteUrl(), + dataset.getGlobalIdString(), + dataset.getDisplayName(), + comment + )) ; + return uploadCompletedMessage; + + case GLOBUSDOWNLOADCOMPLETED: + dataset = (Dataset) targetObject; + messageText = BundleUtil.getStringFromBundle("notification.email.greeting.html"); + String downloadCompletedMessage = messageText + BundleUtil.getStringFromBundle("notification.mail.globus.download.completed", Arrays.asList( + systemConfig.getDataverseSiteUrl(), + dataset.getGlobalIdString(), + dataset.getDisplayName(), + comment + )) ; + return downloadCompletedMessage; + case GLOBUSUPLOADCOMPLETEDWITHERRORS: + dataset = (Dataset) targetObject; + messageText = 
BundleUtil.getStringFromBundle("notification.email.greeting.html"); + String uploadCompletedWithErrorsMessage = messageText + BundleUtil.getStringFromBundle("notification.mail.globus.upload.completedWithErrors", Arrays.asList( + systemConfig.getDataverseSiteUrl(), + dataset.getGlobalIdString(), + dataset.getDisplayName(), + comment + )) ; + return uploadCompletedWithErrorsMessage; + + case GLOBUSDOWNLOADCOMPLETEDWITHERRORS: + dataset = (Dataset) targetObject; + messageText = BundleUtil.getStringFromBundle("notification.email.greeting.html"); + String downloadCompletedWithErrorsMessage = messageText + BundleUtil.getStringFromBundle("notification.mail.globus.download.completedWithErrors", Arrays.asList( + systemConfig.getDataverseSiteUrl(), + dataset.getGlobalIdString(), + dataset.getDisplayName(), + comment + )) ; + return downloadCompletedWithErrorsMessage; + case CHECKSUMIMPORT: version = (DatasetVersion) targetObject; String checksumImportMsg = BundleUtil.getStringFromBundle("notification.import.checksum", Arrays.asList( @@ -608,6 +653,26 @@ public String getMessageTextBasedOnNotification(UserNotification userNotificatio )); return ingestedCompletedWithErrorsMessage; + case DATASETMENTIONED: + String additionalInfo = userNotification.getAdditionalInfo(); + dataset = (Dataset) targetObject; + javax.json.JsonObject citingResource = null; + citingResource = JsonUtil.getJsonObject(additionalInfo); + + + pattern = BundleUtil.getStringFromBundle("notification.email.datasetWasMentioned"); + Object[] paramArrayDatasetMentioned = { + userNotification.getUser().getName(), + BrandingUtil.getInstallationBrandName(), + citingResource.getString("@type"), + citingResource.getString("@id"), + citingResource.getString("name"), + citingResource.getString("relationship"), + systemConfig.getDataverseSiteUrl(), + dataset.getGlobalId().toString(), + dataset.getDisplayName()}; + messageText = MessageFormat.format(pattern, paramArrayDatasetMentioned); + return messageText; } return ""; 
@@ -632,6 +697,7 @@ public Object getObjectOfNotification (UserNotification userNotification){ case GRANTFILEACCESS: case REJECTFILEACCESS: case DATASETCREATED: + case DATASETMENTIONED: return datasetService.find(userNotification.getObjectId()); case CREATEDS: case SUBMITTEDDS: @@ -648,6 +714,11 @@ public Object getObjectOfNotification (UserNotification userNotification){ return datasetService.find(userNotification.getObjectId()); case FILESYSTEMIMPORT: return versionService.find(userNotification.getObjectId()); + case GLOBUSUPLOADCOMPLETED: + case GLOBUSUPLOADCOMPLETEDWITHERRORS: + case GLOBUSDOWNLOADCOMPLETED: + case GLOBUSDOWNLOADCOMPLETEDWITHERRORS: + return datasetService.find(userNotification.getObjectId()); case CHECKSUMIMPORT: return versionService.find(userNotification.getObjectId()); case APIGENERATED: diff --git a/src/main/java/edu/harvard/iq/dataverse/ManageGroupsPage.java b/src/main/java/edu/harvard/iq/dataverse/ManageGroupsPage.java index d08337ec832..8513ca33b47 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ManageGroupsPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/ManageGroupsPage.java @@ -35,6 +35,7 @@ import javax.persistence.PersistenceContext; import org.apache.commons.lang3.StringUtils; + /** * @author michaelsuo */ @@ -95,10 +96,24 @@ public String init() { return permissionsWrapper.notAuthorized(); } explicitGroups = new LinkedList<>(explicitGroupService.findByOwner(getDataverseId())); - + renderDeletePopup = false; return null; } + + private boolean renderDeletePopup = false; + + public boolean isRenderDeletePopup() { + return renderDeletePopup; + } + public void setRenderDeletePopup(boolean renderDeletePopup) { + this.renderDeletePopup = renderDeletePopup; + } + + public void clickDeleteGroup(ExplicitGroup selectedGroup) { + setRenderDeletePopup(true); + this.selectedGroup = selectedGroup; + } public void setSelectedGroup(ExplicitGroup selectedGroup) { this.selectedGroup = selectedGroup; diff --git 
a/src/main/java/edu/harvard/iq/dataverse/MetadataBlock.java b/src/main/java/edu/harvard/iq/dataverse/MetadataBlock.java index 844c0ec5be7..33e75efffb5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/MetadataBlock.java +++ b/src/main/java/edu/harvard/iq/dataverse/MetadataBlock.java @@ -202,10 +202,18 @@ public String toString() { return "edu.harvard.iq.dataverse.MetadataBlock[ id=" + id + " ]"; } - public String getLocaleDisplayName() - { + public String getLocaleDisplayName() { + return getLocaleValue("metadatablock.displayName"); + } + + public String getLocaleDisplayFacet() { + return getLocaleValue("metadatablock.displayFacet"); + } + + // Visible for testing + String getLocaleValue(String metadataBlockKey) { try { - return BundleUtil.getStringFromPropertyFile("metadatablock.displayName", getName()); + return BundleUtil.getStringFromPropertyFile(metadataBlockKey, getName()); } catch (MissingResourceException e) { return displayName; } diff --git a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java index aaf38af1b36..8f7f53de1a2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java @@ -733,6 +733,9 @@ else if (dataset.isLockedFor(DatasetLock.Reason.Workflow)) { else if (dataset.isLockedFor(DatasetLock.Reason.DcmUpload)) { throw new IllegalCommandException(BundleUtil.getStringFromBundle("dataset.message.locked.editNotAllowed"), command); } + else if (dataset.isLockedFor(DatasetLock.Reason.GlobusUpload)) { + throw new IllegalCommandException(BundleUtil.getStringFromBundle("dataset.message.locked.editNotAllowed"), command); + } else if (dataset.isLockedFor(DatasetLock.Reason.EditInProgress)) { throw new IllegalCommandException(BundleUtil.getStringFromBundle("dataset.message.locked.editNotAllowed"), command); } @@ -768,6 +771,9 @@ else if 
(dataset.isLockedFor(DatasetLock.Reason.Workflow)) { else if (dataset.isLockedFor(DatasetLock.Reason.DcmUpload)) { throw new IllegalCommandException(BundleUtil.getStringFromBundle("dataset.message.locked.publishNotAllowed"), command); } + else if (dataset.isLockedFor(DatasetLock.Reason.GlobusUpload)) { + throw new IllegalCommandException(BundleUtil.getStringFromBundle("dataset.message.locked.publishNotAllowed"), command); + } else if (dataset.isLockedFor(DatasetLock.Reason.EditInProgress)) { throw new IllegalCommandException(BundleUtil.getStringFromBundle("dataset.message.locked.publishNotAllowed"), command); } diff --git a/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java b/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java index 9bf155740af..aa40423000d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java +++ b/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java @@ -13,6 +13,7 @@ import edu.harvard.iq.dataverse.util.MailUtil; import edu.harvard.iq.dataverse.util.StringUtil; import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.json.JsonUtil; import edu.harvard.iq.dataverse.UserNotification.Type; import java.time.LocalDate; @@ -92,7 +93,15 @@ public class SettingsWrapper implements java.io.Serializable { private Boolean rsyncUpload = null; - private Boolean rsyncDownload = null; + private Boolean rsyncDownload = null; + + private Boolean globusUpload = null; + private Boolean globusDownload = null; + private Boolean globusFileDownload = null; + + private String globusAppUrl = null; + + private List globusStoreList = null; private Boolean httpUpload = null; @@ -292,6 +301,42 @@ public boolean isRsyncDownload() { } return rsyncDownload; } + + public boolean isGlobusUpload() { + if (globusUpload == null) { + globusUpload = systemConfig.isGlobusUpload(); + } + return globusUpload; + } + + public boolean isGlobusDownload() { + if (globusDownload == null) { + globusDownload = 
systemConfig.isGlobusDownload(); + } + return globusDownload; + } + + public boolean isGlobusFileDownload() { + if (globusFileDownload == null) { + globusFileDownload = systemConfig.isGlobusFileDownload(); + } + return globusFileDownload; + } + + public boolean isGlobusEnabledStorageDriver(String driverId) { + if (globusStoreList == null) { + globusStoreList = systemConfig.getGlobusStoresList(); + } + return globusStoreList.contains(driverId); + } + + public String getGlobusAppUrl() { + if (globusAppUrl == null) { + globusAppUrl = settingsService.getValueForKey(SettingsServiceBean.Key.GlobusAppUrl, "http://localhost"); + } + return globusAppUrl; + + } public boolean isRsyncOnly() { if (rsyncOnly == null) { @@ -646,5 +691,4 @@ public boolean isCustomLicenseAllowed() { } return customLicenseAllowed; } -} - +} \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/Shib.java b/src/main/java/edu/harvard/iq/dataverse/Shib.java index 324f6e185a6..0f0e20aba94 100644 --- a/src/main/java/edu/harvard/iq/dataverse/Shib.java +++ b/src/main/java/edu/harvard/iq/dataverse/Shib.java @@ -218,7 +218,26 @@ public void init() { ? 
getValueFromAssertion(shibAffiliationAttribute) : shibService.getAffiliation(shibIdp, shibService.getDevShibAccountType()); + if (affiliation != null) { + String ShibAffiliationSeparator = settingsService.getValueForKey(SettingsServiceBean.Key.ShibAffiliationSeparator); + if (ShibAffiliationSeparator == null) { + ShibAffiliationSeparator = ";"; + } + String ShibAffiliationOrder = settingsService.getValueForKey(SettingsServiceBean.Key.ShibAffiliationOrder); + if (ShibAffiliationOrder != null) { + if (ShibAffiliationOrder.equals("lastAffiliation")) { + affiliation = affiliation.substring(affiliation.lastIndexOf(ShibAffiliationSeparator) + 1); //patch for affiliation array returning last part + } + else if (ShibAffiliationOrder.equals("firstAffiliation")) { + try{ + affiliation = affiliation.substring(0,affiliation.indexOf(ShibAffiliationSeparator)); //patch for affiliation array returning first part + } + catch (Exception e){ + logger.info("Affiliation does not contain \"" + ShibAffiliationSeparator + "\""); + } + } + } affiliationToDisplayAtConfirmation = affiliation; friendlyNameForInstitution = affiliation; } diff --git a/src/main/java/edu/harvard/iq/dataverse/Template.java b/src/main/java/edu/harvard/iq/dataverse/Template.java index b9a1762714a..61f0a78656f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/Template.java +++ b/src/main/java/edu/harvard/iq/dataverse/Template.java @@ -1,7 +1,6 @@ package edu.harvard.iq.dataverse; import java.io.Serializable; -import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; @@ -10,6 +9,11 @@ import java.util.LinkedList; import java.util.List; import java.util.Map; +import java.util.stream.Collectors; + +import javax.json.Json; +import javax.json.JsonObjectBuilder; +import javax.json.JsonString; import javax.persistence.CascadeType; import javax.persistence.Column; import javax.persistence.Entity; @@ -28,6 +32,8 @@ import javax.validation.constraints.Size; 
import edu.harvard.iq.dataverse.util.DateUtil; +import edu.harvard.iq.dataverse.util.json.JsonUtil; + import javax.persistence.NamedQueries; import javax.persistence.NamedQuery; import org.hibernate.validator.constraints.NotBlank; @@ -125,7 +131,13 @@ public void setTermsOfUseAndAccess(TermsOfUseAndAccess termsOfUseAndAccess) { public List getDatasetFields() { return datasetFields; } + + @Column(columnDefinition="TEXT", nullable = true ) + private String instructions; + @Transient + private Map instructionsMap = null; + @Transient private Map> metadataBlocksForView = new HashMap<>(); @Transient @@ -235,10 +247,8 @@ public int compare(DatasetField d1, DatasetField d2) { } private void initMetadataBlocksForCreate() { - metadataBlocksForView.clear(); metadataBlocksForEdit.clear(); for (MetadataBlock mdb : this.getDataverse().getMetadataBlocks()) { - List datasetFieldsForView = new ArrayList<>(); List datasetFieldsForEdit = new ArrayList<>(); for (DatasetField dsf : this.getDatasetFields()) { @@ -247,9 +257,6 @@ private void initMetadataBlocksForCreate() { } } - if (!datasetFieldsForView.isEmpty()) { - metadataBlocksForView.put(mdb, sortDatasetFields(datasetFieldsForView)); - } if (!datasetFieldsForEdit.isEmpty()) { metadataBlocksForEdit.put(mdb, sortDatasetFields(datasetFieldsForEdit)); } @@ -261,27 +268,31 @@ public void setMetadataValueBlocks() { metadataBlocksForView.clear(); metadataBlocksForEdit.clear(); List filledInFields = this.getDatasetFields(); + + Map instructionsMap = getInstructionsMap(); - - List actualMDB = new ArrayList<>(); + List viewMDB = new ArrayList<>(); + List editMDB=this.getDataverse().getMetadataBlocks(false); - actualMDB.addAll(this.getDataverse().getMetadataBlocks()); - for (DatasetField dsfv : filledInFields) { - if (!dsfv.isEmptyForDisplay()) { - MetadataBlock mdbTest = dsfv.getDatasetFieldType().getMetadataBlock(); - if (!actualMDB.contains(mdbTest)) { - actualMDB.add(mdbTest); + //The metadatablocks in this template include any from 
the Dataverse it is associated with + //plus any others where the template has a displayable field (i.e. from before a block was dropped in the dataverse/collection) + viewMDB.addAll(this.getDataverse().getMetadataBlocks(true)); + for (DatasetField dsf : filledInFields) { + if (!dsf.isEmptyForDisplay()) { + MetadataBlock mdbTest = dsf.getDatasetFieldType().getMetadataBlock(); + if (!viewMDB.contains(mdbTest)) { + viewMDB.add(mdbTest); } } - } - - for (MetadataBlock mdb : actualMDB) { + } + + for (MetadataBlock mdb : viewMDB) { + List datasetFieldsForView = new ArrayList<>(); - List datasetFieldsForEdit = new ArrayList<>(); for (DatasetField dsf : this.getDatasetFields()) { if (dsf.getDatasetFieldType().getMetadataBlock().equals(mdb)) { - datasetFieldsForEdit.add(dsf); - if (!dsf.isEmpty()) { + //For viewing, show the field if it has a value or custom instructions + if (!dsf.isEmpty() || instructionsMap.containsKey(dsf.getDatasetFieldType().getName())) { datasetFieldsForView.add(dsf); } } @@ -290,10 +301,20 @@ public void setMetadataValueBlocks() { if (!datasetFieldsForView.isEmpty()) { metadataBlocksForView.put(mdb, sortDatasetFields(datasetFieldsForView)); } - if (!datasetFieldsForEdit.isEmpty()) { - metadataBlocksForEdit.put(mdb, sortDatasetFields(datasetFieldsForEdit)); + + } + + for (MetadataBlock mdb : editMDB) { + List datasetFieldsForEdit = new ArrayList<>(); + this.setDatasetFields(initDatasetFields()); + for (DatasetField dsf : this.getDatasetFields() ) { + if (dsf.getDatasetFieldType().getMetadataBlock().equals(mdb)) { + datasetFieldsForEdit.add(dsf); + } } + metadataBlocksForEdit.put(mdb, sortDatasetFields(datasetFieldsForEdit)); } + } // TODO: clean up init methods and get them to work, cascading all the way down. 
@@ -340,6 +361,9 @@ public Template cloneNewTemplate(Template source) { } terms.setTemplate(newTemplate); newTemplate.setTermsOfUseAndAccess(terms); + + newTemplate.getInstructionsMap().putAll(source.getInstructionsMap()); + newTemplate.updateInstructions(); return newTemplate; } @@ -379,6 +403,45 @@ private List getFlatDatasetFields(List dsfList) { return retList; } + //Cache values in map for reading + public Map getInstructionsMap() { + if(instructionsMap==null) + if(instructions != null) { + instructionsMap = JsonUtil.getJsonObject(instructions).entrySet().stream().collect(Collectors.toMap(entry -> entry.getKey(),entry -> ((JsonString)entry.getValue()).getString())); + } else { + instructionsMap = new HashMap(); + } + return instructionsMap; + } + + //Get the cutstom instructions defined for a give fieldType + public String getInstructionsFor(String fieldType) { + return getInstructionsMap().get(fieldType); + } + + /* + //Add/change or remove (null instructionString) instructions for a given fieldType + public void setInstructionsFor(String fieldType, String instructionString) { + if(instructionString==null) { + getInstructionsMap().remove(fieldType); + } else { + getInstructionsMap().put(fieldType, instructionString); + } + updateInstructions(); + } + */ + + //Keep instructions up-to-date on any change + public void updateInstructions() { + JsonObjectBuilder builder = Json.createObjectBuilder(); + getInstructionsMap().forEach((key, value) -> { + if (value != null) + builder.add(key, value); + }); + instructions = JsonUtil.prettyPrint(builder.build()); + } + + @Override public int hashCode() { int hash = 0; diff --git a/src/main/java/edu/harvard/iq/dataverse/TemplatePage.java b/src/main/java/edu/harvard/iq/dataverse/TemplatePage.java index 19beaf75349..6da0d99da20 100644 --- a/src/main/java/edu/harvard/iq/dataverse/TemplatePage.java +++ b/src/main/java/edu/harvard/iq/dataverse/TemplatePage.java @@ -14,6 +14,7 @@ import java.sql.Timestamp; import java.util.Date; 
import java.util.List; +import java.util.logging.Logger; import javax.ejb.EJB; import javax.ejb.EJBException; import javax.faces.application.FacesMessage; @@ -52,6 +53,8 @@ public class TemplatePage implements java.io.Serializable { @Inject LicenseServiceBean licenseServiceBean; + + private static final Logger logger = Logger.getLogger(TemplatePage.class.getCanonicalName()); public enum EditMode { @@ -160,7 +163,7 @@ private void updateDatasetFieldInputLevels(){ for (DatasetField dsf: template.getFlatDatasetFields()){ DataverseFieldTypeInputLevel dsfIl = dataverseFieldTypeInputLevelService.findByDataverseIdDatasetFieldTypeId(dvIdForInputLevel, dsf.getDatasetFieldType().getId()); - if (dsfIl != null){ + if (dsfIl != null){ dsf.setInclude(dsfIl.isInclude()); } else { dsf.setInclude(true); @@ -173,8 +176,6 @@ public void edit(TemplatePage.EditMode editMode) { } public String save(String redirectPage) { - - //SEK - removed dead code 1/6/2015 boolean create = false; Command cmd; @@ -184,6 +185,8 @@ public String save(String redirectPage) { DatasetFieldUtil.tidyUpFields( template.getDatasetFields(), false ); + template.updateInstructions(); + if (editMode == EditMode.CREATE) { template.setCreateTime(new Timestamp(new Date().getTime())); template.setUsageCount(new Long(0)); @@ -208,20 +211,13 @@ public String save(String redirectPage) { error.append(cause).append(" "); error.append(cause.getMessage()).append(" "); } - // - //FacesContext.getCurrentInstance().addMessage(null, new FacesMessage(FacesMessage.SEVERITY_ERROR, "Template Save Failed", " - " + error.toString())); - System.out.print("dataverse " + dataverse.getName()); - System.out.print("Ejb exception"); - System.out.print(error.toString()); + logger.warning("Template Save failed - Ejb exception " + error.toString()); JH.addMessage(FacesMessage.SEVERITY_FATAL, BundleUtil.getStringFromBundle("template.save.fail")); return null; } catch (CommandException ex) { - System.out.print("command exception"); - 
System.out.print(ex.toString()); - //FacesContext.getCurrentInstance().addMessage(null, new FacesMessage(FacesMessage.SEVERITY_ERROR, "Template Save Failed", " - " + ex.toString())); + logger.severe("Template Save failed - Ejb exception " + ex.toString()); JH.addMessage(FacesMessage.SEVERITY_FATAL, BundleUtil.getStringFromBundle("template.save.fail")); return null; - //logger.severe(ex.getMessage()); } editMode = null; String msg = (create)? BundleUtil.getStringFromBundle("template.create"): BundleUtil.getStringFromBundle("template.save"); @@ -253,5 +249,11 @@ public String deleteTemplate(Long templateId) { } return "/manage-templates.xhtml?dataverseId=" + dataverse.getId() + "&faces-redirect=true"; } + + //Get the cutstom instructions defined for a give fieldType + public String getInstructionsLabelFor(String fieldType) { + String fieldInstructions = template.getInstructionsMap().get(fieldType); + return (fieldInstructions!=null && !fieldInstructions.isBlank()) ? fieldInstructions : BundleUtil.getStringFromBundle("template.instructions.empty.label"); + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/UserNotification.java b/src/main/java/edu/harvard/iq/dataverse/UserNotification.java index 5714a879527..b68a1b9d13e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/UserNotification.java +++ b/src/main/java/edu/harvard/iq/dataverse/UserNotification.java @@ -37,7 +37,9 @@ public enum Type { ASSIGNROLE, REVOKEROLE, CREATEDV, CREATEDS, CREATEACC, SUBMITTEDDS, RETURNEDDS, PUBLISHEDDS, REQUESTFILEACCESS, GRANTFILEACCESS, REJECTFILEACCESS, FILESYSTEMIMPORT, CHECKSUMIMPORT, CHECKSUMFAIL, CONFIRMEMAIL, APIGENERATED, INGESTCOMPLETED, INGESTCOMPLETEDWITHERRORS, - PUBLISHFAILED_PIDREG, WORKFLOW_SUCCESS, WORKFLOW_FAILURE, STATUSUPDATED, DATASETCREATED; + PUBLISHFAILED_PIDREG, WORKFLOW_SUCCESS, WORKFLOW_FAILURE, STATUSUPDATED, DATASETCREATED, DATASETMENTIONED, + GLOBUSUPLOADCOMPLETED, GLOBUSUPLOADCOMPLETEDWITHERRORS, + GLOBUSDOWNLOADCOMPLETED, 
GLOBUSDOWNLOADCOMPLETEDWITHERRORS; public String getDescription() { return BundleUtil.getStringFromBundle("notification.typeDescription." + this.name()); @@ -88,6 +90,8 @@ public static String toStringValue(Set typesSet) { @Column( nullable = false ) private Type type; private Long objectId; + + private String additionalInfo; @Transient private boolean displayAsRead; @@ -196,4 +200,12 @@ public void setRoleString(String roleString) { public String getLocaleSendDate() { return DateUtil.formatDate(sendDate); } + + public String getAdditionalInfo() { + return additionalInfo; + } + + public void setAdditionalInfo(String additionalInfo) { + this.additionalInfo = additionalInfo; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/UserNotificationServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/UserNotificationServiceBean.java index 6792a7bedc7..947ee3ce989 100644 --- a/src/main/java/edu/harvard/iq/dataverse/UserNotificationServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/UserNotificationServiceBean.java @@ -110,12 +110,16 @@ public void sendNotification(AuthenticatedUser dataverseUser, Timestamp sendDate } public void sendNotification(AuthenticatedUser dataverseUser, Timestamp sendDate, Type type, Long objectId, String comment, AuthenticatedUser requestor, boolean isHtmlContent) { + sendNotification(dataverseUser, sendDate, type, objectId, comment, requestor, isHtmlContent, null); + } + public void sendNotification(AuthenticatedUser dataverseUser, Timestamp sendDate, Type type, Long objectId, String comment, AuthenticatedUser requestor, boolean isHtmlContent, String additionalInfo) { UserNotification userNotification = new UserNotification(); userNotification.setUser(dataverseUser); userNotification.setSendDate(sendDate); userNotification.setType(type); userNotification.setObjectId(objectId); userNotification.setRequestor(requestor); + userNotification.setAdditionalInfo(additionalInfo); if (!isEmailMuted(userNotification) && 
mailService.sendNotificationEmail(userNotification, comment, requestor, isHtmlContent)) { logger.fine("email was sent"); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java b/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java index d2c3f68dba2..ed9a544e726 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java @@ -630,6 +630,10 @@ protected T execCommand( Command cmd ) throws WrappedResponse { return engineSvc.submit(cmd); } catch (IllegalCommandException ex) { + //for 8859 for api calls that try to update datasets with TOA out of compliance + if (ex.getMessage().toLowerCase().contains("terms of use")){ + throw new WrappedResponse(ex, conflict(ex.getMessage())); + } throw new WrappedResponse( ex, forbidden(ex.getMessage() ) ); } catch (PermissionException ex) { /** @@ -822,6 +826,10 @@ protected Response forbidden( String msg ) { return error( Status.FORBIDDEN, msg ); } + protected Response conflict( String msg ) { + return error( Status.CONFLICT, msg ); + } + protected Response badApiKey( String apiKey ) { return error(Status.UNAUTHORIZED, (apiKey != null ) ? 
"Bad api key " : "Please provide a key query parameter (?key=XXX) or via the HTTP header " + DATAVERSE_KEY_HEADER_NAME); } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Access.java b/src/main/java/edu/harvard/iq/dataverse/api/Access.java index b2a8da3af4c..abeedf23b59 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Access.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Access.java @@ -278,7 +278,7 @@ private DataFile findDataFileOrDieWrapper(String fileId){ @Path("datafile/{fileId:.+}") @GET @Produces({"application/xml"}) - public DownloadInstance datafile(@PathParam("fileId") String fileId, @QueryParam("gbrecs") boolean gbrecs, @QueryParam("key") String apiToken, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) /*throws NotFoundException, ServiceUnavailableException, PermissionDeniedException, AuthorizationRequiredException*/ { + public Response datafile(@PathParam("fileId") String fileId, @QueryParam("gbrecs") boolean gbrecs, @QueryParam("key") String apiToken, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) /*throws NotFoundException, ServiceUnavailableException, PermissionDeniedException, AuthorizationRequiredException*/ { // check first if there's a trailing slash, and chop it: while (fileId.lastIndexOf('/') == fileId.length() - 1) { @@ -332,6 +332,11 @@ public DownloadInstance datafile(@PathParam("fileId") String fileId, @QueryParam dInfo.addServiceAvailable(new OptionalAccessService("preprocessed", "application/json", "format=prep", "Preprocessed data in JSON")); dInfo.addServiceAvailable(new OptionalAccessService("subset", "text/tab-separated-values", "variables=<LIST>", "Column-wise Subsetting")); } + + if(systemConfig.isGlobusFileDownload() && systemConfig.getGlobusStoresList().contains(DataAccess.getStorageDriverFromIdentifier(df.getStorageIdentifier()))) { + dInfo.addServiceAvailable(new OptionalAccessService("GlobusTransfer", 
df.getContentType(), "format=GlobusTransfer", "Download via Globus")); + } + DownloadInstance downloadInstance = new DownloadInstance(dInfo); downloadInstance.setRequestUriInfo(uriInfo); downloadInstance.setRequestHttpHeaders(headers); @@ -423,7 +428,10 @@ public DownloadInstance datafile(@PathParam("fileId") String fileId, @QueryParam /* * Provide some browser-friendly headers: (?) */ - return downloadInstance; + if (headers.getRequestHeaders().containsKey("Range")) { + return Response.status(Response.Status.PARTIAL_CONTENT).entity(downloadInstance).build(); + } + return Response.ok(downloadInstance).build(); } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index 78ec4a6edb5..ef08444af69 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -105,9 +105,6 @@ import static edu.harvard.iq.dataverse.util.json.JsonPrinter.json; import static edu.harvard.iq.dataverse.util.json.JsonPrinter.rolesToJson; import static edu.harvard.iq.dataverse.util.json.JsonPrinter.toJsonArray; -import java.math.BigDecimal; - - import java.util.ArrayList; import java.util.Arrays; import java.util.Date; @@ -1805,31 +1802,44 @@ public Response validateDataFileHashValue(@PathParam("fileId") String fileId) { } - @GET - @Path("/submitDataVersionToArchive/{id}/{version}") - public Response submitDatasetVersionToArchive(@PathParam("id") String dsid, @PathParam("version") String versionNumber) { + @POST + @Path("/submitDatasetVersionToArchive/{id}/{version}") + public Response submitDatasetVersionToArchive(@PathParam("id") String dsid, + @PathParam("version") String versionNumber) { try { AuthenticatedUser au = findAuthenticatedUserOrDie(); - // Note - the user is being set in the session so it becomes part of the - // DataverseRequest and is sent to the back-end command where it is used to get - // the API Token which is then used to retrieve files 
(e.g. via S3 direct - // downloads) to create the Bag - session.setUser(au); // TODO: Stop using session. Use createDataverseRequest instead. + Dataset ds = findDatasetOrDie(dsid); DatasetVersion dv = datasetversionService.findByFriendlyVersionNumber(ds.getId(), versionNumber); if (dv.getArchivalCopyLocation() == null) { String className = settingsService.getValueForKey(SettingsServiceBean.Key.ArchiverClassName); - AbstractSubmitToArchiveCommand cmd = ArchiverUtil.createSubmitToArchiveCommand(className, dvRequestService.getDataverseRequest(), dv); + // Note - the user is being sent via the createDataverseRequest(au) call to the + // back-end command where it is used to get the API Token which is + // then used to retrieve files (e.g. via S3 direct downloads) to create the Bag + AbstractSubmitToArchiveCommand cmd = ArchiverUtil.createSubmitToArchiveCommand(className, + createDataverseRequest(au), dv); + // createSubmitToArchiveCommand() tries to find and instantiate an non-abstract + // implementation of AbstractSubmitToArchiveCommand based on the provided + // className. 
If a class with that name isn't found (or can't be instatiated), it + // will return null if (cmd != null) { + if(ArchiverUtil.onlySingleVersionArchiving(cmd.getClass(), settingsService)) { + for (DatasetVersion version : ds.getVersions()) { + if ((dv != version) && version.getArchivalCopyLocation() != null) { + return error(Status.CONFLICT, "Dataset already archived."); + } + } + } new Thread(new Runnable() { public void run() { try { DatasetVersion dv = commandEngine.submit(cmd); - if (dv.getArchivalCopyLocation() != null) { - logger.info("DatasetVersion id=" + ds.getGlobalId().toString() + " v" + versionNumber + " submitted to Archive at: " - + dv.getArchivalCopyLocation()); + if (!dv.getArchivalCopyLocationStatus().equals(DatasetVersion.ARCHIVAL_STATUS_FAILURE)) { + logger.info( + "DatasetVersion id=" + ds.getGlobalId().toString() + " v" + versionNumber + + " submitted to Archive, status: " + dv.getArchivalCopyLocationStatus()); } else { logger.severe("Error submitting version due to conflict/error at Archive"); } @@ -1838,13 +1848,105 @@ public void run() { } } }).start(); - return ok("Archive submission using " + cmd.getClass().getCanonicalName() + " started. Processing can take significant time for large datasets. View log and/or check archive for results."); + return ok("Archive submission using " + cmd.getClass().getCanonicalName() + + " started. Processing can take significant time for large datasets and requires that the user have permission to publish the dataset. 
View log and/or check archive for results."); + } else { + logger.log(Level.SEVERE, "Could not find Archiver class: " + className); + return error(Status.INTERNAL_SERVER_ERROR, "Could not find Archiver class: " + className); + } + } else { + return error(Status.BAD_REQUEST, "Version was already submitted for archiving."); + } + } catch (WrappedResponse e1) { + return error(Status.UNAUTHORIZED, "api key required"); + } + } + + + /** + * Iteratively archives all unarchived dataset versions + * @param + * listonly - don't archive, just list unarchived versions + * limit - max number to process + * lastestonly - only archive the latest versions + * @return + */ + @POST + @Path("/archiveAllUnarchivedDatasetVersions") + public Response archiveAllUnarchivedDatasetVersions(@QueryParam("listonly") boolean listonly, @QueryParam("limit") Integer limit, @QueryParam("latestonly") boolean latestonly) { + + try { + AuthenticatedUser au = findAuthenticatedUserOrDie(); + + List dsl = datasetversionService.getUnarchivedDatasetVersions(); + if (dsl != null) { + if (listonly) { + JsonArrayBuilder jab = Json.createArrayBuilder(); + logger.fine("Unarchived versions found: "); + int current = 0; + for (DatasetVersion dv : dsl) { + if (limit != null && current >= limit) { + break; + } + if (!latestonly || dv.equals(dv.getDataset().getLatestVersionForCopy())) { + jab.add(dv.getDataset().getGlobalId().toString() + ", v" + dv.getFriendlyVersionNumber()); + logger.fine(" " + dv.getDataset().getGlobalId().toString() + ", v" + dv.getFriendlyVersionNumber()); + current++; + } + } + return ok(jab); + } + String className = settingsService.getValueForKey(SettingsServiceBean.Key.ArchiverClassName); + // Note - the user is being sent via the createDataverseRequest(au) call to the + // back-end command where it is used to get the API Token which is + // then used to retrieve files (e.g. 
via S3 direct downloads) to create the Bag + final DataverseRequest request = createDataverseRequest(au); + // createSubmitToArchiveCommand() tries to find and instantiate an non-abstract + // implementation of AbstractSubmitToArchiveCommand based on the provided + // className. If a class with that name isn't found (or can't be instatiated, it + // will return null + AbstractSubmitToArchiveCommand cmd = ArchiverUtil.createSubmitToArchiveCommand(className, request, dsl.get(0)); + if (cmd != null) { + //Found an archiver to use + new Thread(new Runnable() { + public void run() { + int total = dsl.size(); + int successes = 0; + int failures = 0; + for (DatasetVersion dv : dsl) { + if (limit != null && (successes + failures) >= limit) { + break; + } + if (!latestonly || dv.equals(dv.getDataset().getLatestVersionForCopy())) { + try { + AbstractSubmitToArchiveCommand cmd = ArchiverUtil.createSubmitToArchiveCommand(className, request, dv); + + dv = commandEngine.submit(cmd); + if (!dv.getArchivalCopyLocationStatus().equals(DatasetVersion.ARCHIVAL_STATUS_FAILURE)) { + successes++; + logger.info("DatasetVersion id=" + dv.getDataset().getGlobalId().toString() + " v" + dv.getFriendlyVersionNumber() + " submitted to Archive, status: " + + dv.getArchivalCopyLocationStatus()); + } else { + failures++; + logger.severe("Error submitting version due to conflict/error at Archive for " + dv.getDataset().getGlobalId().toString() + " v" + dv.getFriendlyVersionNumber()); + } + } catch (CommandException ex) { + failures++; + logger.log(Level.SEVERE, "Unexpected Exception calling submit archive command", ex); + } + } + logger.fine(successes + failures + " of " + total + " archive submissions complete"); + } + logger.info("Archiving complete: " + successes + " Successes, " + failures + " Failures. See prior log messages for details."); + } + }).start(); + return ok("Starting to archive all unarchived published dataset versions using " + cmd.getClass().getCanonicalName() + ". 
Processing can take significant time for large datasets/ large numbers of dataset versions and requires that the user have permission to publish the dataset(s). View log and/or check archive for results."); } else { logger.log(Level.SEVERE, "Could not find Archiver class: " + className); return error(Status.INTERNAL_SERVER_ERROR, "Could not find Archiver class: " + className); } } else { - return error(Status.BAD_REQUEST, "Version already archived at: " + dv.getArchivalCopyLocation()); + return error(Status.BAD_REQUEST, "No unarchived published dataset versions found"); } } catch (WrappedResponse e1) { return error(Status.UNAUTHORIZED, "api key required"); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/ApiBlockingFilter.java b/src/main/java/edu/harvard/iq/dataverse/api/ApiBlockingFilter.java index 6f7a1d876a1..6bf852d25f7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/ApiBlockingFilter.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/ApiBlockingFilter.java @@ -163,7 +163,8 @@ public void doFilter(ServletRequest sr, ServletResponse sr1, FilterChain fc) thr if (settingsSvc.isTrueForKey(SettingsServiceBean.Key.AllowCors, true )) { ((HttpServletResponse) sr1).addHeader("Access-Control-Allow-Origin", "*"); ((HttpServletResponse) sr1).addHeader("Access-Control-Allow-Methods", "PUT, GET, POST, DELETE, OPTIONS"); - ((HttpServletResponse) sr1).addHeader("Access-Control-Allow-Headers", "Accept, Content-Type, X-Dataverse-Key"); + ((HttpServletResponse) sr1).addHeader("Access-Control-Allow-Headers", "Accept, Content-Type, X-Dataverse-Key, Range"); + ((HttpServletResponse) sr1).addHeader("Access-Control-Expose-Headers", "Accept-Ranges, Content-Range, Content-Encoding"); } fc.doFilter(sr, sr1); } catch ( ServletException se ) { diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 153d3f266b1..aff543e643c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java 
+++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -7,6 +7,7 @@ import edu.harvard.iq.dataverse.authorization.DataverseRole; import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.RoleAssignee; +import edu.harvard.iq.dataverse.authorization.users.ApiToken; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.authorization.users.User; import edu.harvard.iq.dataverse.batch.jobs.importer.ImportMode; @@ -59,6 +60,7 @@ import edu.harvard.iq.dataverse.ingest.IngestServiceBean; import edu.harvard.iq.dataverse.privateurl.PrivateUrl; +import edu.harvard.iq.dataverse.S3PackageImporter; import edu.harvard.iq.dataverse.api.dto.RoleAssignmentDTO; import edu.harvard.iq.dataverse.batch.util.LoggingUtil; import edu.harvard.iq.dataverse.dataaccess.DataAccess; @@ -82,11 +84,13 @@ import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.EjbUtil; import edu.harvard.iq.dataverse.util.FileUtil; +import edu.harvard.iq.dataverse.util.MarkupChecker; import edu.harvard.iq.dataverse.util.SystemConfig; import edu.harvard.iq.dataverse.util.bagit.OREMap; import edu.harvard.iq.dataverse.util.json.JSONLDUtil; import edu.harvard.iq.dataverse.util.json.JsonLDTerm; import edu.harvard.iq.dataverse.util.json.JsonParseException; +import edu.harvard.iq.dataverse.util.json.JsonUtil; import edu.harvard.iq.dataverse.search.IndexServiceBean; import static edu.harvard.iq.dataverse.util.json.JsonPrinter.*; import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder; @@ -96,6 +100,8 @@ import edu.harvard.iq.dataverse.workflow.WorkflowServiceBean; import edu.harvard.iq.dataverse.workflow.WorkflowContext.TriggerType; +import edu.harvard.iq.dataverse.globus.GlobusServiceBean; + import java.io.IOException; import java.io.InputStream; import java.io.StringReader; @@ -105,9 +111,10 @@ import java.text.SimpleDateFormat; import java.time.LocalDate; 
import java.time.LocalDateTime; +import java.util.*; +import java.util.concurrent.*; import java.time.ZoneId; import java.time.format.DateTimeFormatter; -import java.util.*; import java.util.Map.Entry; import java.util.logging.Level; import java.util.logging.Logger; @@ -115,7 +122,6 @@ import javax.ejb.EJB; import javax.ejb.EJBException; -import javax.faces.context.FacesContext; import javax.inject.Inject; import javax.json.*; import javax.json.stream.JsonParsingException; @@ -133,10 +139,7 @@ import javax.ws.rs.PathParam; import javax.ws.rs.Produces; import javax.ws.rs.QueryParam; -import javax.ws.rs.core.Context; -import javax.ws.rs.core.HttpHeaders; -import javax.ws.rs.core.MediaType; -import javax.ws.rs.core.Response; +import javax.ws.rs.core.*; import javax.ws.rs.core.Response.Status; import static javax.ws.rs.core.Response.Status.BAD_REQUEST; import javax.ws.rs.core.UriInfo; @@ -161,6 +164,9 @@ public class Datasets extends AbstractApiBean { @EJB DataverseServiceBean dataverseService; + @EJB + GlobusServiceBean globusService; + @EJB UserNotificationServiceBean userNotificationService; @@ -216,6 +222,9 @@ public class Datasets extends AbstractApiBean { @Inject DataverseRoleServiceBean dataverseRoleService; + @EJB + DatasetVersionServiceBean datasetversionService; + /** * Used to consolidate the way we parse and handle dataset versions. * @param @@ -425,7 +434,7 @@ public Response setCitationDate( @PathParam("id") String id, String dsfTypeName) execCommand(new SetDatasetCitationDateCommand(req, findDatasetOrDie(id), dsfType)); return ok("Citation Date for dataset " + id + " set to: " + (dsfType != null ? 
dsfType.getDisplayName() : "default")); }); - } + } @DELETE @Path("{id}/citationdate") @@ -434,7 +443,7 @@ public Response useDefaultCitationDate( @PathParam("id") String id) { execCommand(new SetDatasetCitationDateCommand(req, findDatasetOrDie(id), null)); return ok("Citation Date for dataset " + id + " set to default"); }); - } + } @GET @Path("{id}/versions") @@ -450,9 +459,9 @@ public Response listVersions( @PathParam("id") String id ) { @Path("{id}/versions/{versionId}") public Response getVersion( @PathParam("id") String datasetId, @PathParam("versionId") String versionId, @Context UriInfo uriInfo, @Context HttpHeaders headers) { return response( req -> { - DatasetVersion dsv = getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers); + DatasetVersion dsv = getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers); return (dsv == null || dsv.getId() == null) ? notFound("Dataset version not found") - : ok(json(dsv)); + : ok(json(dsv)); }); } @@ -469,9 +478,9 @@ public Response getVersionFiles( @PathParam("id") String datasetId, @PathParam(" public Response getFileAccessFolderView(@PathParam("id") String datasetId, @QueryParam("version") String versionId, @QueryParam("folder") String folderName, @QueryParam("original") Boolean originals, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) { folderName = folderName == null ? "" : folderName; - versionId = versionId == null ? ":latest-published" : versionId; + versionId = versionId == null ? 
":latest-published" : versionId; - DatasetVersion version; + DatasetVersion version; try { DataverseRequest req = createDataverseRequest(findUserOrDie()); version = getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers); @@ -583,7 +592,7 @@ public Response updateDatasetPIDMetadataAll() { } catch (WrappedResponse ex) { Logger.getLogger(Datasets.class.getName()).log(Level.SEVERE, null, ex); } - }); + }); return ok(BundleUtil.getStringFromBundle("datasets.api.updatePIDMetadata.success.for.update.all")); }); } @@ -592,7 +601,7 @@ public Response updateDatasetPIDMetadataAll() { @Path("{id}/versions/{versionId}") @Consumes(MediaType.APPLICATION_JSON) public Response updateDraftVersion( String jsonBody, @PathParam("id") String id, @PathParam("versionId") String versionId ){ - + if ( ! ":draft".equals(versionId) ) { return error( Response.Status.BAD_REQUEST, "Only the :draft version can be updated"); } @@ -620,14 +629,22 @@ public Response updateDraftVersion( String jsonBody, @PathParam("id") String id, boolean updateDraft = ds.getLatestVersion().isDraft(); DatasetVersion managedVersion; - if ( updateDraft ) { + if (updateDraft) { final DatasetVersion editVersion = ds.getEditVersion(); editVersion.setDatasetFields(incomingVersion.getDatasetFields()); - editVersion.setTermsOfUseAndAccess( incomingVersion.getTermsOfUseAndAccess() ); + editVersion.setTermsOfUseAndAccess(incomingVersion.getTermsOfUseAndAccess()); editVersion.getTermsOfUseAndAccess().setDatasetVersion(editVersion); + boolean hasValidTerms = TermsOfUseAndAccessValidator.isTOUAValid(editVersion.getTermsOfUseAndAccess(), null); + if (!hasValidTerms) { + return error(Status.CONFLICT, BundleUtil.getStringFromBundle("dataset.message.toua.invalid")); + } Dataset managedDataset = execCommand(new UpdateDatasetVersionCommand(ds, req)); managedVersion = managedDataset.getEditVersion(); } else { + boolean hasValidTerms = 
TermsOfUseAndAccessValidator.isTOUAValid(incomingVersion.getTermsOfUseAndAccess(), null); + if (!hasValidTerms) { + return error(Status.CONFLICT, BundleUtil.getStringFromBundle("dataset.message.toua.invalid")); + } managedVersion = execCommand(new CreateDatasetVersionCommand(req, ds, incomingVersion)); } // DatasetVersion managedVersion = execCommand( updateDraft @@ -685,6 +702,10 @@ public Response updateVersionMetadata(String jsonLDBody, @PathParam("id") String boolean updateDraft = ds.getLatestVersion().isDraft(); dsv = JSONLDUtil.updateDatasetVersionMDFromJsonLD(dsv, jsonLDBody, metadataBlockService, datasetFieldSvc, !replaceTerms, false, licenseSvc); dsv.getTermsOfUseAndAccess().setDatasetVersion(dsv); + boolean hasValidTerms = TermsOfUseAndAccessValidator.isTOUAValid(dsv.getTermsOfUseAndAccess(), null); + if (!hasValidTerms) { + return error(Status.CONFLICT, BundleUtil.getStringFromBundle("dataset.message.toua.invalid")); + } DatasetVersion managedVersion; if (updateDraft) { Dataset managedDataset = execCommand(new UpdateDatasetVersionCommand(ds, req)); @@ -771,7 +792,7 @@ private Response processDatasetFieldDataDelete(String jsonBody, String id, Datav boolean found = false; for (DatasetField dsf : dsv.getDatasetFields()) { if (dsf.getDatasetFieldType().equals(updateField.getDatasetFieldType())) { - if (dsf.getDatasetFieldType().isAllowMultiples()) { + if (dsf.getDatasetFieldType().isAllowMultiples()) { if (updateField.getDatasetFieldType().isControlledVocabulary()) { if (dsf.getDatasetFieldType().isAllowMultiples()) { for (ControlledVocabularyValue cvv : updateField.getControlledVocabularyValues()) { @@ -836,7 +857,7 @@ private Response processDatasetFieldDataDelete(String jsonBody, String id, Datav datasetFieldCompoundValueItemsToRemove.forEach((remove) -> { dsf.getDatasetFieldCompoundValues().remove(remove); }); - if (!found) { + if (!found) { logger.log(Level.SEVERE, "Delete metadata failed: " + updateField.getDatasetFieldType().getDisplayName() + ": " + 
deleteVal + " not found."); return error(Response.Status.BAD_REQUEST, "Delete metadata failed: " + updateField.getDatasetFieldType().getDisplayName() + ": " + deleteVal + " not found."); } @@ -856,12 +877,11 @@ private Response processDatasetFieldDataDelete(String jsonBody, String id, Datav logger.log(Level.SEVERE, "Delete metadata failed: " + updateField.getDatasetFieldType().getDisplayName() + ": " + displayValue + " not found." ); return error(Response.Status.BAD_REQUEST, "Delete metadata failed: " + updateField.getDatasetFieldType().getDisplayName() + ": " + displayValue + " not found." ); } - } + } - boolean updateDraft = ds.getLatestVersion().isDraft(); - DatasetVersion managedVersion = updateDraft + DatasetVersion managedVersion = updateDraft ? execCommand(new UpdateDatasetVersionCommand(ds, req)).getEditVersion() : execCommand(new CreateDatasetVersionCommand(req, ds, dsv)); return ok(json(managedVersion)); @@ -880,13 +900,13 @@ private Response processDatasetFieldDataDelete(String jsonBody, String id, Datav private String getCompoundDisplayValue (DatasetFieldCompoundValue dscv){ String returnString = ""; - for (DatasetField dsf : dscv.getChildDatasetFields()) { - for (String value : dsf.getValues()) { - if (!(value == null)) { - returnString += (returnString.isEmpty() ? "" : "; ") + value.trim(); - } + for (DatasetField dsf : dscv.getChildDatasetFields()) { + for (String value : dsf.getValues()) { + if (!(value == null)) { + returnString += (returnString.isEmpty() ? 
"" : "; ") + value.trim(); } } + } return returnString; } @@ -915,13 +935,13 @@ private Response processDatasetUpdate(String jsonBody, String id, DataverseReque DatasetVersion dsv = ds.getEditVersion(); dsv.getTermsOfUseAndAccess().setDatasetVersion(dsv); List fields = new LinkedList<>(); - DatasetField singleField = null; + DatasetField singleField = null; JsonArray fieldsJson = json.getJsonArray("fields"); - if( fieldsJson == null ){ - singleField = jsonParser().parseField(json, Boolean.FALSE); + if (fieldsJson == null) { + singleField = jsonParser().parseField(json, Boolean.FALSE); fields.add(singleField); - } else{ + } else { fields = jsonParser().parseMultipleFields(json); } @@ -1082,18 +1102,24 @@ public Response publishDataset(@PathParam("id") String id, @QueryParam("type") S case "major": isMinor = false; break; - case "updatecurrent": - if(user.isSuperuser()) { - updateCurrent=true; - } else { - return error(Response.Status.FORBIDDEN, "Only superusers can update the current version"); - } - break; + case "updatecurrent": + if (user.isSuperuser()) { + updateCurrent = true; + } else { + return error(Response.Status.FORBIDDEN, "Only superusers can update the current version"); + } + break; default: - return error(Response.Status.BAD_REQUEST, "Illegal 'type' parameter value '" + type + "'. It needs to be either 'major', 'minor', or 'updatecurrent'."); + return error(Response.Status.BAD_REQUEST, "Illegal 'type' parameter value '" + type + "'. 
It needs to be either 'major', 'minor', or 'updatecurrent'."); } Dataset ds = findDatasetOrDie(id); + + boolean hasValidTerms = TermsOfUseAndAccessValidator.isTOUAValid(ds.getLatestVersion().getTermsOfUseAndAccess(), null); + if (!hasValidTerms) { + return error(Status.CONFLICT, BundleUtil.getStringFromBundle("dataset.message.toua.invalid")); + } + if (mustBeIndexed) { logger.fine("IT: " + ds.getIndexTime()); logger.fine("MT: " + ds.getModificationTime()); @@ -1110,7 +1136,7 @@ public Response publishDataset(@PathParam("id") String id, @QueryParam("type") S * set and if so, if it after the modification time. If the modification time is * set and the index time is null or is before the mod time, the 409/conflict * error is returned. - * + * */ if ((ds.getModificationTime()!=null && (ds.getIndexTime() == null || (ds.getIndexTime().compareTo(ds.getModificationTime()) <= 0))) || (ds.getPermissionModificationTime()!=null && (ds.getPermissionIndexTime() == null || (ds.getPermissionIndexTime().compareTo(ds.getPermissionModificationTime()) <= 0)))) { @@ -1149,7 +1175,7 @@ public Response publishDataset(@PathParam("id") String id, @QueryParam("type") S */ try { updateVersion = commandEngine.submit(archiveCommand); - if (updateVersion.getArchivalCopyLocation() != null) { + if (!updateVersion.getArchivalCopyLocationStatus().equals(DatasetVersion.ARCHIVAL_STATUS_FAILURE)) { successMsg = BundleUtil.getStringFromBundle("datasetversion.update.archive.success"); } else { successMsg = BundleUtil.getStringFromBundle("datasetversion.update.archive.failure"); @@ -1174,10 +1200,10 @@ public Response publishDataset(@PathParam("id") String id, @QueryParam("type") S .build(); } } else { - PublishDatasetResult res = execCommand(new PublishDatasetCommand(ds, + PublishDatasetResult res = execCommand(new PublishDatasetCommand(ds, createDataverseRequest(user), - isMinor)); - return res.isWorkflow() ? 
accepted(json(res.getDataset())) : ok(json(res.getDataset())); + isMinor)); + return res.isWorkflow() ? accepted(json(res.getDataset())) : ok(json(res.getDataset())); } } catch (WrappedResponse ex) { return ex.getResponse(); @@ -1278,7 +1304,7 @@ public Response publishMigratedDataset(String jsonldBody, @PathParam("id") Strin @Path("{id}/move/{targetDataverseAlias}") public Response moveDataset(@PathParam("id") String id, @PathParam("targetDataverseAlias") String targetDataverseAlias, @QueryParam("forceMove") Boolean force) { try { - User u = findUserOrDie(); + User u = findUserOrDie(); Dataset ds = findDatasetOrDie(id); Dataverse target = dataverseService.findByAlias(targetDataverseAlias); if (target == null) { @@ -1316,6 +1342,12 @@ public Response createFileEmbargo(@PathParam("id") String id, String jsonBody){ } catch (WrappedResponse ex) { return ex.getResponse(); } + + boolean hasValidTerms = TermsOfUseAndAccessValidator.isTOUAValid(dataset.getLatestVersion().getTermsOfUseAndAccess(), null); + + if (!hasValidTerms){ + return error(Status.CONFLICT, BundleUtil.getStringFromBundle("dataset.message.toua.invalid")); + } // client is superadmin or (client has EditDataset permission on these files and files are unreleased) /* @@ -1560,21 +1592,21 @@ public Response removeFileEmbargo(@PathParam("id") String id, String jsonBody){ @PUT - @Path("{linkedDatasetId}/link/{linkingDataverseAlias}") - public Response linkDataset(@PathParam("linkedDatasetId") String linkedDatasetId, @PathParam("linkingDataverseAlias") String linkingDataverseAlias) { - try{ - User u = findUserOrDie(); + @Path("{linkedDatasetId}/link/{linkingDataverseAlias}") + public Response linkDataset(@PathParam("linkedDatasetId") String linkedDatasetId, @PathParam("linkingDataverseAlias") String linkingDataverseAlias) { + try { + User u = findUserOrDie(); Dataset linked = findDatasetOrDie(linkedDatasetId); Dataverse linking = findDataverseOrDie(linkingDataverseAlias); if (linked == null){ return 
error(Response.Status.BAD_REQUEST, "Linked Dataset not found."); - } - if (linking == null){ + } + if (linking == null) { return error(Response.Status.BAD_REQUEST, "Linking Dataverse not found."); - } + } execCommand(new LinkDatasetCommand( createDataverseRequest(u), linking, linked - )); + )); return ok("Dataset " + linked.getId() + " linked successfully to " + linking.getAlias()); } catch (WrappedResponse ex) { return ex.getResponse(); @@ -1588,8 +1620,7 @@ public Response getCustomTermsTab(@PathParam("id") String id, @PathParam("versio User user = session.getUser(); String persistentId; try { - if (getDatasetVersionOrDie(createDataverseRequest(user), versionId, findDatasetOrDie(id), uriInfo, headers) - .getTermsOfUseAndAccess().getLicense() != null) { + if (DatasetUtil.getLicense(getDatasetVersionOrDie(createDataverseRequest(user), versionId, findDatasetOrDie(id), uriInfo, headers)) != null) { return error(Status.NOT_FOUND, "This Dataset has no custom license"); } persistentId = getRequestParameter(":persistentId".substring(1)); @@ -1630,8 +1661,8 @@ public Response getLinks(@PathParam("id") String idSupplied ) { /** * Add a given assignment to a given user or group - * @param ra role assignment DTO - * @param id dataset id + * @param ra role assignment DTO + * @param id dataset id * @param apiKey */ @POST @@ -1643,7 +1674,7 @@ public Response createAssignment(RoleAssignmentDTO ra, @PathParam("identifier") RoleAssignee assignee = findAssignee(ra.getAssignee()); if (assignee == null) { return error(Response.Status.BAD_REQUEST, BundleUtil.getStringFromBundle("datasets.api.grant.role.assignee.not.found.error")); - } + } DataverseRole theRole; Dataverse dv = dataset.getOwner(); @@ -1695,10 +1726,10 @@ public Response deleteAssignment(@PathParam("id") long assignmentId, @PathParam( @GET @Path("{identifier}/assignments") public Response getAssignments(@PathParam("identifier") String id) { - return response( req -> - ok( execCommand( - new ListRoleAssignments(req, 
findDatasetOrDie(id))) - .stream().map(ra->json(ra)).collect(toJsonArray())) ); + return response(req -> + ok(execCommand( + new ListRoleAssignments(req, findDatasetOrDie(id))) + .stream().map(ra -> json(ra)).collect(toJsonArray()))); } @GET @@ -1706,8 +1737,8 @@ public Response getAssignments(@PathParam("identifier") String id) { public Response getPrivateUrlData(@PathParam("id") String idSupplied) { return response( req -> { PrivateUrl privateUrl = execCommand(new GetPrivateUrlCommand(req, findDatasetOrDie(idSupplied))); - return (privateUrl != null) ? ok(json(privateUrl)) - : error(Response.Status.NOT_FOUND, "Private URL not found."); + return (privateUrl != null) ? ok(json(privateUrl)) + : error(Response.Status.NOT_FOUND, "Private URL not found."); }); } @@ -1717,7 +1748,7 @@ public Response createPrivateUrl(@PathParam("id") String idSupplied,@DefaultValu if(anonymizedAccess && settingsSvc.getValueForKey(SettingsServiceBean.Key.AnonymizedFieldTypeNames)==null) { throw new NotAcceptableException("Anonymized Access not enabled"); } - return response( req -> + return response(req -> ok(json(execCommand( new CreatePrivateUrlCommand(req, findDatasetOrDie(idSupplied), anonymizedAccess))))); } @@ -1851,13 +1882,13 @@ public Response getRsync(@PathParam("identifier") String id) { } /** - * This api endpoint triggers the creation of a "package" file in a dataset - * after that package has been moved onto the same filesystem via the Data Capture Module. + * This api endpoint triggers the creation of a "package" file in a dataset + * after that package has been moved onto the same filesystem via the Data Capture Module. * The package is really just a way that Dataverse interprets a folder created by DCM, seeing it as just one file. * The "package" can be downloaded over RSAL. - * + * * This endpoint currently supports both posix file storage and AWS s3 storage in Dataverse, and depending on which one is active acts accordingly. 
- * + * * The initial design of the DCM/Dataverse interaction was not to use packages, but to allow import of all individual files natively into Dataverse. * But due to the possibly immense number of files (millions) the package approach was taken. * This is relevant because the posix ("file") code contains many remnants of that development work. @@ -1881,7 +1912,7 @@ public Response receiveChecksumValidationResults(@PathParam("identifier") String try { Dataset dataset = findDatasetOrDie(id); if ("validation passed".equals(statusMessageFromDcm)) { - logger.log(Level.INFO, "Checksum Validation passed for DCM."); + logger.log(Level.INFO, "Checksum Validation passed for DCM."); String storageDriver = dataset.getDataverseContext().getEffectiveStorageDriverId(); String uploadFolder = jsonFromDcm.getString("uploadFolder"); @@ -1904,7 +1935,7 @@ public Response receiveChecksumValidationResults(@PathParam("identifier") String String message = wr.getMessage(); return error(Response.Status.INTERNAL_SERVER_ERROR, "Uploaded files have passed checksum validation but something went wrong while attempting to put the files into Dataverse. Message was '" + message + "'."); } - } else if(storageDriverType.equals("s3")) { + } else if(storageDriverType.equals(DataAccess.S3)) { logger.log(Level.INFO, "S3 storage driver used for DCM (dataset id={0})", dataset.getId()); try { @@ -1943,10 +1974,10 @@ public Response receiveChecksumValidationResults(@PathParam("identifier") String JsonObjectBuilder job = Json.createObjectBuilder(); return ok(job); - } catch (IOException e) { + } catch (IOException e) { String message = e.getMessage(); return error(Response.Status.INTERNAL_SERVER_ERROR, "Uploaded files have passed checksum validation but something went wrong while attempting to move the files into Dataverse. 
Message was '" + message + "'."); - } + } } else { return error(Response.Status.INTERNAL_SERVER_ERROR, "Invalid storage driver in Dataverse, not compatible with dcm"); } @@ -1999,7 +2030,7 @@ public Response returnToAuthor(@PathParam("id") String idSupplied, String jsonBo JsonObject json = Json.createReader(rdr).readObject(); try { Dataset dataset = findDatasetOrDie(idSupplied); - String reasonForReturn = null; + String reasonForReturn = null; reasonForReturn = json.getString("reasonForReturn"); // TODO: Once we add a box for the curator to type into, pass the reason for return to the ReturnDatasetToAuthorCommand and delete this check and call to setReturnReason on the API side. if (reasonForReturn == null || reasonForReturn.isEmpty()) { @@ -2052,7 +2083,7 @@ public Response setCurationStatus(@PathParam("id") String idSupplied, @QueryPara return Response.fromResponse(wr.getResponse()).status(Response.Status.BAD_REQUEST).build(); } } - + @DELETE @Path("{id}/curationStatus") public Response deleteCurationStatus(@PathParam("id") String idSupplied) { @@ -2072,228 +2103,228 @@ public Response deleteCurationStatus(@PathParam("id") String idSupplied) { return Response.fromResponse(wr.getResponse()).status(Response.Status.BAD_REQUEST).build(); } } - -@GET -@Path("{id}/uploadsid") -@Deprecated -public Response getUploadUrl(@PathParam("id") String idSupplied) { - try { - Dataset dataset = findDatasetOrDie(idSupplied); - - boolean canUpdateDataset = false; - try { - canUpdateDataset = permissionSvc.requestOn(createDataverseRequest(findUserOrDie()), dataset).canIssue(UpdateDatasetVersionCommand.class); - } catch (WrappedResponse ex) { - logger.info("Exception thrown while trying to figure out permissions while getting upload URL for dataset id " + dataset.getId() + ": " + ex.getLocalizedMessage()); - throw ex; - } - if (!canUpdateDataset) { - return error(Response.Status.FORBIDDEN, "You are not permitted to upload files to this dataset."); - } - S3AccessIO s3io = 
FileUtil.getS3AccessForDirectUpload(dataset); - if(s3io == null) { - return error(Response.Status.NOT_FOUND,"Direct upload not supported for files in this dataset: " + dataset.getId()); - } - String url = null; - String storageIdentifier = null; - try { - url = s3io.generateTemporaryS3UploadUrl(); - storageIdentifier = FileUtil.getStorageIdentifierFromLocation(s3io.getStorageLocation()); - } catch (IOException io) { - logger.warning(io.getMessage()); - throw new WrappedResponse(io, error( Response.Status.INTERNAL_SERVER_ERROR, "Could not create process direct upload request")); - } - - JsonObjectBuilder response = Json.createObjectBuilder() - .add("url", url) - .add("storageIdentifier", storageIdentifier ); - return ok(response); - } catch (WrappedResponse wr) { - return wr.getResponse(); - } -} -@GET -@Path("{id}/uploadurls") -public Response getMPUploadUrls(@PathParam("id") String idSupplied, @QueryParam("size") long fileSize) { - try { - Dataset dataset = findDatasetOrDie(idSupplied); - - boolean canUpdateDataset = false; - try { - canUpdateDataset = permissionSvc.requestOn(createDataverseRequest(findUserOrDie()), dataset) - .canIssue(UpdateDatasetVersionCommand.class); - } catch (WrappedResponse ex) { - logger.info( - "Exception thrown while trying to figure out permissions while getting upload URLs for dataset id " - + dataset.getId() + ": " + ex.getLocalizedMessage()); - throw ex; - } - if (!canUpdateDataset) { - return error(Response.Status.FORBIDDEN, "You are not permitted to upload files to this dataset."); - } - S3AccessIO s3io = FileUtil.getS3AccessForDirectUpload(dataset); - if (s3io == null) { - return error(Response.Status.NOT_FOUND, - "Direct upload not supported for files in this dataset: " + dataset.getId()); - } - JsonObjectBuilder response = null; - String storageIdentifier = null; - try { - storageIdentifier = FileUtil.getStorageIdentifierFromLocation(s3io.getStorageLocation()); - response = 
s3io.generateTemporaryS3UploadUrls(dataset.getGlobalId().asString(), storageIdentifier, fileSize); - - } catch (IOException io) { - logger.warning(io.getMessage()); - throw new WrappedResponse(io, - error(Response.Status.INTERNAL_SERVER_ERROR, "Could not create process direct upload request")); - } - - response.add("storageIdentifier", storageIdentifier); - return ok(response); - } catch (WrappedResponse wr) { - return wr.getResponse(); - } -} + @GET + @Path("{id}/uploadsid") + @Deprecated + public Response getUploadUrl(@PathParam("id") String idSupplied) { + try { + Dataset dataset = findDatasetOrDie(idSupplied); -@DELETE -@Path("mpupload") -public Response abortMPUpload(@QueryParam("globalid") String idSupplied, @QueryParam("storageidentifier") String storageidentifier, @QueryParam("uploadid") String uploadId) { - try { - Dataset dataset = datasetSvc.findByGlobalId(idSupplied); - //Allow the API to be used within a session (e.g. for direct upload in the UI) - User user =session.getUser(); - if (!user.isAuthenticated()) { - try { - user = findAuthenticatedUserOrDie(); - } catch (WrappedResponse ex) { - logger.info( - "Exception thrown while trying to figure out permissions while getting aborting upload for dataset id " - + dataset.getId() + ": " + ex.getLocalizedMessage()); - throw ex; - } - } - boolean allowed = false; - if (dataset != null) { - allowed = permissionSvc.requestOn(createDataverseRequest(user), dataset) - .canIssue(UpdateDatasetVersionCommand.class); - } else { - /* - * The only legitimate case where a global id won't correspond to a dataset is - * for uploads during creation. Given that this call will still fail unless all - * three parameters correspond to an active multipart upload, it should be safe - * to allow the attempt for an authenticated user. 
If there are concerns about - * permissions, one could check with the current design that the user is allowed - * to create datasets in some dataverse that is configured to use the storage - * provider specified in the storageidentifier, but testing for the ability to - * create a dataset in a specific dataverse would requiring changing the design - * somehow (e.g. adding the ownerId to this call). - */ - allowed = true; - } - if (!allowed) { - return error(Response.Status.FORBIDDEN, - "You are not permitted to abort file uploads with the supplied parameters."); - } - try { - S3AccessIO.abortMultipartUpload(idSupplied, storageidentifier, uploadId); - } catch (IOException io) { - logger.warning("Multipart upload abort failed for uploadId: " + uploadId + " storageidentifier=" - + storageidentifier + " dataset Id: " + dataset.getId()); - logger.warning(io.getMessage()); - throw new WrappedResponse(io, - error(Response.Status.INTERNAL_SERVER_ERROR, "Could not abort multipart upload")); - } - return Response.noContent().build(); - } catch (WrappedResponse wr) { - return wr.getResponse(); - } -} + boolean canUpdateDataset = false; + try { + canUpdateDataset = permissionSvc.requestOn(createDataverseRequest(findUserOrDie()), dataset).canIssue(UpdateDatasetVersionCommand.class); + } catch (WrappedResponse ex) { + logger.info("Exception thrown while trying to figure out permissions while getting upload URL for dataset id " + dataset.getId() + ": " + ex.getLocalizedMessage()); + throw ex; + } + if (!canUpdateDataset) { + return error(Response.Status.FORBIDDEN, "You are not permitted to upload files to this dataset."); + } + S3AccessIO s3io = FileUtil.getS3AccessForDirectUpload(dataset); + if (s3io == null) { + return error(Response.Status.NOT_FOUND, "Direct upload not supported for files in this dataset: " + dataset.getId()); + } + String url = null; + String storageIdentifier = null; + try { + url = s3io.generateTemporaryS3UploadUrl(); + storageIdentifier = 
FileUtil.getStorageIdentifierFromLocation(s3io.getStorageLocation()); + } catch (IOException io) { + logger.warning(io.getMessage()); + throw new WrappedResponse(io, error(Response.Status.INTERNAL_SERVER_ERROR, "Could not create process direct upload request")); + } -@PUT -@Path("mpupload") -public Response completeMPUpload(String partETagBody, @QueryParam("globalid") String idSupplied, @QueryParam("storageidentifier") String storageidentifier, @QueryParam("uploadid") String uploadId) { - try { - Dataset dataset = datasetSvc.findByGlobalId(idSupplied); - //Allow the API to be used within a session (e.g. for direct upload in the UI) - User user =session.getUser(); - if (!user.isAuthenticated()) { - try { - user=findAuthenticatedUserOrDie(); - } catch (WrappedResponse ex) { - logger.info( - "Exception thrown while trying to figure out permissions to complete mpupload for dataset id " - + dataset.getId() + ": " + ex.getLocalizedMessage()); - throw ex; - } - } - boolean allowed = false; - if (dataset != null) { - allowed = permissionSvc.requestOn(createDataverseRequest(user), dataset) - .canIssue(UpdateDatasetVersionCommand.class); - } else { - /* - * The only legitimate case where a global id won't correspond to a dataset is - * for uploads during creation. Given that this call will still fail unless all - * three parameters correspond to an active multipart upload, it should be safe - * to allow the attempt for an authenticated user. If there are concerns about - * permissions, one could check with the current design that the user is allowed - * to create datasets in some dataverse that is configured to use the storage - * provider specified in the storageidentifier, but testing for the ability to - * create a dataset in a specific dataverse would requiring changing the design - * somehow (e.g. adding the ownerId to this call). 
- */ - allowed = true; - } - if (!allowed) { - return error(Response.Status.FORBIDDEN, - "You are not permitted to complete file uploads with the supplied parameters."); - } - List eTagList = new ArrayList(); - logger.info("Etags: " + partETagBody); - try { - JsonReader jsonReader = Json.createReader(new StringReader(partETagBody)); - JsonObject object = jsonReader.readObject(); - jsonReader.close(); - for(String partNo : object.keySet()) { - eTagList.add(new PartETag(Integer.parseInt(partNo), object.getString(partNo))); - } - for(PartETag et: eTagList) { - logger.info("Part: " + et.getPartNumber() + " : " + et.getETag()); - } - } catch (JsonException je) { - logger.info("Unable to parse eTags from: " + partETagBody); - throw new WrappedResponse(je, error( Response.Status.INTERNAL_SERVER_ERROR, "Could not complete multipart upload")); - } - try { - S3AccessIO.completeMultipartUpload(idSupplied, storageidentifier, uploadId, eTagList); - } catch (IOException io) { - logger.warning("Multipart upload completion failed for uploadId: " + uploadId +" storageidentifier=" + storageidentifier + " globalId: " + idSupplied); - logger.warning(io.getMessage()); - try { - S3AccessIO.abortMultipartUpload(idSupplied, storageidentifier, uploadId); - } catch (IOException e) { - logger.severe("Also unable to abort the upload (and release the space on S3 for uploadId: " + uploadId +" storageidentifier=" + storageidentifier + " globalId: " + idSupplied); - logger.severe(io.getMessage()); - } - - throw new WrappedResponse(io, error( Response.Status.INTERNAL_SERVER_ERROR, "Could not complete multipart upload")); - } - return ok("Multipart Upload completed"); - } catch (WrappedResponse wr) { - return wr.getResponse(); - } -} + JsonObjectBuilder response = Json.createObjectBuilder() + .add("url", url) + .add("storageIdentifier", storageIdentifier); + return ok(response); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + } + + @GET + @Path("{id}/uploadurls") + public Response 
getMPUploadUrls(@PathParam("id") String idSupplied, @QueryParam("size") long fileSize) { + try { + Dataset dataset = findDatasetOrDie(idSupplied); + + boolean canUpdateDataset = false; + try { + canUpdateDataset = permissionSvc.requestOn(createDataverseRequest(findUserOrDie()), dataset) + .canIssue(UpdateDatasetVersionCommand.class); + } catch (WrappedResponse ex) { + logger.info( + "Exception thrown while trying to figure out permissions while getting upload URLs for dataset id " + + dataset.getId() + ": " + ex.getLocalizedMessage()); + throw ex; + } + if (!canUpdateDataset) { + return error(Response.Status.FORBIDDEN, "You are not permitted to upload files to this dataset."); + } + S3AccessIO s3io = FileUtil.getS3AccessForDirectUpload(dataset); + if (s3io == null) { + return error(Response.Status.NOT_FOUND, + "Direct upload not supported for files in this dataset: " + dataset.getId()); + } + JsonObjectBuilder response = null; + String storageIdentifier = null; + try { + storageIdentifier = FileUtil.getStorageIdentifierFromLocation(s3io.getStorageLocation()); + response = s3io.generateTemporaryS3UploadUrls(dataset.getGlobalId().asString(), storageIdentifier, fileSize); + + } catch (IOException io) { + logger.warning(io.getMessage()); + throw new WrappedResponse(io, + error(Response.Status.INTERNAL_SERVER_ERROR, "Could not create process direct upload request")); + } + + response.add("storageIdentifier", storageIdentifier); + return ok(response); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + } + + @DELETE + @Path("mpupload") + public Response abortMPUpload(@QueryParam("globalid") String idSupplied, @QueryParam("storageidentifier") String storageidentifier, @QueryParam("uploadid") String uploadId) { + try { + Dataset dataset = datasetSvc.findByGlobalId(idSupplied); + //Allow the API to be used within a session (e.g. 
for direct upload in the UI) + User user = session.getUser(); + if (!user.isAuthenticated()) { + try { + user = findAuthenticatedUserOrDie(); + } catch (WrappedResponse ex) { + logger.info( + "Exception thrown while trying to figure out permissions while getting aborting upload for dataset id " + + dataset.getId() + ": " + ex.getLocalizedMessage()); + throw ex; + } + } + boolean allowed = false; + if (dataset != null) { + allowed = permissionSvc.requestOn(createDataverseRequest(user), dataset) + .canIssue(UpdateDatasetVersionCommand.class); + } else { + /* + * The only legitimate case where a global id won't correspond to a dataset is + * for uploads during creation. Given that this call will still fail unless all + * three parameters correspond to an active multipart upload, it should be safe + * to allow the attempt for an authenticated user. If there are concerns about + * permissions, one could check with the current design that the user is allowed + * to create datasets in some dataverse that is configured to use the storage + * provider specified in the storageidentifier, but testing for the ability to + * create a dataset in a specific dataverse would requiring changing the design + * somehow (e.g. adding the ownerId to this call). 
+ */ + allowed = true; + } + if (!allowed) { + return error(Response.Status.FORBIDDEN, + "You are not permitted to abort file uploads with the supplied parameters."); + } + try { + S3AccessIO.abortMultipartUpload(idSupplied, storageidentifier, uploadId); + } catch (IOException io) { + logger.warning("Multipart upload abort failed for uploadId: " + uploadId + " storageidentifier=" + + storageidentifier + " dataset Id: " + dataset.getId()); + logger.warning(io.getMessage()); + throw new WrappedResponse(io, + error(Response.Status.INTERNAL_SERVER_ERROR, "Could not abort multipart upload")); + } + return Response.noContent().build(); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + } + + @PUT + @Path("mpupload") + public Response completeMPUpload(String partETagBody, @QueryParam("globalid") String idSupplied, @QueryParam("storageidentifier") String storageidentifier, @QueryParam("uploadid") String uploadId) { + try { + Dataset dataset = datasetSvc.findByGlobalId(idSupplied); + //Allow the API to be used within a session (e.g. for direct upload in the UI) + User user = session.getUser(); + if (!user.isAuthenticated()) { + try { + user = findAuthenticatedUserOrDie(); + } catch (WrappedResponse ex) { + logger.info( + "Exception thrown while trying to figure out permissions to complete mpupload for dataset id " + + dataset.getId() + ": " + ex.getLocalizedMessage()); + throw ex; + } + } + boolean allowed = false; + if (dataset != null) { + allowed = permissionSvc.requestOn(createDataverseRequest(user), dataset) + .canIssue(UpdateDatasetVersionCommand.class); + } else { + /* + * The only legitimate case where a global id won't correspond to a dataset is + * for uploads during creation. Given that this call will still fail unless all + * three parameters correspond to an active multipart upload, it should be safe + * to allow the attempt for an authenticated user. 
If there are concerns about + * permissions, one could check with the current design that the user is allowed + * to create datasets in some dataverse that is configured to use the storage + * provider specified in the storageidentifier, but testing for the ability to + * create a dataset in a specific dataverse would requiring changing the design + * somehow (e.g. adding the ownerId to this call). + */ + allowed = true; + } + if (!allowed) { + return error(Response.Status.FORBIDDEN, + "You are not permitted to complete file uploads with the supplied parameters."); + } + List eTagList = new ArrayList(); + logger.info("Etags: " + partETagBody); + try { + JsonReader jsonReader = Json.createReader(new StringReader(partETagBody)); + JsonObject object = jsonReader.readObject(); + jsonReader.close(); + for (String partNo : object.keySet()) { + eTagList.add(new PartETag(Integer.parseInt(partNo), object.getString(partNo))); + } + for (PartETag et : eTagList) { + logger.info("Part: " + et.getPartNumber() + " : " + et.getETag()); + } + } catch (JsonException je) { + logger.info("Unable to parse eTags from: " + partETagBody); + throw new WrappedResponse(je, error(Response.Status.INTERNAL_SERVER_ERROR, "Could not complete multipart upload")); + } + try { + S3AccessIO.completeMultipartUpload(idSupplied, storageidentifier, uploadId, eTagList); + } catch (IOException io) { + logger.warning("Multipart upload completion failed for uploadId: " + uploadId + " storageidentifier=" + storageidentifier + " globalId: " + idSupplied); + logger.warning(io.getMessage()); + try { + S3AccessIO.abortMultipartUpload(idSupplied, storageidentifier, uploadId); + } catch (IOException e) { + logger.severe("Also unable to abort the upload (and release the space on S3 for uploadId: " + uploadId + " storageidentifier=" + storageidentifier + " globalId: " + idSupplied); + logger.severe(io.getMessage()); + } + + throw new WrappedResponse(io, error(Response.Status.INTERNAL_SERVER_ERROR, "Could not complete 
multipart upload")); + } + return ok("Multipart Upload completed"); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + } /** * Add a File to an existing Dataset - * + * * @param idSupplied * @param jsonData * @param fileInputStream * @param contentDispositionHeader * @param formDataBodyPart - * @return + * @return */ @POST @Path("{id}/add") @@ -2318,7 +2349,7 @@ public Response addFileToDataset(@PathParam("id") String idSupplied, } catch (WrappedResponse ex) { return error(Response.Status.FORBIDDEN, BundleUtil.getStringFromBundle("file.addreplace.error.auth") - ); + ); } @@ -2331,7 +2362,7 @@ public Response addFileToDataset(@PathParam("id") String idSupplied, try { dataset = findDatasetOrDie(idSupplied); } catch (WrappedResponse wr) { - return wr.getResponse(); + return wr.getResponse(); } //------------------------------------ @@ -2350,12 +2381,12 @@ public Response addFileToDataset(@PathParam("id") String idSupplied, // (2a) Load up optional params via JSON //--------------------------------------- OptionalFileParams optionalFileParams = null; - msgt("(api) jsonData: " + jsonData); + msgt("(api) jsonData: " + jsonData); try { optionalFileParams = new OptionalFileParams(jsonData); } catch (DataFileTagException ex) { - return error( Response.Status.BAD_REQUEST, ex.getMessage()); + return error(Response.Status.BAD_REQUEST, ex.getMessage()); } catch (ClassCastException | com.google.gson.JsonParseException ex) { return error(Response.Status.BAD_REQUEST, BundleUtil.getStringFromBundle("file.addreplace.error.parsing")); @@ -2367,42 +2398,47 @@ public Response addFileToDataset(@PathParam("id") String idSupplied, String newFilename = null; String newFileContentType = null; String newStorageIdentifier = null; - if (null == contentDispositionHeader) { - if (optionalFileParams.hasStorageIdentifier()) { - newStorageIdentifier = optionalFileParams.getStorageIdentifier(); - // ToDo - check that storageIdentifier is valid - if (optionalFileParams.hasFileName()) { 
- newFilename = optionalFileParams.getFileName(); - if (optionalFileParams.hasMimetype()) { - newFileContentType = optionalFileParams.getMimeType(); - } - } - } else { - return error(BAD_REQUEST, - "You must upload a file or provide a storageidentifier, filename, and mimetype."); - } - } else { - newFilename = contentDispositionHeader.getFileName(); - // Let's see if the form data part has the mime (content) type specified. - // Note that we don't want to rely on formDataBodyPart.getMediaType() - - // because that defaults to "text/plain" when no "Content-Type:" header is - // present. Instead we'll go through the headers, and see if "Content-Type:" - // is there. If not, we'll default to "application/octet-stream" - the generic - // unknown type. This will prompt the application to run type detection and - // potentially find something more accurate. - //newFileContentType = formDataBodyPart.getMediaType().toString(); - - for (String header : formDataBodyPart.getHeaders().keySet()) { - if (header.equalsIgnoreCase("Content-Type")) { - newFileContentType = formDataBodyPart.getHeaders().get(header).get(0); - } - } - if (newFileContentType == null) { - newFileContentType = FileUtil.MIME_TYPE_UNDETERMINED_DEFAULT; - } - } + if (null == contentDispositionHeader) { + if (optionalFileParams.hasStorageIdentifier()) { + newStorageIdentifier = optionalFileParams.getStorageIdentifier(); + newStorageIdentifier = DataAccess.expandStorageIdentifierIfNeeded(newStorageIdentifier); + + if(!DataAccess.uploadToDatasetAllowed(dataset, newStorageIdentifier)) { + return error(BAD_REQUEST, + "Dataset store configuration does not allow provided storageIdentifier."); + } + if (optionalFileParams.hasFileName()) { + newFilename = optionalFileParams.getFileName(); + if (optionalFileParams.hasMimetype()) { + newFileContentType = optionalFileParams.getMimeType(); + } + } + } else { + return error(BAD_REQUEST, + "You must upload a file or provide a valid storageidentifier, filename, and 
mimetype."); + } + } else { + newFilename = contentDispositionHeader.getFileName(); + // Let's see if the form data part has the mime (content) type specified. + // Note that we don't want to rely on formDataBodyPart.getMediaType() - + // because that defaults to "text/plain" when no "Content-Type:" header is + // present. Instead we'll go through the headers, and see if "Content-Type:" + // is there. If not, we'll default to "application/octet-stream" - the generic + // unknown type. This will prompt the application to run type detection and + // potentially find something more accurate. + // newFileContentType = formDataBodyPart.getMediaType().toString(); + + for (String header : formDataBodyPart.getHeaders().keySet()) { + if (header.equalsIgnoreCase("Content-Type")) { + newFileContentType = formDataBodyPart.getHeaders().get(header).get(0); + } + } + if (newFileContentType == null) { + newFileContentType = FileUtil.MIME_TYPE_UNDETERMINED_DEFAULT; + } + } + - //------------------- // (3) Create the AddReplaceFileHelper object //------------------- @@ -2410,11 +2446,11 @@ public Response addFileToDataset(@PathParam("id") String idSupplied, DataverseRequest dvRequest2 = createDataverseRequest(authUser); AddReplaceFileHelper addFileHelper = new AddReplaceFileHelper(dvRequest2, - ingestService, - datasetService, - fileService, - permissionSvc, - commandEngine, + ingestService, + datasetService, + fileService, + permissionSvc, + commandEngine, systemConfig, licenseSvc); @@ -2423,16 +2459,20 @@ public Response addFileToDataset(@PathParam("id") String idSupplied, // (4) Run "runAddFileByDatasetId" //------------------- addFileHelper.runAddFileByDataset(dataset, - newFilename, - newFileContentType, - newStorageIdentifier, - fileInputStream, - optionalFileParams); + newFilename, + newFileContentType, + newStorageIdentifier, + fileInputStream, + optionalFileParams); if (addFileHelper.hasError()){ + //conflict response status added for 8859 + if 
(Response.Status.CONFLICT.equals(addFileHelper.getHttpErrorCode())){ + return conflict(addFileHelper.getErrorMessagesAsString("\n")); + } return error(addFileHelper.getHttpErrorCode(), addFileHelper.getErrorMessagesAsString("\n")); - }else{ + } else { String successMsg = BundleUtil.getStringFromBundle("file.addreplace.success.add"); try { //msgt("as String: " + addFileHelper.getSuccessResult()); @@ -2458,73 +2498,79 @@ public Response addFileToDataset(@PathParam("id") String idSupplied, } } - + } // end: addFileToDataset - - private void msg(String m){ + private void msg(String m) { //System.out.println(m); logger.fine(m); } - private void dashes(){ + + private void dashes() { msg("----------------"); } - private void msgt(String m){ - dashes(); msg(m); dashes(); + + private void msgt(String m) { + dashes(); + msg(m); + dashes(); } - - - public static T handleVersion( String versionId, DsVersionHandler hdl ) - throws WrappedResponse { + + + public static T handleVersion(String versionId, DsVersionHandler hdl) + throws WrappedResponse { switch (versionId) { - case ":latest": return hdl.handleLatest(); - case ":draft": return hdl.handleDraft(); - case ":latest-published": return hdl.handleLatestPublished(); + case ":latest": + return hdl.handleLatest(); + case ":draft": + return hdl.handleDraft(); + case ":latest-published": + return hdl.handleLatestPublished(); default: try { String[] versions = versionId.split("\\."); switch (versions.length) { case 1: - return hdl.handleSpecific(Long.parseLong(versions[0]), (long)0.0); + return hdl.handleSpecific(Long.parseLong(versions[0]), (long) 0.0); case 2: - return hdl.handleSpecific( Long.parseLong(versions[0]), Long.parseLong(versions[1]) ); + return hdl.handleSpecific(Long.parseLong(versions[0]), Long.parseLong(versions[1])); default: - throw new WrappedResponse(error( Response.Status.BAD_REQUEST, "Illegal version identifier '" + versionId + "'")); + throw new WrappedResponse(error(Response.Status.BAD_REQUEST, "Illegal 
version identifier '" + versionId + "'")); } - } catch ( NumberFormatException nfe ) { - throw new WrappedResponse( error( Response.Status.BAD_REQUEST, "Illegal version identifier '" + versionId + "'") ); + } catch (NumberFormatException nfe) { + throw new WrappedResponse(error(Response.Status.BAD_REQUEST, "Illegal version identifier '" + versionId + "'")); } } } - - private DatasetVersion getDatasetVersionOrDie( final DataverseRequest req, String versionNumber, final Dataset ds, UriInfo uriInfo, HttpHeaders headers) throws WrappedResponse { - DatasetVersion dsv = execCommand( handleVersion(versionNumber, new DsVersionHandler>(){ - @Override - public Command handleLatest() { - return new GetLatestAccessibleDatasetVersionCommand(req, ds); - } + private DatasetVersion getDatasetVersionOrDie(final DataverseRequest req, String versionNumber, final Dataset ds, UriInfo uriInfo, HttpHeaders headers) throws WrappedResponse { + DatasetVersion dsv = execCommand(handleVersion(versionNumber, new DsVersionHandler>() { - @Override - public Command handleDraft() { - return new GetDraftDatasetVersionCommand(req, ds); - } - - @Override - public Command handleSpecific(long major, long minor) { - return new GetSpecificPublishedDatasetVersionCommand(req, ds, major, minor); - } + @Override + public Command handleLatest() { + return new GetLatestAccessibleDatasetVersionCommand(req, ds); + } - @Override - public Command handleLatestPublished() { - return new GetLatestPublishedDatasetVersionCommand(req, ds); - } - })); - if ( dsv == null || dsv.getId() == null ) { - throw new WrappedResponse( notFound("Dataset version " + versionNumber + " of dataset " + ds.getId() + " not found") ); + @Override + public Command handleDraft() { + return new GetDraftDatasetVersionCommand(req, ds); + } + + @Override + public Command handleSpecific(long major, long minor) { + return new GetSpecificPublishedDatasetVersionCommand(req, ds, major, minor); + } + + @Override + public Command 
handleLatestPublished() { + return new GetLatestPublishedDatasetVersionCommand(req, ds); + } + })); + if (dsv == null || dsv.getId() == null) { + throw new WrappedResponse(notFound("Dataset version " + versionNumber + " of dataset " + ds.getId() + " not found")); } - if (dsv.isReleased()) { + if (dsv.isReleased()&& uriInfo!=null) { MakeDataCountLoggingServiceBean.MakeDataCountEntry entry = new MakeDataCountEntry(uriInfo, headers, dvRequestService, ds); mdcLogService.logEntry(entry); } @@ -2538,14 +2584,14 @@ public Response getLocksForDataset(@PathParam("identifier") String id, @QueryPar Dataset dataset = null; try { dataset = findDatasetOrDie(id); - Set locks; + Set locks; if (lockType == null) { locks = dataset.getLocks(); } else { // request for a specific type lock: DatasetLock lock = dataset.getLockFor(lockType); - locks = new HashSet<>(); + locks = new HashSet<>(); if (lock != null) { locks.add(lock); } @@ -2555,9 +2601,9 @@ public Response getLocksForDataset(@PathParam("identifier") String id, @QueryPar } catch (WrappedResponse wr) { return wr.getResponse(); - } - } - + } + } + @DELETE @Path("{identifier}/locks") public Response deleteLocks(@PathParam("identifier") String id, @QueryParam("type") DatasetLock.Reason lockType) { @@ -2630,7 +2676,7 @@ public Response lockDataset(@PathParam("identifier") String id, @PathParam("type AuthenticatedUser user = findAuthenticatedUserOrDie(); if (!user.isSuperuser()) { return error(Response.Status.FORBIDDEN, "This API end point can be used by superusers only."); - } + } Dataset dataset = findDatasetOrDie(id); DatasetLock lock = dataset.getLockFor(lockType); if (lock != null) { @@ -2723,7 +2769,7 @@ public Response getMakeDataCountCitations(@PathParam("id") String idSupplied) { Dataset dataset = findDatasetOrDie(idSupplied); JsonArrayBuilder datasetsCitations = Json.createArrayBuilder(); List externalCitations = datasetExternalCitationsService.getDatasetExternalCitationsByDataset(dataset); - for (DatasetExternalCitations 
citation : externalCitations ){ + for (DatasetExternalCitations citation : externalCitations) { JsonObjectBuilder candidateObj = Json.createObjectBuilder(); /** * In the future we can imagine storing and presenting more @@ -2734,9 +2780,9 @@ public Response getMakeDataCountCitations(@PathParam("id") String idSupplied) { */ candidateObj.add("citationUrl", citation.getCitedByUrl()); datasetsCitations.add(candidateObj); - } - return ok(datasetsCitations); - + } + return ok(datasetsCitations); + } catch (WrappedResponse wr) { return wr.getResponse(); } @@ -2752,20 +2798,20 @@ public Response getMakeDataCountMetricCurrentMonth(@PathParam("id") String idSup @GET @Path("{identifier}/storagesize") - public Response getStorageSize(@PathParam("identifier") String dvIdtf, @QueryParam("includeCached") boolean includeCached, - @Context UriInfo uriInfo, @Context HttpHeaders headers) throws WrappedResponse { - + public Response getStorageSize(@PathParam("identifier") String dvIdtf, @QueryParam("includeCached") boolean includeCached, + @Context UriInfo uriInfo, @Context HttpHeaders headers) throws WrappedResponse { + return response(req -> ok(MessageFormat.format(BundleUtil.getStringFromBundle("datasets.api.datasize.storage"), - execCommand(new GetDatasetStorageSizeCommand(req, findDatasetOrDie(dvIdtf), includeCached,GetDatasetStorageSizeCommand.Mode.STORAGE, null))))); + execCommand(new GetDatasetStorageSizeCommand(req, findDatasetOrDie(dvIdtf), includeCached, GetDatasetStorageSizeCommand.Mode.STORAGE, null))))); } @GET @Path("{identifier}/versions/{versionId}/downloadsize") - public Response getDownloadSize(@PathParam("identifier") String dvIdtf, @PathParam("versionId") String version, - @Context UriInfo uriInfo, @Context HttpHeaders headers) throws WrappedResponse { - + public Response getDownloadSize(@PathParam("identifier") String dvIdtf, @PathParam("versionId") String version, + @Context UriInfo uriInfo, @Context HttpHeaders headers) throws WrappedResponse { + return 
response(req -> ok(MessageFormat.format(BundleUtil.getStringFromBundle("datasets.api.datasize.download"), - execCommand(new GetDatasetStorageSizeCommand(req, findDatasetOrDie(dvIdtf), false, GetDatasetStorageSizeCommand.Mode.DOWNLOAD, getDatasetVersionOrDie(req, version , findDatasetOrDie(dvIdtf), uriInfo, headers)))))); + execCommand(new GetDatasetStorageSizeCommand(req, findDatasetOrDie(dvIdtf), false, GetDatasetStorageSizeCommand.Mode.DOWNLOAD, getDatasetVersionOrDie(req, version, findDatasetOrDie(dvIdtf), uriInfo, headers)))))); } @GET @@ -2889,7 +2935,7 @@ public Response getFileStore(@PathParam("identifier") String dvIdtf, } catch (WrappedResponse ex) { return error(Response.Status.NOT_FOUND, "No such dataset"); } - + return response(req -> ok(dataset.getEffectiveStorageDriverId())); } @@ -2908,10 +2954,10 @@ public Response setFileStore(@PathParam("identifier") String dvIdtf, } if (!user.isSuperuser()) { return error(Response.Status.FORBIDDEN, "Superusers only."); - } - - Dataset dataset; - + } + + Dataset dataset; + try { dataset = findDatasetOrDie(dvIdtf); } catch (WrappedResponse ex) { @@ -2926,15 +2972,15 @@ public Response setFileStore(@PathParam("identifier") String dvIdtf, return ok("Storage driver set to: " + store.getKey() + "/" + store.getValue()); } } - return error(Response.Status.BAD_REQUEST, - "No Storage Driver found for : " + storageDriverLabel); + return error(Response.Status.BAD_REQUEST, + "No Storage Driver found for : " + storageDriverLabel); } @DELETE @Path("{identifier}/storageDriver") public Response resetFileStore(@PathParam("identifier") String dvIdtf, @Context UriInfo uriInfo, @Context HttpHeaders headers) throws WrappedResponse { - + // Superuser-only: AuthenticatedUser user; try { @@ -2944,10 +2990,10 @@ public Response resetFileStore(@PathParam("identifier") String dvIdtf, } if (!user.isSuperuser()) { return error(Response.Status.FORBIDDEN, "Superusers only."); - } - - Dataset dataset; - + } + + Dataset dataset; + try { dataset = 
findDatasetOrDie(dvIdtf); } catch (WrappedResponse ex) { @@ -2956,14 +3002,14 @@ public Response resetFileStore(@PathParam("identifier") String dvIdtf, dataset.setStorageDriverId(null); datasetService.merge(dataset); - return ok("Storage reset to default: " + DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); + return ok("Storage reset to default: " + DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); } @GET @Path("{identifier}/curationLabelSet") public Response getCurationLabelSet(@PathParam("identifier") String dvIdtf, - @Context UriInfo uriInfo, @Context HttpHeaders headers) throws WrappedResponse { - + @Context UriInfo uriInfo, @Context HttpHeaders headers) throws WrappedResponse { + try { AuthenticatedUser user = findAuthenticatedUserOrDie(); if (!user.isSuperuser()) { @@ -2972,24 +3018,24 @@ public Response getCurationLabelSet(@PathParam("identifier") String dvIdtf, } catch (WrappedResponse wr) { return wr.getResponse(); } - - Dataset dataset; - + + Dataset dataset; + try { dataset = findDatasetOrDie(dvIdtf); } catch (WrappedResponse ex) { return ex.getResponse(); } - + return response(req -> ok(dataset.getEffectiveCurationLabelSetName())); } - + @PUT @Path("{identifier}/curationLabelSet") public Response setCurationLabelSet(@PathParam("identifier") String dvIdtf, @QueryParam("name") String curationLabelSet, @Context UriInfo uriInfo, @Context HttpHeaders headers) throws WrappedResponse { - + // Superuser-only: AuthenticatedUser user; try { @@ -3000,9 +3046,9 @@ public Response setCurationLabelSet(@PathParam("identifier") String dvIdtf, if (!user.isSuperuser()) { return error(Response.Status.FORBIDDEN, "Superusers only."); } - - Dataset dataset; - + + Dataset dataset; + try { dataset = findDatasetOrDie(dvIdtf); } catch (WrappedResponse ex) { @@ -3024,12 +3070,12 @@ public Response setCurationLabelSet(@PathParam("identifier") String dvIdtf, return error(Response.Status.BAD_REQUEST, "No Such Curation Label Set"); } - + @DELETE @Path("{identifier}/curationLabelSet") 
public Response resetCurationLabelSet(@PathParam("identifier") String dvIdtf, @Context UriInfo uriInfo, @Context HttpHeaders headers) throws WrappedResponse { - + // Superuser-only: AuthenticatedUser user; try { @@ -3040,15 +3086,15 @@ public Response resetCurationLabelSet(@PathParam("identifier") String dvIdtf, if (!user.isSuperuser()) { return error(Response.Status.FORBIDDEN, "Superusers only."); } - - Dataset dataset; - + + Dataset dataset; + try { dataset = findDatasetOrDie(dvIdtf); } catch (WrappedResponse ex) { return ex.getResponse(); } - + dataset.setCurationLabelSetName(SystemConfig.DEFAULTCURATIONLABELSET); datasetService.merge(dataset); return ok("Curation Label Set reset to default: " + SystemConfig.DEFAULTCURATIONLABELSET); @@ -3057,16 +3103,16 @@ public Response resetCurationLabelSet(@PathParam("identifier") String dvIdtf, @GET @Path("{identifier}/allowedCurationLabels") public Response getAllowedCurationLabels(@PathParam("identifier") String dvIdtf, - @Context UriInfo uriInfo, @Context HttpHeaders headers) throws WrappedResponse { + @Context UriInfo uriInfo, @Context HttpHeaders headers) throws WrappedResponse { AuthenticatedUser user = null; try { user = findAuthenticatedUserOrDie(); } catch (WrappedResponse wr) { return wr.getResponse(); } - - Dataset dataset; - + + Dataset dataset; + try { dataset = findDatasetOrDie(dvIdtf); } catch (WrappedResponse ex) { @@ -3079,7 +3125,7 @@ public Response getAllowedCurationLabels(@PathParam("identifier") String dvIdtf, return error(Response.Status.FORBIDDEN, "You are not permitted to view the allowed curation labels for this dataset."); } } - + @GET @Path("{identifier}/timestamps") @Produces(MediaType.APPLICATION_JSON) @@ -3109,6 +3155,7 @@ public Response getTimestamps(@PathParam("identifier") String id) { if (dataset.getLastExportTime() != null) { timestamps.add("lastMetadataExportTime", formatter.format(dataset.getLastExportTime().toInstant().atZone(ZoneId.systemDefault()))); + } if 
(dataset.getMostRecentMajorVersionReleaseDate() != null) { @@ -3120,11 +3167,11 @@ public Response getTimestamps(@PathParam("identifier") String id) { timestamps.add("hasStaleIndex", (dataset.getModificationTime() != null && (dataset.getIndexTime() == null || (dataset.getIndexTime().compareTo(dataset.getModificationTime()) <= 0))) ? true - : false); + : false); timestamps.add("hasStalePermissionIndex", (dataset.getPermissionModificationTime() != null && (dataset.getIndexTime() == null || (dataset.getIndexTime().compareTo(dataset.getModificationTime()) <= 0))) ? true - : false); + : false); } // More detail if you can see a draft if (canSeeDraft) { @@ -3153,6 +3200,129 @@ public Response getTimestamps(@PathParam("identifier") String id) { } + @POST + @Path("{id}/addglobusFiles") + @Consumes(MediaType.MULTIPART_FORM_DATA) + public Response addGlobusFilesToDataset(@PathParam("id") String datasetId, + @FormDataParam("jsonData") String jsonData, + @Context UriInfo uriInfo, + @Context HttpHeaders headers + ) throws IOException, ExecutionException, InterruptedException { + + logger.info(" ==== (api addGlobusFilesToDataset) jsonData ====== " + jsonData); + + if (!systemConfig.isHTTPUpload()) { + return error(Response.Status.SERVICE_UNAVAILABLE, BundleUtil.getStringFromBundle("file.api.httpDisabled")); + } + + // ------------------------------------- + // (1) Get the user from the API key + // ------------------------------------- + AuthenticatedUser authUser; + try { + authUser = findAuthenticatedUserOrDie(); + } catch (WrappedResponse ex) { + return error(Response.Status.FORBIDDEN, BundleUtil.getStringFromBundle("file.addreplace.error.auth") + ); + } + + // ------------------------------------- + // (2) Get the Dataset Id + // ------------------------------------- + Dataset dataset; + + try { + dataset = findDatasetOrDie(datasetId); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + + //------------------------------------ + // (2b) Make sure dataset does 
not have package file + // -------------------------------------- + + for (DatasetVersion dv : dataset.getVersions()) { + if (dv.isHasPackageFile()) { + return error(Response.Status.FORBIDDEN, BundleUtil.getStringFromBundle("file.api.alreadyHasPackageFile") + ); + } + } + + + String lockInfoMessage = "Globus Upload API started "; + DatasetLock lock = datasetService.addDatasetLock(dataset.getId(), DatasetLock.Reason.GlobusUpload, + (authUser).getId(), lockInfoMessage); + if (lock != null) { + dataset.addLock(lock); + } else { + logger.log(Level.WARNING, "Failed to lock the dataset (dataset id={0})", dataset.getId()); + } + + + ApiToken token = authSvc.findApiTokenByUser(authUser); + + if(uriInfo != null) { + logger.info(" ==== (api uriInfo.getRequestUri()) jsonData ====== " + uriInfo.getRequestUri().toString()); + } + + + String requestUrl = headers.getRequestHeader("origin").get(0); + + if(requestUrl.contains("localhost")){ + requestUrl = "http://localhost:8080"; + } + + // Async Call + globusService.globusUpload(jsonData, token, dataset, requestUrl, authUser); + + return ok("Async call to Globus Upload started "); + + } + + @POST + @Path("{id}/deleteglobusRule") + @Consumes(MediaType.MULTIPART_FORM_DATA) + public Response deleteglobusRule(@PathParam("id") String datasetId,@FormDataParam("jsonData") String jsonData + ) throws IOException, ExecutionException, InterruptedException { + + + logger.info(" ==== (api deleteglobusRule) jsonData ====== " + jsonData); + + + if (!systemConfig.isHTTPUpload()) { + return error(Response.Status.SERVICE_UNAVAILABLE, BundleUtil.getStringFromBundle("file.api.httpDisabled")); + } + + // ------------------------------------- + // (1) Get the user from the API key + // ------------------------------------- + User authUser; + try { + authUser = findUserOrDie(); + } catch (WrappedResponse ex) { + return error(Response.Status.FORBIDDEN, BundleUtil.getStringFromBundle("file.addreplace.error.auth") + ); + } + + // 
------------------------------------- + // (2) Get the Dataset Id + // ------------------------------------- + Dataset dataset; + + try { + dataset = findDatasetOrDie(datasetId); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + + // Async Call + globusService.globusDownload(jsonData, dataset, authUser); + + return ok("Async call to Globus Download started"); + + } + + /** * Add multiple Files to an existing Dataset * @@ -3192,6 +3362,9 @@ public Response addFilesToDataset(@PathParam("id") String idSupplied, return wr.getResponse(); } + dataset.getLocks().forEach(dl -> { + logger.info(dl.toString()); + }); //------------------------------------ // (2a) Make sure dataset does not have package file @@ -3221,10 +3394,10 @@ public Response addFilesToDataset(@PathParam("id") String idSupplied, return addFileHelper.addFiles(jsonData, dataset, authUser); } - - /** + + /** * API to find curation assignments and statuses - * + * * @return * @throws WrappedResponse */ @@ -3282,4 +3455,130 @@ public Response getCurationStates() throws WrappedResponse { csvSB.append("\n"); return ok(csvSB.toString(), MediaType.valueOf(FileUtil.MIME_TYPE_CSV), "datasets.status.csv"); } + + // APIs to manage archival status + + @GET + @Produces(MediaType.APPLICATION_JSON) + @Path("/{id}/{version}/archivalStatus") + public Response getDatasetVersionArchivalStatus(@PathParam("id") String datasetId, + @PathParam("version") String versionNumber, @Context UriInfo uriInfo, @Context HttpHeaders headers) { + + try { + AuthenticatedUser au = findAuthenticatedUserOrDie(); + if (!au.isSuperuser()) { + return error(Response.Status.FORBIDDEN, "Superusers only."); + } + DataverseRequest req = createDataverseRequest(au); + DatasetVersion dsv = getDatasetVersionOrDie(req, versionNumber, findDatasetOrDie(datasetId), uriInfo, + headers); + + if (dsv.getArchivalCopyLocation() == null) { + return error(Status.NO_CONTENT, "This dataset version has not been archived"); + } else { + JsonObject status = 
JsonUtil.getJsonObject(dsv.getArchivalCopyLocation()); + return ok(status); + } + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + } + + @PUT + @Consumes(MediaType.APPLICATION_JSON) + @Path("/{id}/{version}/archivalStatus") + public Response setDatasetVersionArchivalStatus(@PathParam("id") String datasetId, + @PathParam("version") String versionNumber, String newStatus, @Context UriInfo uriInfo, + @Context HttpHeaders headers) { + + logger.fine(newStatus); + try { + AuthenticatedUser au = findAuthenticatedUserOrDie(); + + if (!au.isSuperuser()) { + return error(Response.Status.FORBIDDEN, "Superusers only."); + } + + //Verify we have valid json after removing any HTML tags (the status gets displayed in the UI, so we want plain text). + JsonObject update= JsonUtil.getJsonObject(MarkupChecker.stripAllTags(newStatus)); + + if (update.containsKey(DatasetVersion.ARCHIVAL_STATUS) && update.containsKey(DatasetVersion.ARCHIVAL_STATUS_MESSAGE)) { + String status = update.getString(DatasetVersion.ARCHIVAL_STATUS); + if (status.equals(DatasetVersion.ARCHIVAL_STATUS_PENDING) || status.equals(DatasetVersion.ARCHIVAL_STATUS_FAILURE) + || status.equals(DatasetVersion.ARCHIVAL_STATUS_SUCCESS)) { + + DataverseRequest req = createDataverseRequest(au); + DatasetVersion dsv = getDatasetVersionOrDie(req, versionNumber, findDatasetOrDie(datasetId), + uriInfo, headers); + + if (dsv == null) { + return error(Status.NOT_FOUND, "Dataset version not found"); + } + if (isSingleVersionArchiving()) { + for (DatasetVersion version : dsv.getDataset().getVersions()) { + if ((!dsv.equals(version)) && (version.getArchivalCopyLocation() != null)) { + return error(Status.CONFLICT, "Dataset already archived."); + } + } + } + + dsv.setArchivalCopyLocation(JsonUtil.prettyPrint(update)); + dsv = datasetversionService.merge(dsv); + logger.fine("status now: " + dsv.getArchivalCopyLocationStatus()); + logger.fine("message now: " + dsv.getArchivalCopyLocationMessage()); + + return ok("Status 
updated"); + } + } + } catch (WrappedResponse wr) { + return wr.getResponse(); + } catch (JsonException| IllegalStateException ex) { + return error(Status.BAD_REQUEST, "Unable to parse provided JSON"); + } + return error(Status.BAD_REQUEST, "Unacceptable status format"); + } + + @DELETE + @Produces(MediaType.APPLICATION_JSON) + @Path("/{id}/{version}/archivalStatus") + public Response deleteDatasetVersionArchivalStatus(@PathParam("id") String datasetId, + @PathParam("version") String versionNumber, @Context UriInfo uriInfo, @Context HttpHeaders headers) { + + try { + AuthenticatedUser au = findAuthenticatedUserOrDie(); + if (!au.isSuperuser()) { + return error(Response.Status.FORBIDDEN, "Superusers only."); + } + + DataverseRequest req = createDataverseRequest(au); + DatasetVersion dsv = getDatasetVersionOrDie(req, versionNumber, findDatasetOrDie(datasetId), uriInfo, + headers); + if (dsv == null) { + return error(Status.NOT_FOUND, "Dataset version not found"); + } + dsv.setArchivalCopyLocation(null); + dsv = datasetversionService.merge(dsv); + + return ok("Status deleted"); + + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + } + + private boolean isSingleVersionArchiving() { + String className = settingsService.getValueForKey(SettingsServiceBean.Key.ArchiverClassName, null); + if (className != null) { + Class clazz; + try { + clazz = Class.forName(className).asSubclass(AbstractSubmitToArchiveCommand.class); + return ArchiverUtil.onlySingleVersionArchiving(clazz, settingsService); + } catch (ClassNotFoundException e) { + logger.warning(":ArchiverClassName does not refer to a known Archiver"); + } catch (ClassCastException cce) { + logger.warning(":ArchiverClassName does not refer to an Archiver class"); + } + } + return false; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java b/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java index d15b0f1c48f..90130cb3944 100644 --- 
a/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java @@ -7,17 +7,17 @@ import edu.harvard.iq.dataverse.Dataverse; import edu.harvard.iq.dataverse.DataverseFacet; import edu.harvard.iq.dataverse.DataverseContact; +import edu.harvard.iq.dataverse.DataverseMetadataBlockFacet; import edu.harvard.iq.dataverse.DataverseServiceBean; import edu.harvard.iq.dataverse.api.datadeposit.SwordServiceBean; +import edu.harvard.iq.dataverse.api.dto.DataverseMetadataBlockFacetDTO; import edu.harvard.iq.dataverse.authorization.DataverseRole; import edu.harvard.iq.dataverse.DvObject; -import edu.harvard.iq.dataverse.DvObjectContainer; import edu.harvard.iq.dataverse.GlobalId; import edu.harvard.iq.dataverse.GuestbookResponseServiceBean; import edu.harvard.iq.dataverse.GuestbookServiceBean; import edu.harvard.iq.dataverse.MetadataBlock; import edu.harvard.iq.dataverse.RoleAssignment; -import static edu.harvard.iq.dataverse.api.AbstractApiBean.error; import edu.harvard.iq.dataverse.api.dto.ExplicitGroupDTO; import edu.harvard.iq.dataverse.api.dto.RoleAssignmentDTO; import edu.harvard.iq.dataverse.api.dto.RoleDTO; @@ -41,6 +41,7 @@ import edu.harvard.iq.dataverse.engine.command.impl.DeleteDataverseCommand; import edu.harvard.iq.dataverse.engine.command.impl.DeleteDataverseLinkingDataverseCommand; import edu.harvard.iq.dataverse.engine.command.impl.DeleteExplicitGroupCommand; +import edu.harvard.iq.dataverse.engine.command.impl.UpdateMetadataBlockFacetRootCommand; import edu.harvard.iq.dataverse.engine.command.impl.GetDataverseCommand; import edu.harvard.iq.dataverse.engine.command.impl.GetDataverseStorageSizeCommand; import edu.harvard.iq.dataverse.engine.command.impl.GetExplicitGroupCommand; @@ -49,6 +50,7 @@ import edu.harvard.iq.dataverse.engine.command.impl.ListDataverseContentCommand; import edu.harvard.iq.dataverse.engine.command.impl.ListExplicitGroupsCommand; import 
edu.harvard.iq.dataverse.engine.command.impl.ListFacetsCommand; +import edu.harvard.iq.dataverse.engine.command.impl.ListMetadataBlockFacetsCommand; import edu.harvard.iq.dataverse.engine.command.impl.ListMetadataBlocksCommand; import edu.harvard.iq.dataverse.engine.command.impl.ListRoleAssignments; import edu.harvard.iq.dataverse.engine.command.impl.ListRolesCommand; @@ -62,6 +64,7 @@ import edu.harvard.iq.dataverse.engine.command.impl.UpdateDataverseDefaultContributorRoleCommand; import edu.harvard.iq.dataverse.engine.command.impl.UpdateDataverseMetadataBlocksCommand; import edu.harvard.iq.dataverse.engine.command.impl.UpdateExplicitGroupCommand; +import edu.harvard.iq.dataverse.engine.command.impl.UpdateMetadataBlockFacetsCommand; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.ConstraintViolationUtil; @@ -69,7 +72,6 @@ import static edu.harvard.iq.dataverse.util.StringUtil.nonEmpty; import edu.harvard.iq.dataverse.util.json.JSONLDUtil; -import edu.harvard.iq.dataverse.util.json.JsonLDTerm; import edu.harvard.iq.dataverse.util.json.JsonParseException; import static edu.harvard.iq.dataverse.util.json.JsonPrinter.brief; import java.io.StringReader; @@ -91,7 +93,6 @@ import javax.json.JsonValue; import javax.json.JsonValue.ValueType; import javax.json.stream.JsonParsingException; -import javax.validation.ConstraintViolation; import javax.validation.ConstraintViolationException; import javax.ws.rs.BadRequestException; import javax.ws.rs.Consumes; @@ -114,9 +115,9 @@ import java.text.SimpleDateFormat; import java.util.Arrays; import java.util.Date; -import java.util.HashMap; import java.util.Map; import java.util.Optional; +import java.util.stream.Collectors; import javax.servlet.http.HttpServletResponse; import javax.ws.rs.WebApplicationException; import javax.ws.rs.core.Context; @@ -713,6 +714,78 @@ public Response setFacets(@PathParam("identifier") String dvIdtf, 
String facetId } } + @GET + @Path("{identifier}/metadatablockfacets") + @Produces(MediaType.APPLICATION_JSON) + public Response listMetadataBlockFacets(@PathParam("identifier") String dvIdtf) { + try { + User u = findUserOrDie(); + DataverseRequest request = createDataverseRequest(u); + Dataverse dataverse = findDataverseOrDie(dvIdtf); + List metadataBlockFacets = Optional.ofNullable(execCommand(new ListMetadataBlockFacetsCommand(request, dataverse))).orElse(Collections.emptyList()); + List metadataBlocksDTOs = metadataBlockFacets.stream() + .map(item -> new DataverseMetadataBlockFacetDTO.MetadataBlockDTO(item.getMetadataBlock().getName(), item.getMetadataBlock().getLocaleDisplayFacet())) + .collect(Collectors.toList()); + DataverseMetadataBlockFacetDTO response = new DataverseMetadataBlockFacetDTO(dataverse.getId(), dataverse.getAlias(), dataverse.isMetadataBlockFacetRoot(), metadataBlocksDTOs); + return Response.ok(response).build(); + } catch (WrappedResponse e) { + return e.getResponse(); + } + } + + @POST + @Path("{identifier}/metadatablockfacets") + @Consumes(MediaType.APPLICATION_JSON) + @Produces(MediaType.APPLICATION_JSON) + public Response setMetadataBlockFacets(@PathParam("identifier") String dvIdtf, List metadataBlockNames) { + try { + Dataverse dataverse = findDataverseOrDie(dvIdtf); + + if(!dataverse.isMetadataBlockFacetRoot()) { + return badRequest(String.format("Dataverse: %s must have metadata block facet root set to true", dvIdtf)); + } + + List metadataBlockFacets = new LinkedList<>(); + for(String metadataBlockName: metadataBlockNames) { + MetadataBlock metadataBlock = findMetadataBlock(metadataBlockName); + if (metadataBlock == null) { + return badRequest(String.format("Invalid metadata block name: %s", metadataBlockName)); + } + + DataverseMetadataBlockFacet metadataBlockFacet = new DataverseMetadataBlockFacet(); + metadataBlockFacet.setDataverse(dataverse); + metadataBlockFacet.setMetadataBlock(metadataBlock); + 
metadataBlockFacets.add(metadataBlockFacet); + } + + execCommand(new UpdateMetadataBlockFacetsCommand(createDataverseRequest(findUserOrDie()), dataverse, metadataBlockFacets)); + return ok(String.format("Metadata block facets updated. DataverseId: %s blocks: %s", dvIdtf, metadataBlockNames)); + + } catch (WrappedResponse ex) { + return ex.getResponse(); + } + } + + @POST + @Path("{identifier}/metadatablockfacets/isRoot") + @Consumes(MediaType.APPLICATION_JSON) + @Produces(MediaType.APPLICATION_JSON) + public Response updateMetadataBlockFacetsRoot(@PathParam("identifier") String dvIdtf, String body) { + try { + final boolean blockFacetsRoot = parseBooleanOrDie(body); + Dataverse dataverse = findDataverseOrDie(dvIdtf); + if(dataverse.isMetadataBlockFacetRoot() == blockFacetsRoot) { + return ok(String.format("No update needed, dataverse already consistent with new value. DataverseId: %s blockFacetsRoot: %s", dvIdtf, blockFacetsRoot)); + } + + execCommand(new UpdateMetadataBlockFacetRootCommand(createDataverseRequest(findUserOrDie()), dataverse, blockFacetsRoot)); + return ok(String.format("Metadata block facets root updated. DataverseId: %s blockFacetsRoot: %s", dvIdtf, blockFacetsRoot)); + + } catch (WrappedResponse ex) { + return ex.getResponse(); + } + } + // FIXME: This listContent method is way too optimistic, always returning "ok" and never "error". 
// TODO: Investigate why there was a change in the timeframe of when pull request #4350 was merged // (2438-4295-dois-for-files branch) such that a contributor API token no longer allows this method diff --git a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstance.java b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstance.java index 07215cb919e..c9eb3638b90 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstance.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstance.java @@ -11,6 +11,8 @@ import edu.harvard.iq.dataverse.EjbDataverseEngine; import edu.harvard.iq.dataverse.GuestbookResponse; import java.util.List; +import java.util.logging.Logger; + import edu.harvard.iq.dataverse.dataaccess.OptionalAccessService; import javax.faces.context.FacesContext; import javax.ws.rs.core.HttpHeaders; @@ -22,6 +24,7 @@ */ public class DownloadInstance { + private static final Logger logger = Logger.getLogger(DownloadInstance.class.getCanonicalName()); /* private ByteArrayOutputStream outStream = null; @@ -122,6 +125,7 @@ public Boolean checkIfServiceSupportedAndSetConverter(String serviceArg, String for (OptionalAccessService dataService : servicesAvailable) { if (dataService != null) { + logger.fine("Checking service: " + dataService.getServiceName()); if (serviceArg.equals("variables")) { // Special case for the subsetting parameter (variables=): if ("subset".equals(dataService.getServiceName())) { @@ -149,6 +153,7 @@ public Boolean checkIfServiceSupportedAndSetConverter(String serviceArg, String return true; } String argValuePair = serviceArg + "=" + serviceArgValue; + logger.fine("Comparing: " + argValuePair + " and " + dataService.getServiceArguments()); if (argValuePair.startsWith(dataService.getServiceArguments())) { conversionParam = serviceArg; conversionParamValue = serviceArgValue; diff --git a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java 
b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java index 84a31959286..01f627ea23b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java @@ -27,9 +27,12 @@ import edu.harvard.iq.dataverse.engine.command.Command; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.impl.CreateGuestbookResponseCommand; +import edu.harvard.iq.dataverse.globus.GlobusServiceBean; import edu.harvard.iq.dataverse.makedatacount.MakeDataCountLoggingServiceBean; import edu.harvard.iq.dataverse.makedatacount.MakeDataCountLoggingServiceBean.MakeDataCountEntry; import edu.harvard.iq.dataverse.util.FileUtil; +import edu.harvard.iq.dataverse.util.SystemConfig; + import java.io.File; import java.io.FileInputStream; import java.net.URI; @@ -59,6 +62,10 @@ public class DownloadInstanceWriter implements MessageBodyWriter clazz, Type type, Annotation[] throw new NotFoundException("Datafile " + dataFile.getId() + ": Failed to locate and/or open physical file."); } + + boolean redirectSupported = false; + String auxiliaryTag = null; + String auxiliaryType = null; + String auxiliaryFileName = null; // Before we do anything else, check if this download can be handled // by a redirect to remote storage (only supported on S3, as of 5.4): - if (storageIO instanceof S3AccessIO && ((S3AccessIO) storageIO).downloadRedirectEnabled()) { + if (storageIO.downloadRedirectEnabled()) { // Even if the above is true, there are a few cases where a // redirect is not applicable. @@ -101,10 +113,8 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] // for a saved original; but CANNOT if it is a column subsetting // request (must be streamed in real time locally); or a format // conversion that hasn't been cached and saved on S3 yet. 
- boolean redirectSupported = true; - String auxiliaryTag = null; - String auxiliaryType = null; - String auxiliaryFileName = null; + redirectSupported = true; + if ("imageThumb".equals(di.getConversionParam())) { @@ -112,7 +122,7 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] int requestedSize = 0; if (!"".equals(di.getConversionParamValue())) { try { - requestedSize = new Integer(di.getConversionParamValue()); + requestedSize = Integer.parseInt(di.getConversionParamValue()); } catch (java.lang.NumberFormatException ex) { // it's ok, the default size will be used. } @@ -120,7 +130,7 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] auxiliaryTag = ImageThumbConverter.THUMBNAIL_SUFFIX + (requestedSize > 0 ? requestedSize : ImageThumbConverter.DEFAULT_THUMBNAIL_SIZE); - if (isAuxiliaryObjectCached(storageIO, auxiliaryTag)) { + if (storageIO.downloadRedirectEnabled(auxiliaryTag) && isAuxiliaryObjectCached(storageIO, auxiliaryTag)) { auxiliaryType = ImageThumbConverter.THUMBNAIL_MIME_TYPE; String fileName = storageIO.getFileName(); if (fileName != null) { @@ -139,7 +149,7 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] auxiliaryTag = auxiliaryTag + "_" + auxVersion; } - if (isAuxiliaryObjectCached(storageIO, auxiliaryTag)) { + if (storageIO.downloadRedirectEnabled(auxiliaryTag) && isAuxiliaryObjectCached(storageIO, auxiliaryTag)) { String fileExtension = getFileExtension(di.getAuxiliaryFile()); auxiliaryFileName = storageIO.getFileName() + "." + auxiliaryTag + fileExtension; auxiliaryType = di.getAuxiliaryFile().getContentType(); @@ -162,7 +172,7 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] // it has been cached already. 
auxiliaryTag = di.getConversionParamValue(); - if (isAuxiliaryObjectCached(storageIO, auxiliaryTag)) { + if (storageIO.downloadRedirectEnabled(auxiliaryTag) && isAuxiliaryObjectCached(storageIO, auxiliaryTag)) { auxiliaryType = di.getServiceFormatType(di.getConversionParam(), auxiliaryTag); auxiliaryFileName = FileUtil.replaceExtension(storageIO.getFileName(), auxiliaryTag); } else { @@ -177,40 +187,52 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] redirectSupported = false; } } - - if (redirectSupported) { - // definitely close the (potentially still open) input stream, - // since we are not going to use it. The S3 documentation in particular - // emphasizes that it is very important not to leave these - // lying around un-closed, since they are going to fill - // up the S3 connection pool! - storageIO.closeInputStream(); - // [attempt to] redirect: - String redirect_url_str; - try { - redirect_url_str = ((S3AccessIO) storageIO).generateTemporaryS3Url(auxiliaryTag, auxiliaryType, auxiliaryFileName); - } catch (IOException ioex) { - redirect_url_str = null; - } - - if (redirect_url_str == null) { - throw new ServiceUnavailableException(); + } + String redirect_url_str=null; + + if (redirectSupported) { + // definitely close the (potentially still open) input stream, + // since we are not going to use it. The S3 documentation in particular + // emphasizes that it is very important not to leave these + // lying around un-closed, since they are going to fill + // up the S3 connection pool! 
+ storageIO.closeInputStream(); + // [attempt to] redirect: + try { + redirect_url_str = storageIO.generateTemporaryDownloadUrl(auxiliaryTag, auxiliaryType, auxiliaryFileName); + } catch (IOException ioex) { + logger.warning("Unable to generate downloadURL for " + dataFile.getId() + ": " + auxiliaryTag); + //Setting null will let us try to get the file/aux file w/o redirecting + redirect_url_str = null; + } + } + + if (systemConfig.isGlobusFileDownload() && systemConfig.getGlobusStoresList() + .contains(DataAccess.getStorageDriverFromIdentifier(dataFile.getStorageIdentifier()))) { + if (di.getConversionParam() != null) { + if (di.getConversionParam().equals("format")) { + + if ("GlobusTransfer".equals(di.getConversionParamValue())) { + redirect_url_str = globusService.getGlobusAppUrlForDataset(dataFile.getOwner(), false, dataFile); + } } + } + if (redirect_url_str!=null) { - logger.fine("Data Access API: direct S3 url: " + redirect_url_str); + logger.fine("Data Access API: redirect url: " + redirect_url_str); URI redirect_uri; try { redirect_uri = new URI(redirect_url_str); } catch (URISyntaxException ex) { - logger.info("Data Access API: failed to create S3 redirect url (" + redirect_url_str + ")"); + logger.info("Data Access API: failed to create redirect url (" + redirect_url_str + ")"); redirect_uri = null; } if (redirect_uri != null) { // increment the download count, if necessary: if (di.getGbr() != null && !(isThumbnailDownload(di) || isPreprocessedMetadataDownload(di))) { try { - logger.fine("writing guestbook response, for an S3 download redirect."); + logger.fine("writing guestbook response, for a download redirect."); Command cmd = new CreateGuestbookResponseCommand(di.getDataverseRequestService().getDataverseRequest(), di.getGbr(), di.getGbr().getDataFile().getOwner()); di.getCommand().submit(cmd); MakeDataCountEntry entry = new MakeDataCountEntry(di.getRequestUriInfo(), di.getRequestHttpHeaders(), di.getDataverseRequestService(), 
di.getGbr().getDataFile()); @@ -221,7 +243,7 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] // finally, issue the redirect: Response response = Response.seeOther(redirect_uri).build(); - logger.fine("Issuing redirect to the file location on S3."); + logger.fine("Issuing redirect to the file location."); throw new RedirectionException(response); } throw new ServiceUnavailableException(); @@ -434,6 +456,9 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] offset = ranges.get(0).getStart(); leftToRead = rangeContentSize; + httpHeaders.add("Accept-Ranges", "bytes"); + httpHeaders.add("Content-Range", "bytes "+offset+"-"+(offset+rangeContentSize-1)+"/"+contentSize); + } } else { // Content size unknown, must be a dynamically diff --git a/src/main/java/edu/harvard/iq/dataverse/api/EditDDI.java b/src/main/java/edu/harvard/iq/dataverse/api/EditDDI.java index d58622f9874..82938fd3687 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/EditDDI.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/EditDDI.java @@ -35,7 +35,6 @@ import javax.inject.Inject; import javax.persistence.EntityManager; import javax.persistence.PersistenceContext; -import javax.servlet.http.HttpServletRequest; import javax.ws.rs.core.Response; import javax.ws.rs.core.Context; import javax.ws.rs.Path; @@ -92,9 +91,6 @@ public class EditDDI extends AbstractApiBean { private List filesToBeDeleted = new ArrayList<>(); - @Context - protected HttpServletRequest httpRequest; - private VariableMetadataUtil variableMetadataUtil; @@ -193,7 +189,7 @@ private boolean createNewDraftVersion(ArrayList neededToUpdate Command cmd; try { - DataverseRequest dr = new DataverseRequest(apiTokenUser, httpRequest); + DataverseRequest dr = createDataverseRequest(apiTokenUser); cmd = new UpdateDatasetVersionCommand(dataset, dr, fm); ((UpdateDatasetVersionCommand) cmd).setValidateLenient(true); dataset = commandEngine.submit(cmd); @@ -335,7 +331,7 @@ private 
boolean updateDraftVersion(ArrayList neededToUpdateVM, } Command cmd; try { - DataverseRequest dr = new DataverseRequest(apiTokenUser, httpRequest); + DataverseRequest dr = createDataverseRequest(apiTokenUser); cmd = new UpdateDatasetVersionCommand(dataset, dr); ((UpdateDatasetVersionCommand) cmd).setValidateLenient(true); commandEngine.submit(cmd); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Files.java b/src/main/java/edu/harvard/iq/dataverse/api/Files.java index 78847119ce4..9dc0c3be524 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Files.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Files.java @@ -12,6 +12,7 @@ import edu.harvard.iq.dataverse.DataverseServiceBean; import edu.harvard.iq.dataverse.EjbDataverseEngine; import edu.harvard.iq.dataverse.FileMetadata; +import edu.harvard.iq.dataverse.TermsOfUseAndAccessValidator; import edu.harvard.iq.dataverse.UserNotificationServiceBean; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.authorization.users.User; @@ -21,6 +22,7 @@ import edu.harvard.iq.dataverse.datasetutility.OptionalFileParams; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; +import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; import edu.harvard.iq.dataverse.engine.command.impl.GetDataFileCommand; import edu.harvard.iq.dataverse.engine.command.impl.GetDraftFileMetadataIfAvailableCommand; import edu.harvard.iq.dataverse.engine.command.impl.RedetectFileTypeCommand; @@ -146,6 +148,12 @@ public Response restrictFileInDataset(@PathParam("id") String fileToRestrictId, // update the dataset try { engineSvc.submit(new UpdateDatasetVersionCommand(dataFile.getOwner(), dataverseRequest)); + } catch (IllegalCommandException ex) { + //special case where terms of use are out of compliance + if 
(!TermsOfUseAndAccessValidator.isTOUAValid(dataFile.getOwner().getLatestVersion().getTermsOfUseAndAccess(), null)) { + return conflict(BundleUtil.getStringFromBundle("dataset.message.toua.invalid")); + } + return error(BAD_REQUEST, "Problem saving datafile " + dataFile.getDisplayName() + ": " + ex.getLocalizedMessage()); } catch (CommandException ex) { return error(BAD_REQUEST, "Problem saving datafile " + dataFile.getDisplayName() + ": " + ex.getLocalizedMessage()); } @@ -232,7 +240,7 @@ public Response replaceFileInDataset( } } else { return error(BAD_REQUEST, - "You must upload a file or provide a storageidentifier, filename, and mimetype."); + "You must upload a file or provide a valid storageidentifier, filename, and mimetype."); } } else { newFilename = contentDispositionHeader.getFileName(); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/LDNInbox.java b/src/main/java/edu/harvard/iq/dataverse/api/LDNInbox.java new file mode 100644 index 00000000000..3912b9102e2 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/api/LDNInbox.java @@ -0,0 +1,195 @@ +package edu.harvard.iq.dataverse.api; + +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.DatasetServiceBean; +import edu.harvard.iq.dataverse.DataverseRoleServiceBean; +import edu.harvard.iq.dataverse.GlobalId; +import edu.harvard.iq.dataverse.MailServiceBean; +import edu.harvard.iq.dataverse.RoleAssigneeServiceBean; +import edu.harvard.iq.dataverse.RoleAssignment; +import edu.harvard.iq.dataverse.UserNotification; +import edu.harvard.iq.dataverse.UserNotificationServiceBean; +import edu.harvard.iq.dataverse.authorization.Permission; +import edu.harvard.iq.dataverse.authorization.groups.impl.ipaddress.ip.IpAddress; +import edu.harvard.iq.dataverse.engine.command.DataverseRequest; +import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import edu.harvard.iq.dataverse.util.json.JSONLDUtil; +import edu.harvard.iq.dataverse.util.json.JsonLDNamespace; +import 
edu.harvard.iq.dataverse.util.json.JsonLDTerm; + +import java.util.Date; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.io.StringWriter; +import java.sql.Timestamp; +import java.util.logging.Logger; + +import javax.ejb.EJB; +import javax.json.Json; +import javax.json.JsonObject; +import javax.json.JsonValue; +import javax.json.JsonWriter; +import javax.servlet.http.HttpServletRequest; +import javax.ws.rs.BadRequestException; +import javax.ws.rs.ServiceUnavailableException; +import javax.ws.rs.Consumes; +import javax.ws.rs.ForbiddenException; +import javax.ws.rs.POST; +import javax.ws.rs.Path; +import javax.ws.rs.core.Context; +import javax.ws.rs.core.Response; + +@Path("inbox") +public class LDNInbox extends AbstractApiBean { + + private static final Logger logger = Logger.getLogger(LDNInbox.class.getName()); + + @EJB + SettingsServiceBean settingsService; + + @EJB + DatasetServiceBean datasetService; + + @EJB + MailServiceBean mailService; + + @EJB + UserNotificationServiceBean userNotificationService; + + @EJB + DataverseRoleServiceBean roleService; + + @EJB + RoleAssigneeServiceBean roleAssigneeService; + @Context + protected HttpServletRequest httpRequest; + + @POST + @Path("/") + @Consumes("application/ld+json, application/json-ld") + public Response acceptMessage(String body) { + IpAddress origin = new DataverseRequest(null, httpRequest).getSourceAddress(); + String whitelist = settingsService.get(SettingsServiceBean.Key.LDNMessageHosts.toString(), ""); + // Only do something if we listen to this host + if (whitelist.equals("*") || whitelist.contains(origin.toString())) { + String citingPID = null; + String citingType = null; + boolean sent = false; + + JsonObject jsonld = null; + jsonld = JSONLDUtil.decontextualizeJsonLD(body); + if (jsonld == null) { + // Kludge - something about the coar notify URL causes a + // LOADING_REMOTE_CONTEXT_FAILED error in the titanium library - so replace it + // and try with a local copy 
+ body = body.replace("\"https://purl.org/coar/notify\"", + "{\n" + " \"@vocab\": \"http://purl.org/coar/notify_vocabulary/\",\n" + + " \"ietf\": \"http://www.iana.org/assignments/relation/\",\n" + + " \"coar-notify\": \"http://purl.org/coar/notify_vocabulary/\",\n" + + " \"sorg\": \"http://schema.org/\",\n" + + " \"ReviewAction\": \"coar-notify:ReviewAction\",\n" + + " \"EndorsementAction\": \"coar-notify:EndorsementAction\",\n" + + " \"IngestAction\": \"coar-notify:IngestAction\",\n" + + " \"ietf:cite-as\": {\n" + " \"@type\": \"@id\"\n" + + " }}"); + jsonld = JSONLDUtil.decontextualizeJsonLD(body); + } + if (jsonld == null) { + throw new BadRequestException("Could not parse message to find acceptable citation link to a dataset."); + } + String relationship = "isRelatedTo"; + String name = null; + JsonLDNamespace activityStreams = JsonLDNamespace.defineNamespace("as", + "https://www.w3.org/ns/activitystreams#"); + JsonLDNamespace ietf = JsonLDNamespace.defineNamespace("ietf", "http://www.iana.org/assignments/relation/"); + String objectKey = new JsonLDTerm(activityStreams, "object").getUrl(); + if (jsonld.containsKey(objectKey)) { + JsonObject msgObject = jsonld.getJsonObject(objectKey); + + citingPID = msgObject.getJsonObject(new JsonLDTerm(ietf, "cite-as").getUrl()).getString("@id"); + logger.fine("Citing PID: " + citingPID); + if (msgObject.containsKey("@type")) { + citingType = msgObject.getString("@type"); + if (citingType.startsWith(JsonLDNamespace.schema.getUrl())) { + citingType = citingType.replace(JsonLDNamespace.schema.getUrl(), ""); + } + if (msgObject.containsKey(JsonLDTerm.schemaOrg("name").getUrl())) { + name = msgObject.getString(JsonLDTerm.schemaOrg("name").getUrl()); + } + logger.fine("Citing Type: " + citingType); + String contextKey = new JsonLDTerm(activityStreams, "context").getUrl(); + + if (jsonld.containsKey(contextKey)) { + JsonObject context = jsonld.getJsonObject(contextKey); + for (Map.Entry entry : context.entrySet()) { + + 
relationship = entry.getKey().replace("_:", ""); + // Assuming only one for now - should check for array and loop + JsonObject citedResource = (JsonObject) entry.getValue(); + String pid = citedResource.getJsonObject(new JsonLDTerm(ietf, "cite-as").getUrl()) + .getString("@id"); + if (citedResource.getString("@type").equals(JsonLDTerm.schemaOrg("Dataset").getUrl())) { + logger.fine("Raw PID: " + pid); + if (pid.startsWith(GlobalId.DOI_RESOLVER_URL)) { + pid = pid.replace(GlobalId.DOI_RESOLVER_URL, GlobalId.DOI_PROTOCOL + ":"); + } else if (pid.startsWith(GlobalId.HDL_RESOLVER_URL)) { + pid = pid.replace(GlobalId.HDL_RESOLVER_URL, GlobalId.HDL_PROTOCOL + ":"); + } + logger.fine("Protocol PID: " + pid); + Optional id = GlobalId.parse(pid); + Dataset dataset = datasetSvc.findByGlobalId(pid); + if (dataset != null) { + JsonObject citingResource = Json.createObjectBuilder().add("@id", citingPID) + .add("@type", citingType).add("relationship", relationship) + .add("name", name).build(); + StringWriter sw = new StringWriter(128); + try (JsonWriter jw = Json.createWriter(sw)) { + jw.write(citingResource); + } + String jsonstring = sw.toString(); + Set ras = roleService.rolesAssignments(dataset); + + roleService.rolesAssignments(dataset).stream() + .filter(ra -> ra.getRole().permissions() + .contains(Permission.PublishDataset)) + .flatMap( + ra -> roleAssigneeService + .getExplicitUsers(roleAssigneeService + .getRoleAssignee(ra.getAssigneeIdentifier())) + .stream()) + .distinct() // prevent double-send + .forEach(au -> { + + if (au.isSuperuser()) { + userNotificationService.sendNotification(au, + new Timestamp(new Date().getTime()), + UserNotification.Type.DATASETMENTIONED, dataset.getId(), + null, null, true, jsonstring); + + } + }); + sent = true; + } + } + } + } + } + } + + if (!sent) { + if (citingPID == null || citingType == null) { + throw new BadRequestException( + "Could not parse message to find acceptable citation link to a dataset."); + } else { + throw new 
ServiceUnavailableException( + "Unable to process message. Please contact the administrators."); + } + } + } else { + logger.info("Ignoring message from IP address: " + origin.toString()); + throw new ForbiddenException("Inbox does not acept messages from this address"); + } + return ok("Message Received"); + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Metadata.java b/src/main/java/edu/harvard/iq/dataverse/api/Metadata.java index 5084b5267a4..b0d82b69d1b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Metadata.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Metadata.java @@ -5,19 +5,25 @@ */ package edu.harvard.iq.dataverse.api; +import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetServiceBean; + +import java.io.IOException; +import java.util.concurrent.Future; import java.util.logging.Logger; import javax.ejb.EJB; -import javax.ws.rs.GET; -import javax.ws.rs.Path; -import javax.ws.rs.Produces; +import javax.json.Json; +import javax.json.JsonArrayBuilder; +import javax.json.JsonObjectBuilder; +import javax.ws.rs.*; import javax.ws.rs.core.Response; import javax.ws.rs.core.Response; -import javax.ws.rs.PathParam; -import javax.ws.rs.PUT; + +import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.harvest.server.OAISetServiceBean; import edu.harvard.iq.dataverse.harvest.server.OAISet; +import org.apache.solr.client.solrj.SolrServerException; /** * @@ -59,7 +65,27 @@ public Response exportAll() { public Response reExportAll() { datasetService.reExportAllAsync(); return this.accepted(); - } + } + + @GET + @Path("{id}/reExportDataset") + public Response indexDatasetByPersistentId(@PathParam("id") String id) { + try { + Dataset dataset = findDatasetOrDie(id); + datasetService.reExportDatasetAsync(dataset); + return ok("export started"); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + } + + @GET + @Path("clearExportTimestamps") + public Response clearExportTimestamps() { + 
// only clear the timestamp in the database, cached metadata export files are not deleted + int numItemsCleared = datasetService.clearAllExportTimes(); + return ok("cleared: " + numItemsCleared); + } /** * initial attempt at triggering indexing/creation/population of a OAI set without going throught diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Users.java b/src/main/java/edu/harvard/iq/dataverse/api/Users.java index b1177531874..d3b938af960 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Users.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Users.java @@ -83,7 +83,7 @@ public Response mergeInAuthenticatedUser(@PathParam("consumedIdentifier") String return error(Response.Status.BAD_REQUEST, "Error calling ChangeUserIdentifierCommand: " + e.getLocalizedMessage()); } - return ok("All account data for " + consumedIdentifier + " has been merged into " + baseIdentifier + " ."); + return ok(String.format("All account data for %s has been merged into %s.", consumedIdentifier, baseIdentifier)); } @POST diff --git a/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/SwordServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/SwordServiceBean.java index 96df3ab400a..2e093dbcf36 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/SwordServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/SwordServiceBean.java @@ -9,6 +9,7 @@ import edu.harvard.iq.dataverse.TermsOfUseAndAccess; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.authorization.users.User; +import edu.harvard.iq.dataverse.dataset.DatasetUtil; import edu.harvard.iq.dataverse.license.License; import edu.harvard.iq.dataverse.license.LicenseServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; @@ -163,7 +164,7 @@ public void setDatasetLicenseAndTermsOfUse(DatasetVersion datasetVersionToMutate terms.setDatasetVersion(datasetVersionToMutate); if (listOfLicensesProvided == 
null) { - License existingLicense = datasetVersionToMutate.getTermsOfUseAndAccess().getLicense(); + License existingLicense = DatasetUtil.getLicense(datasetVersionToMutate); if (existingLicense != null) { // leave the license alone but set terms of use setTermsOfUse(datasetVersionToMutate, dcterms, existingLicense); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/dto/DataverseMetadataBlockFacetDTO.java b/src/main/java/edu/harvard/iq/dataverse/api/dto/DataverseMetadataBlockFacetDTO.java new file mode 100644 index 00000000000..65b6f0ff58f --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/api/dto/DataverseMetadataBlockFacetDTO.java @@ -0,0 +1,56 @@ +package edu.harvard.iq.dataverse.api.dto; + +import java.util.List; + +/** + * + * @author adaybujeda + */ +public class DataverseMetadataBlockFacetDTO { + + private Long dataverseId; + private String dataverseAlias; + private boolean isMetadataBlockFacetRoot; + private List metadataBlocks; + + public DataverseMetadataBlockFacetDTO(Long dataverseId, String dataverseAlias, boolean isMetadataBlockFacetRoot, List metadataBlocks) { + this.dataverseId = dataverseId; + this.dataverseAlias = dataverseAlias; + this.isMetadataBlockFacetRoot = isMetadataBlockFacetRoot; + this.metadataBlocks = metadataBlocks; + } + + public Long getDataverseId() { + return dataverseId; + } + + public String getDataverseAlias() { + return dataverseAlias; + } + + public boolean isMetadataBlockFacetRoot() { + return isMetadataBlockFacetRoot; + } + + public List getMetadataBlocks() { + return metadataBlocks; + } + + public static class MetadataBlockDTO { + private String metadataBlockName; + private String metadataBlockFacet; + + public MetadataBlockDTO(String metadataBlockName, String metadataBlockFacet) { + this.metadataBlockName = metadataBlockName; + this.metadataBlockFacet = metadataBlockFacet; + } + + public String getMetadataBlockName() { + return metadataBlockName; + } + + public String getMetadataBlockFacet() { + return 
metadataBlockFacet; + } + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportDDIServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportDDIServiceBean.java index a92e33e223e..a4e78b33a3c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportDDIServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportDDIServiceBean.java @@ -1181,7 +1181,7 @@ private void processDataAccs(XMLStreamReader xmlr, DatasetVersionDTO dvDTO) thro String noteType = xmlr.getAttributeValue(null, "type"); if (NOTE_TYPE_TERMS_OF_USE.equalsIgnoreCase(noteType) ) { if ( LEVEL_DV.equalsIgnoreCase(xmlr.getAttributeValue(null, "level"))) { - parseText(xmlr, "notes"); + dvDTO.setTermsOfUse(parseText(xmlr, "notes")); } } else if (NOTE_TYPE_TERMS_OF_ACCESS.equalsIgnoreCase(noteType) ) { if (LEVEL_DV.equalsIgnoreCase(xmlr.getAttributeValue(null, "level"))) { diff --git a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportGenericServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportGenericServiceBean.java index 56d62e6bf35..c651db2dfae 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportGenericServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportGenericServiceBean.java @@ -348,7 +348,7 @@ private String getOtherIdFromDTO(DatasetVersionDTO datasetVersionDTO) { if (!otherIds.isEmpty()) { // We prefer doi or hdl identifiers like "doi:10.7910/DVN/1HE30F" for (String otherId : otherIds) { - if (otherId.startsWith(GlobalId.DOI_PROTOCOL) || otherId.startsWith(GlobalId.HDL_PROTOCOL) || otherId.startsWith(GlobalId.DOI_RESOLVER_URL) || otherId.startsWith(GlobalId.HDL_RESOLVER_URL)) { + if (otherId.startsWith(GlobalId.DOI_PROTOCOL) || otherId.startsWith(GlobalId.HDL_PROTOCOL) || otherId.startsWith(GlobalId.DOI_RESOLVER_URL) || otherId.startsWith(GlobalId.HDL_RESOLVER_URL) || otherId.startsWith(GlobalId.HTTP_DOI_RESOLVER_URL) || 
otherId.startsWith(GlobalId.HTTP_HDL_RESOLVER_URL) || otherId.startsWith(GlobalId.DXDOI_RESOLVER_URL) || otherId.startsWith(GlobalId.HTTP_DXDOI_RESOLVER_URL)) { return otherId; } } @@ -389,15 +389,17 @@ public String reassignIdentifierAsGlobalId(String identifierString, DatasetDTO d // We also recognize global identifiers formatted as global resolver URLs: - if (identifierString.startsWith(GlobalId.HDL_RESOLVER_URL)) { + if (identifierString.startsWith(GlobalId.HDL_RESOLVER_URL) || identifierString.startsWith(GlobalId.HTTP_HDL_RESOLVER_URL)) { logger.fine("Processing Handle identifier formatted as a resolver URL: "+identifierString); protocol = GlobalId.HDL_PROTOCOL; - index1 = GlobalId.HDL_RESOLVER_URL.length() - 1; + index1 = (identifierString.startsWith(GlobalId.HDL_RESOLVER_URL)) ? GlobalId.HDL_RESOLVER_URL.length() - 1 : GlobalId.HTTP_HDL_RESOLVER_URL.length() - 1; index2 = identifierString.indexOf("/", index1 + 1); - } else if (identifierString.startsWith(GlobalId.DOI_RESOLVER_URL)) { + } else if (identifierString.startsWith(GlobalId.DOI_RESOLVER_URL) || identifierString.startsWith(GlobalId.HTTP_DOI_RESOLVER_URL) || identifierString.startsWith(GlobalId.DXDOI_RESOLVER_URL) || identifierString.startsWith(GlobalId.HTTP_DXDOI_RESOLVER_URL)) { logger.fine("Processing DOI identifier formatted as a resolver URL: "+identifierString); protocol = GlobalId.DOI_PROTOCOL; - index1 = GlobalId.DOI_RESOLVER_URL.length() - 1; + identifierString = identifierString.replace(GlobalId.DXDOI_RESOLVER_URL, GlobalId.DOI_RESOLVER_URL); + identifierString = identifierString.replace(GlobalId.HTTP_DXDOI_RESOLVER_URL, GlobalId.HTTP_DOI_RESOLVER_URL); + index1 = (identifierString.startsWith(GlobalId.DOI_RESOLVER_URL)) ? 
GlobalId.DOI_RESOLVER_URL.length() - 1 : GlobalId.HTTP_DOI_RESOLVER_URL.length() - 1; index2 = identifierString.indexOf("/", index1 + 1); } else { logger.warning("HTTP Url in supplied as the identifier is neither a Handle nor DOI resolver: "+identifierString); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/util/JsonResponseBuilder.java b/src/main/java/edu/harvard/iq/dataverse/api/util/JsonResponseBuilder.java index 07cf21934d4..aef17d1ab34 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/util/JsonResponseBuilder.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/util/JsonResponseBuilder.java @@ -222,6 +222,7 @@ public JsonResponseBuilder log(Logger logger, Level level, Optional e metadata.deleteCharAt(metadata.length()-1); if (ex.isPresent()) { + ex.get().printStackTrace(); metadata.append("|"); logger.log(level, metadata.toString(), ex); if(includeStackTrace) { diff --git a/src/main/java/edu/harvard/iq/dataverse/authorization/providers/builtin/DataverseUserPage.java b/src/main/java/edu/harvard/iq/dataverse/authorization/providers/builtin/DataverseUserPage.java index 142420bc7d9..5c0f3a49f76 100644 --- a/src/main/java/edu/harvard/iq/dataverse/authorization/providers/builtin/DataverseUserPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/authorization/providers/builtin/DataverseUserPage.java @@ -496,6 +496,7 @@ public void displayNotification() { case GRANTFILEACCESS: case REJECTFILEACCESS: case DATASETCREATED: + case DATASETMENTIONED: userNotification.setTheObject(datasetService.find(userNotification.getObjectId())); break; @@ -522,6 +523,13 @@ public void displayNotification() { userNotification.setTheObject(datasetVersionService.find(userNotification.getObjectId())); break; + case GLOBUSUPLOADCOMPLETED: + case GLOBUSUPLOADCOMPLETEDWITHERRORS: + case GLOBUSDOWNLOADCOMPLETED: + case GLOBUSDOWNLOADCOMPLETEDWITHERRORS: + userNotification.setTheObject(datasetService.find(userNotification.getObjectId())); + break; + case CHECKSUMIMPORT: 
userNotification.setTheObject(datasetVersionService.find(userNotification.getObjectId())); break; diff --git a/src/main/java/edu/harvard/iq/dataverse/authorization/providers/shib/ShibServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/authorization/providers/shib/ShibServiceBean.java index ca247d0c9c2..3e986a15689 100644 --- a/src/main/java/edu/harvard/iq/dataverse/authorization/providers/shib/ShibServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/authorization/providers/shib/ShibServiceBean.java @@ -64,6 +64,8 @@ public enum DevShibAccountType { UID_WITH_LEADING_SPACE, IDENTIFIER_WITH_LEADING_SPACE, MISSING_REQUIRED_ATTR, + ONE_AFFILIATION, + TWO_AFFILIATIONS, }; public DevShibAccountType getDevShibAccountType() { @@ -146,6 +148,14 @@ public void possiblyMutateRequestInDev(HttpServletRequest request) { ShibUtil.mutateRequestForDevConstantMissingRequiredAttributes(request); break; + case ONE_AFFILIATION: + ShibUtil.mutateRequestForDevConstantOneAffiliation(request); + break; + + case TWO_AFFILIATIONS: + ShibUtil.mutateRequestForDevConstantTwoAffiliations(request); + break; + default: logger.info("Should never reach here"); break; diff --git a/src/main/java/edu/harvard/iq/dataverse/authorization/providers/shib/ShibUtil.java b/src/main/java/edu/harvard/iq/dataverse/authorization/providers/shib/ShibUtil.java index 8d523ceae2f..f8b30710656 100644 --- a/src/main/java/edu/harvard/iq/dataverse/authorization/providers/shib/ShibUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/authorization/providers/shib/ShibUtil.java @@ -261,6 +261,28 @@ static void mutateRequestForDevConstantMissingRequiredAttributes(HttpServletRequ request.setAttribute(ShibUtil.usernameAttribute, "missing"); } + static void mutateRequestForDevConstantOneAffiliation(HttpServletRequest request) { + request.setAttribute(ShibUtil.shibIdpAttribute, "https://fake.example.com/idp/shibboleth"); + request.setAttribute(ShibUtil.uniquePersistentIdentifier, "oneAffiliation"); + 
request.setAttribute(ShibUtil.firstNameAttribute, "Lurneen"); + request.setAttribute(ShibUtil.lastNameAttribute, "Lumpkin"); + request.setAttribute(ShibUtil.emailAttribute, "oneAffiliaton@mailinator.com"); + request.setAttribute(ShibUtil.usernameAttribute, "oneAffiliaton"); + // Affiliation. "ou" is the suggested attribute in :ShibAffiliationAttribute. + request.setAttribute("ou", "Beer-N-Brawl"); + } + + static void mutateRequestForDevConstantTwoAffiliations(HttpServletRequest request) { + request.setAttribute(ShibUtil.shibIdpAttribute, "https://fake.example.com/idp/shibboleth"); + request.setAttribute(ShibUtil.uniquePersistentIdentifier, "twoAffiliatons"); + request.setAttribute(ShibUtil.firstNameAttribute, "Lenny"); + request.setAttribute(ShibUtil.lastNameAttribute, "Leonard"); + request.setAttribute(ShibUtil.emailAttribute, "twoAffiliatons@mailinator.com"); + request.setAttribute(ShibUtil.usernameAttribute, "twoAffiliatons"); + // Affiliation. "ou" is the suggested attribute in :ShibAffiliationAttribute. 
+ request.setAttribute("ou", "SNPP;Stonecutters"); + } + public static Map getRandomUserStatic() { Map fakeUser = new HashMap<>(); String shortRandomString = UUID.randomUUID().toString().substring(0, 8); diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java index a422a825259..d046fa4661d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java @@ -22,6 +22,8 @@ import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DvObject; +import edu.harvard.iq.dataverse.util.FileUtil; + import java.io.IOException; import java.util.HashMap; import java.util.Properties; @@ -42,9 +44,16 @@ public DataAccess() { }; + public static final String FILE = "file"; + public static final String S3 = "s3"; + static final String SWIFT = "swift"; + static final String REMOTE = "remote"; + static final String TMP = "tmp"; + public static final String SEPARATOR = "://"; //Default to "file" is for tests only - public static final String DEFAULT_STORAGE_DRIVER_IDENTIFIER = System.getProperty("dataverse.files.storage-driver-id", "file"); + public static final String DEFAULT_STORAGE_DRIVER_IDENTIFIER = System.getProperty("dataverse.files.storage-driver-id", FILE); public static final String UNDEFINED_STORAGE_DRIVER_IDENTIFIER = "undefined"; //Used in dataverse.xhtml as a non-null selection option value (indicating a null driver/inheriting the default) + // The getStorageIO() methods initialize StorageIO objects for // datafiles that are already saved using one of the supported Dataverse @@ -53,41 +62,55 @@ public static StorageIO getStorageIO(T dvObject) throws return getStorageIO(dvObject, null); } - //passing DVObject instead of a datafile to accomodate for use of datafiles as well as datasets - public static StorageIO getStorageIO(T dvObject, DataAccessRequest req) throws IOException { + - if 
(dvObject == null - || dvObject.getStorageIdentifier() == null - || dvObject.getStorageIdentifier().isEmpty()) { - throw new IOException("getDataAccessObject: null or invalid datafile."); - } - String storageIdentifier = dvObject.getStorageIdentifier(); - int separatorIndex = storageIdentifier.indexOf("://"); - String storageDriverId = DEFAULT_STORAGE_DRIVER_IDENTIFIER; //default - if(separatorIndex>0) { - storageDriverId = storageIdentifier.substring(0,separatorIndex); - } - String storageType = getDriverType(storageDriverId); - switch(storageType) { - case "file": - return new FileAccessIO<>(dvObject, req, storageDriverId); - case "s3": - return new S3AccessIO<>(dvObject, req, storageDriverId); - case "swift": - return new SwiftAccessIO<>(dvObject, req, storageDriverId); - case "tmp": - throw new IOException("DataAccess IO attempted on a temporary file that hasn't been permanently saved yet."); + public static String getStorageDriverFromIdentifier(String storageIdentifier) { + + int separatorIndex = storageIdentifier.indexOf(SEPARATOR); + String driverId = DEFAULT_STORAGE_DRIVER_IDENTIFIER; // default + if (separatorIndex > 0) { + driverId = storageIdentifier.substring(0, separatorIndex); } + return driverId; + } + + //passing DVObject instead of a datafile to accomodate for use of datafiles as well as datasets + public static StorageIO getStorageIO(T dvObject, DataAccessRequest req) throws IOException { - // TODO: - // This code will need to be extended with a system of looking up - // available storage plugins by the storage tag embedded in the - // "storage identifier". - // -- L.A. 
4.0.2 + if (dvObject == null || dvObject.getStorageIdentifier() == null || dvObject.getStorageIdentifier().isEmpty()) { + throw new IOException("getDataAccessObject: null or invalid datafile."); + } - logger.warning("Could not find storage driver for: " + storageIdentifier); - throw new IOException("getDataAccessObject: Unsupported storage method."); - } + String storageDriverId = getStorageDriverFromIdentifier(dvObject.getStorageIdentifier()); + + return getStorageIO(dvObject, req, storageDriverId); + } + + protected static StorageIO getStorageIO(T dvObject, DataAccessRequest req, + String storageDriverId) throws IOException { + String storageType = getDriverType(storageDriverId); + switch (storageType) { + case FILE: + return new FileAccessIO<>(dvObject, req, storageDriverId); + case S3: + return new S3AccessIO<>(dvObject, req, storageDriverId); + case SWIFT: + return new SwiftAccessIO<>(dvObject, req, storageDriverId); + case REMOTE: + return new RemoteOverlayAccessIO<>(dvObject, req, storageDriverId); + case TMP: + throw new IOException( + "DataAccess IO attempted on a temporary file that hasn't been permanently saved yet."); + } + // TODO: + // This code will need to be extended with a system of looking up + // available storage plugins by the storage tag embedded in the + // "storage identifier". + // -- L.A. 
4.0.2 + + logger.warning("Could not find storage driver for: " + storageDriverId); + throw new IOException("getDataAccessObject: Unsupported storage method."); + } // Experimental extension of the StorageIO system allowing direct access to // stored physical files that may not be associated with any DvObjects @@ -98,12 +121,14 @@ public static StorageIO getDirectStorageIO(String fullStorageLocation) String storageLocation=response[1]; String storageType = getDriverType(storageDriverId); switch(storageType) { - case "file": + case FILE: return new FileAccessIO<>(storageLocation, storageDriverId); - case "s3": + case S3: return new S3AccessIO<>(storageLocation, storageDriverId); - case "swift": + case SWIFT: return new SwiftAccessIO<>(storageLocation, storageDriverId); + case REMOTE: + return new RemoteOverlayAccessIO<>(storageLocation, storageDriverId); default: logger.warning("Could not find storage driver for: " + fullStorageLocation); throw new IOException("getDirectStorageIO: Unsupported storage method."); @@ -113,7 +138,7 @@ public static StorageIO getDirectStorageIO(String fullStorageLocation) public static String[] getDriverIdAndStorageLocation(String storageLocation) { //default if no prefix String storageIdentifier=storageLocation; - int separatorIndex = storageLocation.indexOf("://"); + int separatorIndex = storageLocation.indexOf(SEPARATOR); String storageDriverId = ""; //default if(separatorIndex>0) { storageDriverId = storageLocation.substring(0,separatorIndex); @@ -122,11 +147,11 @@ public static String[] getDriverIdAndStorageLocation(String storageLocation) { return new String[]{storageDriverId, storageIdentifier}; } - public static String getStorarageIdFromLocation(String location) { - if(location.contains("://")) { + public static String getStorageIdFromLocation(String location) { + if(location.contains(SEPARATOR)) { //It's a full location with a driverId, so strip and reapply the driver id //NOte that this will strip the bucketname out (which s3 
uses) but the S3IOStorage class knows to look at re-insert it - return location.substring(0,location.indexOf("://") +3) + location.substring(location.lastIndexOf('/')+1); + return location.substring(0,location.indexOf(SEPARATOR) +3) + location.substring(location.lastIndexOf('/')+1); } return location.substring(location.lastIndexOf('/')+1); } @@ -137,6 +162,27 @@ public static String getDriverType(String driverId) { } return System.getProperty("dataverse.files." + driverId + ".type", "Undefined"); } + + //This + public static String getDriverPrefix(String driverId) throws IOException { + if(driverId.isEmpty() || driverId.equals("tmp")) { + return "tmp" + SEPARATOR; + } + String storageType = System.getProperty("dataverse.files." + driverId + ".type", "Undefined"); + switch(storageType) { + case FILE: + return FileAccessIO.getDriverPrefix(driverId); + case S3: + return S3AccessIO.getDriverPrefix(driverId); + case SWIFT: + return SwiftAccessIO.getDriverPrefix(driverId); + default: + logger.warning("Could not find storage driver for id: " + driverId); + throw new IOException("getDriverPrefix: Unsupported storage method."); + } + + + } // createDataAccessObject() methods create a *new*, empty DataAccess objects, // for saving new, not yet saved datafiles. @@ -167,7 +213,7 @@ public static StorageIO createNewStorageIO(T dvObject, S * This if will catch any cases where that's attempted. 
*/ // Tests send objects with no storageIdentifier set - if((dvObject.getStorageIdentifier()!=null) && dvObject.getStorageIdentifier().contains("://")) { + if((dvObject.getStorageIdentifier()!=null) && dvObject.getStorageIdentifier().contains(SEPARATOR)) { throw new IOException("Attempt to create new StorageIO for already stored object: " + dvObject.getStorageIdentifier()); } @@ -180,15 +226,18 @@ public static StorageIO createNewStorageIO(T dvObject, S } String storageType = getDriverType(storageDriverId); switch(storageType) { - case "file": + case FILE: storageIO = new FileAccessIO<>(dvObject, null, storageDriverId); break; - case "swift": + case SWIFT: storageIO = new SwiftAccessIO<>(dvObject, null, storageDriverId); break; - case "s3": + case S3: storageIO = new S3AccessIO<>(dvObject, null, storageDriverId); break; + case REMOTE: + storageIO = createNewStorageIO(dvObject, storageTag, RemoteOverlayAccessIO.getBaseStoreIdFor(storageDriverId)) ; + break; default: logger.warning("Could not find storage driver for: " + storageTag); throw new IOException("createDataAccessObject: Unsupported storage method " + storageDriverId); @@ -250,4 +299,79 @@ public static String getStorageDriverLabelFor(String storageDriverId) { } return label; } + + /** + * This method checks to see if an overlay store is being used and, if so, + * defines a base storage identifier for use with auxiliary files, and adds it + * into the returned value + * + * @param newStorageIdentifier + * @return - the newStorageIdentifier (for file, S3, swift stores) - the + * newStorageIdentifier with a new base store identifier inserted (for + * an overlay store) + */ + public static String expandStorageIdentifierIfNeeded(String newStorageIdentifier) { + logger.fine("found: " + newStorageIdentifier); + String driverType = DataAccess + .getDriverType(newStorageIdentifier.substring(0, newStorageIdentifier.indexOf(":"))); + logger.fine("drivertype: " + driverType); + if (driverType.equals(REMOTE)) { + // Add 
a generated identifier for the aux files + logger.fine("in: " + newStorageIdentifier); + int lastColon = newStorageIdentifier.lastIndexOf(SEPARATOR); + newStorageIdentifier = newStorageIdentifier.substring(0, lastColon + 3) + + FileUtil.generateStorageIdentifier() + "//" + newStorageIdentifier.substring(lastColon + 3); + logger.fine("out: " + newStorageIdentifier); + } + return newStorageIdentifier; + } + + public static boolean uploadToDatasetAllowed(Dataset d, String storageIdentifier) { + boolean allowed=true; + String driverId = DataAccess.getStorageDriverFromIdentifier(storageIdentifier); + String effectiveDriverId = d.getEffectiveStorageDriverId(); + if(!effectiveDriverId.equals(driverId)) { + //Not allowed unless this is a remote store and you're uploading to the basestore + if(getDriverType(driverId).equals(REMOTE)) { + String baseDriverId = RemoteOverlayAccessIO.getBaseStoreIdFor(driverId); + if(!effectiveDriverId.equals(baseDriverId)) { + //Not allowed - wrong base driver + allowed = false; + } else { + //Only allowed if baseStore allows it + allowed = StorageIO.isDirectUploadEnabled(baseDriverId); + } + } else { + //Not allowed - wrong main driver + allowed=false; + } + } else { + //Only allowed if main store allows it + allowed = StorageIO.isDirectUploadEnabled(driverId); + } + return allowed; + } + + + //Method to verify that a submitted storageIdentifier (i.e. in direct/remote uploads) is consistent with the store's configuration. 
+ public static boolean isValidDirectStorageIdentifier(String storageId) { + String driverId = DataAccess.getStorageDriverFromIdentifier(storageId); + String storageType = DataAccess.getDriverType(driverId); + if (storageType.equals("tmp") || storageType.equals("Undefined")) { + return false; + } + switch (storageType) { + case FILE: + return FileAccessIO.isValidIdentifier(driverId, storageId); + case SWIFT: + return SwiftAccessIO.isValidIdentifier(driverId, storageId); + case S3: + return S3AccessIO.isValidIdentifier(driverId, storageId); + case REMOTE: + return RemoteOverlayAccessIO.isValidIdentifier(driverId, storageId); + default: + logger.warning("Request to validate for storage driver: " + driverId); + } + return false; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java index bd0549622f0..d5f00b9868f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java @@ -29,10 +29,13 @@ import java.io.FileOutputStream; // NIO imports: import java.nio.file.Files; +import java.nio.file.InvalidPathException; import java.nio.file.Path; import java.nio.file.Paths; import java.util.List; - +import java.util.logging.Logger; +import java.util.regex.Matcher; +import java.util.regex.Pattern; // Dataverse imports: import edu.harvard.iq.dataverse.DataFile; @@ -49,11 +52,14 @@ public class FileAccessIO extends StorageIO { - public FileAccessIO() { - //Constructor only for testing - super(null, null, null); - } - + private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.FileAccessIO"); + + + public FileAccessIO() { + // Constructor only for testing + super(null, null, null); + } + public FileAccessIO(T dvObject, DataAccessRequest req, String driverId ) { super(dvObject, req, driverId); @@ -64,8 +70,9 @@ public FileAccessIO(T dvObject, 
DataAccessRequest req, String driverId ) { // "Direct" File Access IO, opened on a physical file not associated with // a specific DvObject public FileAccessIO(String storageLocation, String driverId) { - super(storageLocation, driverId); - this.setIsLocalFile(true); + super(storageLocation, driverId); + this.setIsLocalFile(true); + logger.fine("Storage path: " + storageLocation); physicalPath = Paths.get(storageLocation); } @@ -120,10 +127,10 @@ public void open (DataAccessOption... options) throws IOException { } } else if (isWriteAccess) { // Creates a new directory as needed for a dataset. - Path datasetPath=Paths.get(getDatasetDirectory()); - if (datasetPath != null && !Files.exists(datasetPath)) { - Files.createDirectories(datasetPath); - } + Path datasetPath=Paths.get(getDatasetDirectory()); + if (datasetPath != null && !Files.exists(datasetPath)) { + Files.createDirectories(datasetPath); + } FileOutputStream fout = openLocalFileAsOutputStream(); if (fout == null) { @@ -132,8 +139,8 @@ public void open (DataAccessOption... options) throws IOException { this.setOutputStream(fout); setChannel(fout.getChannel()); - if (!storageIdentifier.startsWith(this.driverId + "://")) { - dvObject.setStorageIdentifier(this.driverId + "://" + storageIdentifier); + if (!storageIdentifier.startsWith(this.driverId + DataAccess.SEPARATOR)) { + dvObject.setStorageIdentifier(this.driverId + DataAccess.SEPARATOR + storageIdentifier); } } @@ -159,17 +166,22 @@ public void open (DataAccessOption... 
options) throws IOException { // this.setInputStream(fin); } else if (isWriteAccess) { //this checks whether a directory for a dataset exists - Path datasetPath=Paths.get(getDatasetDirectory()); - if (datasetPath != null && !Files.exists(datasetPath)) { - Files.createDirectories(datasetPath); - } - dataset.setStorageIdentifier(this.driverId + "://"+dataset.getAuthorityForFileStorage() + "/" + dataset.getIdentifierForFileStorage()); + Path datasetPath=Paths.get(getDatasetDirectory()); + if (datasetPath != null && !Files.exists(datasetPath)) { + Files.createDirectories(datasetPath); + } + dataset.setStorageIdentifier(this.driverId + DataAccess.SEPARATOR + dataset.getAuthorityForFileStorage() + "/" + dataset.getIdentifierForFileStorage()); } } else if (dvObject instanceof Dataverse) { dataverse = this.getDataverse(); } else { - throw new IOException("Data Access: Invalid DvObject type"); + logger.fine("Overlay case: FileAccessIO open for : " + physicalPath.toString()); + Path datasetPath= physicalPath.getParent(); + if (datasetPath != null && !Files.exists(datasetPath)) { + Files.createDirectories(datasetPath); + } + //throw new IOException("Data Access: Invalid DvObject type"); } // This "status" is a leftover from 3.6; we don't have a use for it // in 4.0 yet; and we may not need it at all. @@ -232,7 +244,7 @@ public Channel openAuxChannel(String auxItemTag, DataAccessOption... options) th Path auxPath = getAuxObjectAsPath(auxItemTag); if (isWriteAccessRequested(options)) { - if (dvObject instanceof Dataset && !this.canWrite()) { + if (((dvObject instanceof Dataset) || isDirectAccess()) && !this.canWrite()) { // If this is a dataset-level auxilary file (a cached metadata export, // dataset logo, etc.) 
there's a chance that no "real" files // have been saved for this dataset yet, and thus the filesystem @@ -293,7 +305,10 @@ public Path getAuxObjectAsPath(String auxItemTag) throws IOException { if (auxItemTag == null || "".equals(auxItemTag)) { throw new IOException("Null or invalid Auxiliary Object Tag."); } - + if(isDirectAccess()) { + //includes overlay case + return Paths.get(physicalPath.toString() + "." + auxItemTag); + } String datasetDirectory = getDatasetDirectory(); if (dvObject.getStorageIdentifier() == null || "".equals(dvObject.getStorageIdentifier())) { @@ -317,7 +332,7 @@ public Path getAuxObjectAsPath(String auxItemTag) throws IOException { } - @Override + @Override public void backupAsAux(String auxItemTag) throws IOException { Path auxPath = getAuxObjectAsPath(auxItemTag); @@ -415,8 +430,8 @@ public void deleteAllAuxObjects() throws IOException { } } - - + + @Override public String getStorageLocation() { // For a local file, the "storage location" is a complete, absolute @@ -425,7 +440,7 @@ public String getStorageLocation() { try { Path testPath = getFileSystemPath(); if (testPath != null) { - return this.driverId + "://" + testPath.toString(); + return this.driverId + DataAccess.SEPARATOR + testPath.toString(); } } catch (IOException ioex) { // just return null, below: @@ -545,7 +560,7 @@ public FileOutputStream openLocalFileAsOutputStream () { } private String getDatasetDirectory() throws IOException { - if (dvObject == null) { + if (isDirectAccess()) { throw new IOException("No DvObject defined in the Data Access Object"); } @@ -572,14 +587,10 @@ private String getDatasetDirectory() throws IOException { } - private String getFilesRootDirectory() { - String filesRootDirectory = System.getProperty("dataverse.files." 
+ this.driverId + ".directory"); - - if (filesRootDirectory == null || filesRootDirectory.equals("")) { - filesRootDirectory = "/tmp/files"; - } - return filesRootDirectory; - } + protected String getFilesRootDirectory() { + String filesRootDirectory = System.getProperty("dataverse.files." + this.driverId + ".directory", "/tmp/files"); + return filesRootDirectory; + } private List listCachedFiles() throws IOException { List auxItems = new ArrayList<>(); @@ -642,10 +653,34 @@ public InputStream getAuxFileAsInputStream(String auxItemTag) throws IOException return in; } private String stripDriverId(String storageIdentifier) { - int separatorIndex = storageIdentifier.indexOf("://"); - if(separatorIndex>0) { - return storageIdentifier.substring(separatorIndex + 3); + int separatorIndex = storageIdentifier.indexOf(DataAccess.SEPARATOR); + if(separatorIndex>0) { + return storageIdentifier.substring(separatorIndex + DataAccess.SEPARATOR.length()); + } + return storageIdentifier; + } + + //Confirm inputs are of the form of a relative file path that doesn't contain . or .. 
+ protected static boolean isValidIdentifier(String driverId, String storageId) { + String pathString = storageId.substring(storageId.lastIndexOf("//") + 2); + String basePath = "/tmp/"; + try { + String rawPathString = basePath + pathString; + Path normalized = Paths.get(rawPathString).normalize(); + if(!rawPathString.equals(normalized.toString())) { + logger.warning("Non-normalized path in submitted identifier " + storageId); + return false; + } + logger.fine(normalized.getFileName().toString()); + if (!usesStandardNamePattern(normalized.getFileName().toString())) { + logger.warning("Unacceptable file name in submitted identifier: " + storageId); + return false; + } + + } catch (InvalidPathException ipe) { + logger.warning("Invalid Path in submitted identifier " + storageId); + return false; } - return storageIdentifier; - } + return true; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java index 3197234c5ea..2b4aed3a9a5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java @@ -369,7 +369,7 @@ private static boolean isThumbnailCached(StorageIO storageIO, int size try { cached = storageIO.isAuxObjectCached(THUMBNAIL_SUFFIX + size); } catch (Exception ioex) { - logger.fine("caught Exception while checking for a cached thumbnail (file " + storageIO.getDataFile().getStorageIdentifier() + ")"); + logger.fine("caught Exception while checking for a cached thumbnail (file " + storageIO.getDataFile().getStorageIdentifier() + "): " + ioex.getMessage()); return false; } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java new file mode 100644 index 00000000000..c8e42349318 --- /dev/null +++ 
b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java @@ -0,0 +1,634 @@ +package edu.harvard.iq.dataverse.dataaccess; + +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.Dataverse; +import edu.harvard.iq.dataverse.DvObject; +import edu.harvard.iq.dataverse.datavariable.DataVariable; +import edu.harvard.iq.dataverse.util.UrlSignerUtil; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.net.MalformedURLException; +import java.net.URI; +import java.net.URISyntaxException; +import java.net.URL; +import java.nio.channels.Channel; +import java.nio.channels.Channels; +import java.nio.channels.ReadableByteChannel; +import java.nio.channels.WritableByteChannel; +import java.nio.file.Path; +import java.security.KeyManagementException; +import java.security.KeyStoreException; +import java.security.NoSuchAlgorithmException; +import java.util.List; +import java.util.logging.Logger; + +import org.apache.http.Header; +import org.apache.http.client.config.CookieSpecs; +import org.apache.http.client.config.RequestConfig; +import org.apache.http.client.methods.CloseableHttpResponse; +import org.apache.http.client.methods.HttpDelete; +import org.apache.http.client.methods.HttpGet; +import org.apache.http.client.methods.HttpHead; +import org.apache.http.client.protocol.HttpClientContext; +import org.apache.http.config.Registry; +import org.apache.http.config.RegistryBuilder; +import org.apache.http.conn.socket.ConnectionSocketFactory; +import org.apache.http.conn.ssl.NoopHostnameVerifier; +import org.apache.http.conn.ssl.SSLConnectionSocketFactory; +import org.apache.http.conn.ssl.TrustAllStrategy; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.HttpClients; +import org.apache.http.impl.conn.PoolingHttpClientConnectionManager; +import 
org.apache.http.protocol.HTTP; +import org.apache.http.ssl.SSLContextBuilder; +import org.apache.http.util.EntityUtils; + +import javax.net.ssl.SSLContext; + +/** + * @author qqmyers + * @param what it stores + */ +/* + * Remote Overlay Driver + * + * StorageIdentifier format: + * ://// + */ +public class RemoteOverlayAccessIO extends StorageIO { + + private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.RemoteOverlayAccessIO"); + + private StorageIO baseStore = null; + private String urlPath = null; + private String baseUrl = null; + + private static HttpClientContext localContext = HttpClientContext.create(); + private PoolingHttpClientConnectionManager cm = null; + CloseableHttpClient httpclient = null; + private int timeout = 1200; + private RequestConfig config = RequestConfig.custom().setConnectTimeout(timeout * 1000) + .setConnectionRequestTimeout(timeout * 1000).setSocketTimeout(timeout * 1000) + .setCookieSpec(CookieSpecs.STANDARD).setExpectContinueEnabled(true).build(); + private static boolean trustCerts = false; + private int httpConcurrency = 4; + + public RemoteOverlayAccessIO(T dvObject, DataAccessRequest req, String driverId) throws IOException { + super(dvObject, req, driverId); + this.setIsLocalFile(false); + configureStores(req, driverId, null); + logger.fine("Parsing storageidentifier: " + dvObject.getStorageIdentifier()); + urlPath = dvObject.getStorageIdentifier().substring(dvObject.getStorageIdentifier().lastIndexOf("//") + 2); + validatePath(urlPath); + + logger.fine("Base URL: " + urlPath); + } + + public RemoteOverlayAccessIO(String storageLocation, String driverId) throws IOException { + super(null, null, driverId); + this.setIsLocalFile(false); + configureStores(null, driverId, storageLocation); + + urlPath = storageLocation.substring(storageLocation.lastIndexOf("//") + 2); + validatePath(urlPath); + logger.fine("Base URL: " + urlPath); + } + + private void validatePath(String path) throws IOException 
{ + try { + URI absoluteURI = new URI(baseUrl + "/" + urlPath); + if(!absoluteURI.normalize().toString().startsWith(baseUrl)) { + throw new IOException("storageidentifier doesn't start with " + this.driverId + "'s base-url"); + } + } catch(URISyntaxException use) { + throw new IOException("Could not interpret storageidentifier in remote store " + this.driverId); + } + } + + + @Override + public void open(DataAccessOption... options) throws IOException { + + baseStore.open(options); + + DataAccessRequest req = this.getRequest(); + + if (isWriteAccessRequested(options)) { + isWriteAccess = true; + isReadAccess = false; + } else { + isWriteAccess = false; + isReadAccess = true; + } + + if (dvObject instanceof DataFile) { + String storageIdentifier = dvObject.getStorageIdentifier(); + + DataFile dataFile = this.getDataFile(); + + if (req != null && req.getParameter("noVarHeader") != null) { + baseStore.setNoVarHeader(true); + } + + if (storageIdentifier == null || "".equals(storageIdentifier)) { + throw new FileNotFoundException("Data Access: No local storage identifier defined for this datafile."); + } + + // Fix new DataFiles: DataFiles that have not yet been saved may use this method + // when they don't have their storageidentifier in the final form + // So we fix it up here. ToDo: refactor so that storageidentifier is generated + // by the appropriate StorageIO class and is final from the start. 
+ logger.fine("StorageIdentifier is: " + storageIdentifier); + + if (isReadAccess) { + if (dataFile.getFilesize() >= 0) { + this.setSize(dataFile.getFilesize()); + } else { + logger.fine("Setting size"); + this.setSize(getSizeFromHttpHeader()); + } + if (dataFile.getContentType() != null && dataFile.getContentType().equals("text/tab-separated-values") + && dataFile.isTabularData() && dataFile.getDataTable() != null && (!this.noVarHeader())) { + + List datavariables = dataFile.getDataTable().getDataVariables(); + String varHeaderLine = generateVariableHeader(datavariables); + this.setVarHeader(varHeaderLine); + } + + } + + this.setMimeType(dataFile.getContentType()); + + try { + this.setFileName(dataFile.getFileMetadata().getLabel()); + } catch (Exception ex) { + this.setFileName("unknown"); + } + } else if (dvObject instanceof Dataset) { + throw new IOException( + "Data Access: RemoteOverlay Storage driver does not support dvObject type Dataverse yet"); + } else if (dvObject instanceof Dataverse) { + throw new IOException( + "Data Access: RemoteOverlay Storage driver does not support dvObject type Dataverse yet"); + } else { + this.setSize(getSizeFromHttpHeader()); + } + } + + private long getSizeFromHttpHeader() { + long size = -1; + HttpHead head = new HttpHead(baseUrl + "/" + urlPath); + try { + CloseableHttpResponse response = getSharedHttpClient().execute(head, localContext); + + try { + int code = response.getStatusLine().getStatusCode(); + logger.fine("Response for HEAD: " + code); + switch (code) { + case 200: + Header[] headers = response.getHeaders(HTTP.CONTENT_LEN); + logger.fine("Num headers: " + headers.length); + String sizeString = response.getHeaders(HTTP.CONTENT_LEN)[0].getValue(); + logger.fine("Content-Length: " + sizeString); + size = Long.parseLong(response.getHeaders(HTTP.CONTENT_LEN)[0].getValue()); + logger.fine("Found file size: " + size); + break; + default: + logger.warning("Response from " + head.getURI().toString() + " was " + code); + 
} + } finally { + EntityUtils.consume(response.getEntity()); + } + } catch (IOException e) { + logger.warning(e.getMessage()); + } + return size; + } + + @Override + public InputStream getInputStream() throws IOException { + if (super.getInputStream() == null) { + try { + HttpGet get = new HttpGet(generateTemporaryDownloadUrl(null, null, null)); + CloseableHttpResponse response = getSharedHttpClient().execute(get, localContext); + + int code = response.getStatusLine().getStatusCode(); + switch (code) { + case 200: + setInputStream(response.getEntity().getContent()); + break; + default: + logger.warning("Response from " + get.getURI().toString() + " was " + code); + throw new IOException("Cannot retrieve: " + baseUrl + "/" + urlPath + " code: " + code); + } + } catch (Exception e) { + logger.warning(e.getMessage()); + e.printStackTrace(); + throw new IOException("Error retrieving: " + baseUrl + "/" + urlPath + " " + e.getMessage()); + + } + setChannel(Channels.newChannel(super.getInputStream())); + } + return super.getInputStream(); + } + + @Override + public Channel getChannel() throws IOException { + if (super.getChannel() == null) { + getInputStream(); + } + return channel; + } + + @Override + public ReadableByteChannel getReadChannel() throws IOException { + // Make sure StorageIO.channel variable exists + getChannel(); + return super.getReadChannel(); + } + + @Override + public void delete() throws IOException { + // Delete is best-effort - we tell the remote server and it may or may not + // implement this call + if (!isDirectAccess()) { + throw new IOException("Direct Access IO must be used to permanently delete stored file objects"); + } + try { + HttpDelete del = new HttpDelete(baseUrl + "/" + urlPath); + CloseableHttpResponse response = getSharedHttpClient().execute(del, localContext); + try { + int code = response.getStatusLine().getStatusCode(); + switch (code) { + case 200: + logger.fine("Sent DELETE for " + baseUrl + "/" + urlPath); + default: + 
logger.fine("Response from DELETE on " + del.getURI().toString() + " was " + code); + } + } finally { + EntityUtils.consume(response.getEntity()); + } + } catch (Exception e) { + logger.warning(e.getMessage()); + throw new IOException("Error deleting: " + baseUrl + "/" + urlPath); + + } + + // Delete all the cached aux files as well: + deleteAllAuxObjects(); + + } + + @Override + public Channel openAuxChannel(String auxItemTag, DataAccessOption... options) throws IOException { + return baseStore.openAuxChannel(auxItemTag, options); + } + + @Override + public boolean isAuxObjectCached(String auxItemTag) throws IOException { + return baseStore.isAuxObjectCached(auxItemTag); + } + + @Override + public long getAuxObjectSize(String auxItemTag) throws IOException { + return baseStore.getAuxObjectSize(auxItemTag); + } + + @Override + public Path getAuxObjectAsPath(String auxItemTag) throws IOException { + return baseStore.getAuxObjectAsPath(auxItemTag); + } + + @Override + public void backupAsAux(String auxItemTag) throws IOException { + baseStore.backupAsAux(auxItemTag); + } + + @Override + public void revertBackupAsAux(String auxItemTag) throws IOException { + baseStore.revertBackupAsAux(auxItemTag); + } + + @Override + // this method copies a local filesystem Path into this DataAccess Auxiliary + // location: + public void savePathAsAux(Path fileSystemPath, String auxItemTag) throws IOException { + baseStore.savePathAsAux(fileSystemPath, auxItemTag); + } + + @Override + public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag, Long filesize) throws IOException { + baseStore.saveInputStreamAsAux(inputStream, auxItemTag, filesize); + } + + /** + * @param inputStream InputStream we want to save + * @param auxItemTag String representing this Auxiliary type ("extension") + * @throws IOException if anything goes wrong. 
+ */ + @Override + public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag) throws IOException { + baseStore.saveInputStreamAsAux(inputStream, auxItemTag); + } + + @Override + public List listAuxObjects() throws IOException { + return baseStore.listAuxObjects(); + } + + @Override + public void deleteAuxObject(String auxItemTag) throws IOException { + baseStore.deleteAuxObject(auxItemTag); + } + + @Override + public void deleteAllAuxObjects() throws IOException { + baseStore.deleteAllAuxObjects(); + } + + @Override + public String getStorageLocation() throws IOException { + String fullStorageLocation = dvObject.getStorageIdentifier(); + logger.fine("storageidentifier: " + fullStorageLocation); + int driverIndex = fullStorageLocation.lastIndexOf(DataAccess.SEPARATOR); + if(driverIndex >=0) { + fullStorageLocation = fullStorageLocation.substring(fullStorageLocation.lastIndexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length()); + } + if (this.getDvObject() instanceof Dataset) { + throw new IOException("RemoteOverlayAccessIO: Datasets are not a supported dvObject"); + } else if (this.getDvObject() instanceof DataFile) { + fullStorageLocation = StorageIO.getDriverPrefix(this.driverId) + fullStorageLocation; + } else if (dvObject instanceof Dataverse) { + throw new IOException("RemoteOverlayAccessIO: Dataverses are not a supported dvObject"); + } + logger.fine("fullStorageLocation: " + fullStorageLocation); + return fullStorageLocation; + } + + @Override + public Path getFileSystemPath() throws UnsupportedDataAccessOperationException { + throw new UnsupportedDataAccessOperationException( + "RemoteOverlayAccessIO: this is a remote DataAccess IO object, it has no local filesystem path associated with it."); + } + + @Override + public boolean exists() { + logger.fine("Exists called"); + return (getSizeFromHttpHeader() != -1); + } + + @Override + public WritableByteChannel getWriteChannel() throws UnsupportedDataAccessOperationException { + throw new 
UnsupportedDataAccessOperationException( + "RemoteOverlayAccessIO: there are no write Channels associated with S3 objects."); + } + + @Override + public OutputStream getOutputStream() throws UnsupportedDataAccessOperationException { + throw new UnsupportedDataAccessOperationException( + "RemoteOverlayAccessIO: there are no output Streams associated with S3 objects."); + } + + @Override + public InputStream getAuxFileAsInputStream(String auxItemTag) throws IOException { + return baseStore.getAuxFileAsInputStream(auxItemTag); + } + + @Override + public boolean downloadRedirectEnabled() { + String optionValue = System.getProperty("dataverse.files." + this.driverId + ".download-redirect"); + if ("true".equalsIgnoreCase(optionValue)) { + return true; + } + return false; + } + + public boolean downloadRedirectEnabled(String auxObjectTag) { + return baseStore.downloadRedirectEnabled(auxObjectTag); + } + + @Override + public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliaryType, String auxiliaryFileName) + throws IOException { + + // ToDo - support remote auxiliary Files + if (auxiliaryTag == null) { + String secretKey = System.getProperty("dataverse.files." + this.driverId + ".secret-key"); + if (secretKey == null) { + return baseUrl + "/" + urlPath; + } else { + return UrlSignerUtil.signUrl(baseUrl + "/" + urlPath, getUrlExpirationMinutes(), null, "GET", + secretKey); + } + } else { + return baseStore.generateTemporaryDownloadUrl(auxiliaryTag, auxiliaryType, auxiliaryFileName); + } + } + + int getUrlExpirationMinutes() { + String optionValue = System.getProperty("dataverse.files." 
+ this.driverId + ".url-expiration-minutes"); + if (optionValue != null) { + Integer num; + try { + num = Integer.parseInt(optionValue); + } catch (NumberFormatException ex) { + num = null; + } + if (num != null) { + return num; + } + } + return 60; + } + + private void configureStores(DataAccessRequest req, String driverId, String storageLocation) throws IOException { + baseUrl = System.getProperty("dataverse.files." + this.driverId + ".base-url"); + if (baseUrl == null) { + throw new IOException("dataverse.files." + this.driverId + ".base-url is required"); + } else { + try { + new URI(baseUrl); + } catch (Exception e) { + logger.warning( + "Trouble interpreting base-url for store: " + this.driverId + " : " + e.getLocalizedMessage()); + throw new IOException("Can't interpret base-url as a URI"); + } + + } + + if (baseStore == null) { + String baseDriverId = getBaseStoreIdFor(driverId); + String fullStorageLocation = null; + String baseDriverType = System.getProperty("dataverse.files." + baseDriverId + ".type", DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); + + if(dvObject instanceof Dataset) { + baseStore = DataAccess.getStorageIO(dvObject, req, baseDriverId); + } else { + if (this.getDvObject() != null) { + fullStorageLocation = getStoragePath(); + + // S3 expects :/// + switch (baseDriverType) { + case DataAccess.S3: + fullStorageLocation = baseDriverId + DataAccess.SEPARATOR + + System.getProperty("dataverse.files." + baseDriverId + ".bucket-name") + "/" + + fullStorageLocation; + break; + case DataAccess.FILE: + fullStorageLocation = baseDriverId + DataAccess.SEPARATOR + + System.getProperty("dataverse.files." + baseDriverId + ".directory", "/tmp/files") + "/" + + fullStorageLocation; + break; + default: + logger.warning("Not Implemented: RemoteOverlay store with base store type: " + + System.getProperty("dataverse.files." 
+ baseDriverId + ".type")); + throw new IOException("Not implemented"); + } + + } else if (storageLocation != null) { + // ://// + //remoteDriverId:// is removed if coming through directStorageIO + int index = storageLocation.indexOf(DataAccess.SEPARATOR); + if(index > 0) { + storageLocation = storageLocation.substring(index + DataAccess.SEPARATOR.length()); + } + //THe base store needs the baseStoreIdentifier and not the relative URL + fullStorageLocation = storageLocation.substring(0, storageLocation.indexOf("//")); + + switch (baseDriverType) { + case DataAccess.S3: + fullStorageLocation = baseDriverId + DataAccess.SEPARATOR + + System.getProperty("dataverse.files." + baseDriverId + ".bucket-name") + "/" + + fullStorageLocation; + break; + case DataAccess.FILE: + fullStorageLocation = baseDriverId + DataAccess.SEPARATOR + + System.getProperty("dataverse.files." + baseDriverId + ".directory", "/tmp/files") + "/" + + fullStorageLocation; + break; + default: + logger.warning("Not Implemented: RemoteOverlay store with base store type: " + + System.getProperty("dataverse.files." + baseDriverId + ".type")); + throw new IOException("Not implemented"); + } + } + baseStore = DataAccess.getDirectStorageIO(fullStorageLocation); + } + if (baseDriverType.contentEquals(DataAccess.S3)) { + ((S3AccessIO) baseStore).setMainDriver(false); + } + } + remoteStoreName = System.getProperty("dataverse.files." + this.driverId + ".remote-store-name"); + try { + remoteStoreUrl = new URL(System.getProperty("dataverse.files." + this.driverId + ".remote-store-url")); + } catch(MalformedURLException mfue) { + logger.fine("Unable to read remoteStoreUrl for driver: " + this.driverId); + } + } + + //Convenience method to assemble the path, starting with the DOI authority/identifier/, that is needed to create a base store via DataAccess.getDirectStorageIO - the caller has to add the store type specific prefix required. 
+ private String getStoragePath() throws IOException { + String fullStoragePath = dvObject.getStorageIdentifier(); + logger.fine("storageidentifier: " + fullStoragePath); + int driverIndex = fullStoragePath.lastIndexOf(DataAccess.SEPARATOR); + if(driverIndex >=0) { + fullStoragePath = fullStoragePath.substring(fullStoragePath.lastIndexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length()); + } + int suffixIndex = fullStoragePath.indexOf("//"); + if(suffixIndex >=0) { + fullStoragePath = fullStoragePath.substring(0, suffixIndex); + } + if (this.getDvObject() instanceof Dataset) { + fullStoragePath = this.getDataset().getAuthorityForFileStorage() + "/" + + this.getDataset().getIdentifierForFileStorage() + "/" + fullStoragePath; + } else if (this.getDvObject() instanceof DataFile) { + fullStoragePath = this.getDataFile().getOwner().getAuthorityForFileStorage() + "/" + + this.getDataFile().getOwner().getIdentifierForFileStorage() + "/" + fullStoragePath; + }else if (dvObject instanceof Dataverse) { + throw new IOException("RemoteOverlayAccessIO: Dataverses are not a supported dvObject"); + } + logger.fine("fullStoragePath: " + fullStoragePath); + return fullStoragePath; + } + + public CloseableHttpClient getSharedHttpClient() { + if (httpclient == null) { + try { + initHttpPool(); + httpclient = HttpClients.custom().setConnectionManager(cm).setDefaultRequestConfig(config).build(); + + } catch (NoSuchAlgorithmException | KeyStoreException | KeyManagementException ex) { + logger.warning(ex.getMessage()); + } + } + return httpclient; + } + + private void initHttpPool() throws NoSuchAlgorithmException, KeyManagementException, KeyStoreException { + if (trustCerts) { + // use the TrustSelfSignedStrategy to allow Self Signed Certificates + SSLContext sslContext; + SSLConnectionSocketFactory connectionFactory; + + sslContext = SSLContextBuilder.create().loadTrustMaterial(new TrustAllStrategy()).build(); + // create an SSL Socket Factory to use the SSLContext with the trust 
self signed + // certificate strategy + // and allow all hosts verifier. + connectionFactory = new SSLConnectionSocketFactory(sslContext, NoopHostnameVerifier.INSTANCE); + + Registry registry = RegistryBuilder.create() + .register("https", connectionFactory).build(); + cm = new PoolingHttpClientConnectionManager(registry); + } else { + cm = new PoolingHttpClientConnectionManager(); + } + cm.setDefaultMaxPerRoute(httpConcurrency); + cm.setMaxTotal(httpConcurrency > 20 ? httpConcurrency : 20); + } + + @Override + public void savePath(Path fileSystemPath) throws IOException { + throw new UnsupportedDataAccessOperationException( + "RemoteOverlayAccessIO: savePath() not implemented in this storage driver."); + + } + + @Override + public void saveInputStream(InputStream inputStream) throws IOException { + throw new UnsupportedDataAccessOperationException( + "RemoteOverlayAccessIO: saveInputStream() not implemented in this storage driver."); + + } + + @Override + public void saveInputStream(InputStream inputStream, Long filesize) throws IOException { + throw new UnsupportedDataAccessOperationException( + "RemoteOverlayAccessIO: saveInputStream(InputStream, Long) not implemented in this storage driver."); + + } + + protected static boolean isValidIdentifier(String driverId, String storageId) { + String urlPath = storageId.substring(storageId.lastIndexOf("//") + 2); + String baseUrl = System.getProperty("dataverse.files." 
+ driverId + ".base-url"); + try { + URI absoluteURI = new URI(baseUrl + "/" + urlPath); + if(!absoluteURI.normalize().toString().startsWith(baseUrl)) { + logger.warning("storageidentifier doesn't start with " + driverId + "'s base-url: " + storageId); + return false; + } + } catch(URISyntaxException use) { + logger.warning("Could not interpret storageidentifier in remote store " + driverId + " : " + storageId); + return false; + } + return true; + } + + public static String getBaseStoreIdFor(String driverId) { + return System.getProperty("dataverse.files." + driverId + ".base-store"); + } + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index ea19d29b41e..3c9cef04980 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -84,6 +84,8 @@ public class S3AccessIO extends StorageIO { private static final Config config = ConfigProvider.getConfig(); private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.S3AccessIO"); + + private boolean mainDriver = true; private static HashMap driverClientMap = new HashMap(); private static HashMap driverTMMap = new HashMap(); @@ -120,6 +122,7 @@ public S3AccessIO(T dvObject, DataAccessRequest req, String driverId) { public S3AccessIO(String storageLocation, String driverId) { this(null, null, driverId); // TODO: validate the storage location supplied + logger.fine("Instantiating with location: " + storageLocation); bucketName = storageLocation.substring(0,storageLocation.indexOf('/')); minPartSize = getMinPartSize(driverId); key = storageLocation.substring(storageLocation.indexOf('/')+1); @@ -174,22 +177,22 @@ public void open(DataAccessOption... 
options) throws IOException { //Fix new DataFiles: DataFiles that have not yet been saved may use this method when they don't have their storageidentifier in the final ://: form // So we fix it up here. ToDo: refactor so that storageidentifier is generated by the appropriate StorageIO class and is final from the start. String newStorageIdentifier = null; - if (storageIdentifier.startsWith(this.driverId + "://")) { - if(!storageIdentifier.substring((this.driverId + "://").length()).contains(":")) { + if (storageIdentifier.startsWith(this.driverId + DataAccess.SEPARATOR)) { + if(!storageIdentifier.substring((this.driverId + DataAccess.SEPARATOR).length()).contains(":")) { //Driver id but no bucket if(bucketName!=null) { - newStorageIdentifier=this.driverId + "://" + bucketName + ":" + storageIdentifier.substring((this.driverId + "://").length()); + newStorageIdentifier=this.driverId + DataAccess.SEPARATOR + bucketName + ":" + storageIdentifier.substring((this.driverId + DataAccess.SEPARATOR).length()); } else { throw new IOException("S3AccessIO: DataFile (storage identifier " + storageIdentifier + ") is not associated with a bucket."); } } // else we're OK (assumes bucket name in storageidentifier matches the driver's bucketname) } else { - if(!storageIdentifier.substring((this.driverId + "://").length()).contains(":")) { + if(!storageIdentifier.substring((this.driverId + DataAccess.SEPARATOR).length()).contains(":")) { //No driver id or bucket - newStorageIdentifier= this.driverId + "://" + bucketName + ":" + storageIdentifier; + newStorageIdentifier= this.driverId + DataAccess.SEPARATOR + bucketName + ":" + storageIdentifier; } else { //Just the bucketname - newStorageIdentifier= this.driverId + "://" + storageIdentifier; + newStorageIdentifier= this.driverId + DataAccess.SEPARATOR + storageIdentifier; } } if(newStorageIdentifier != null) { @@ -235,39 +238,44 @@ public void open(DataAccessOption... 
options) throws IOException { } else if (dvObject instanceof Dataset) { Dataset dataset = this.getDataset(); key = dataset.getAuthorityForFileStorage() + "/" + dataset.getIdentifierForFileStorage(); - dataset.setStorageIdentifier(this.driverId + "://" + key); + dataset.setStorageIdentifier(this.driverId + DataAccess.SEPARATOR + key); } else if (dvObject instanceof Dataverse) { throw new IOException("Data Access: Storage driver does not support dvObject type Dataverse yet"); } else { - // Direct access, e.g. for external upload - no associated DVobject yet, but we want to be able to get the size - // With small files, it looks like we may call before S3 says it exists, so try some retries before failing - if(key!=null) { - ObjectMetadata objectMetadata = null; - int retries = 20; - while(retries > 0) { - try { - objectMetadata = s3.getObjectMetadata(bucketName, key); - if(retries != 20) { - logger.warning("Success for key: " + key + " after " + ((20-retries)*3) + " seconds"); - } - retries = 0; - } catch (SdkClientException sce) { - if(retries > 1) { - retries--; - try { - Thread.sleep(3000); - } catch (InterruptedException e) { - e.printStackTrace(); - } - logger.warning("Retrying after: " + sce.getMessage()); - } else { - throw new IOException("Cannot get S3 object " + key + " ("+sce.getMessage()+")"); - } - } - } - this.setSize(objectMetadata.getContentLength()); - }else { - throw new IOException("Data Access: Invalid DvObject type"); + if (isMainDriver()) { + // Direct access, e.g. 
for external upload - no associated DVobject yet, but we + // want to be able to get the size + // With small files, it looks like we may call before S3 says it exists, so try + // some retries before failing + if (key != null) { + ObjectMetadata objectMetadata = null; + int retries = 20; + while (retries > 0) { + try { + objectMetadata = s3.getObjectMetadata(bucketName, key); + if (retries != 20) { + logger.warning( + "Success for key: " + key + " after " + ((20 - retries) * 3) + " seconds"); + } + retries = 0; + } catch (SdkClientException sce) { + if (retries > 1) { + retries--; + try { + Thread.sleep(3000); + } catch (InterruptedException e) { + e.printStackTrace(); + } + logger.warning("Retrying after: " + sce.getMessage()); + } else { + throw new IOException("Cannot get S3 object " + key + " (" + sce.getMessage() + ")"); + } + } + } + this.setSize(objectMetadata.getContentLength()); + } else { + throw new IOException("Data Access: Invalid DvObject type"); + } } } } @@ -437,6 +445,7 @@ public void delete() throws IOException { @Override public Channel openAuxChannel(String auxItemTag, DataAccessOption... 
options) throws IOException { if (isWriteAccessRequested(options)) { + //Need size to write to S3 throw new UnsupportedDataAccessOperationException("S3AccessIO: write mode openAuxChannel() not yet implemented in this storage driver."); } @@ -723,7 +732,7 @@ public String getStorageLocation() throws IOException { throw new IOException("Failed to obtain the S3 key for the file"); } - return this.driverId + "://" + bucketName + "/" + locationKey; + return this.driverId + DataAccess.SEPARATOR + bucketName + "/" + locationKey; } @Override @@ -822,7 +831,7 @@ private static String getMainFileKey(String baseKey, String storageIdentifier, S throw new FileNotFoundException("Data Access: No local storage identifier defined for this datafile."); } - if (storageIdentifier.indexOf(driverId + "://")>=0) { + if (storageIdentifier.indexOf(driverId + DataAccess.SEPARATOR)>=0) { //String driverId = storageIdentifier.substring(0, storageIdentifier.indexOf("://")+3); //As currently implemented (v4.20), the bucket is part of the identifier and we could extract it and compare it with getBucketName() as a check - //Only one bucket per driver is supported (though things might work if the profile creds work with multiple buckets, then again it's not clear when logic is reading from the driver property or from the DataFile). @@ -834,6 +843,7 @@ private static String getMainFileKey(String baseKey, String storageIdentifier, S return key; } + @Override public boolean downloadRedirectEnabled() { String optionValue = System.getProperty("dataverse.files." + this.driverId + ".download-redirect"); if ("true".equalsIgnoreCase(optionValue)) { @@ -842,6 +852,10 @@ public boolean downloadRedirectEnabled() { return false; } + public boolean downloadRedirectEnabled(String auxObjectTag) { + return downloadRedirectEnabled(); + } + /** * Generates a temporary URL for a direct S3 download; * either for the main physical file, or (optionally) for an auxiliary. 
@@ -851,7 +865,7 @@ public boolean downloadRedirectEnabled() { * @return redirect url * @throws IOException. */ - public String generateTemporaryS3Url(String auxiliaryTag, String auxiliaryType, String auxiliaryFileName) throws IOException { + public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliaryType, String auxiliaryFileName) throws IOException { //Questions: // Q. Should this work for private and public? // A. Yes! Since the URL has a limited, short life span. -- L.A. @@ -923,9 +937,9 @@ public String generateTemporaryS3Url(String auxiliaryTag, String auxiliaryType, // them for some servers, we check whether the protocol is in the url and then // normalizing to use the part without the protocol String endpointServer = endpoint; - int protocolEnd = endpoint.indexOf("://"); + int protocolEnd = endpoint.indexOf(DataAccess.SEPARATOR); if (protocolEnd >=0 ) { - endpointServer = endpoint.substring(protocolEnd + 3); + endpointServer = endpoint.substring(protocolEnd + DataAccess.SEPARATOR.length()); } logger.fine("Endpoint: " + endpointServer); // We're then replacing @@ -984,9 +998,9 @@ private String generateTemporaryS3UploadUrl(String key, Date expiration) throws // them for some servers, we check whether the protocol is in the url and then // normalizing to use the part without the protocol String endpointServer = endpoint; - int protocolEnd = endpoint.indexOf("://"); + int protocolEnd = endpoint.indexOf(DataAccess.SEPARATOR); if (protocolEnd >=0 ) { - endpointServer = endpoint.substring(protocolEnd + 3); + endpointServer = endpoint.substring(protocolEnd + DataAccess.SEPARATOR.length()); } logger.fine("Endpoint: " + endpointServer); // We're then replacing @@ -1253,4 +1267,44 @@ public static void completeMultipartUpload(String globalId, String storageIdenti s3Client.completeMultipartUpload(req); } + public boolean isMainDriver() { + return mainDriver; + } + + public void setMainDriver(boolean mainDriver) { + this.mainDriver = mainDriver; + 
} + + public static String getDriverPrefix(String driverId) { + return driverId+ DataAccess.SEPARATOR + getBucketName(driverId) + ":"; + } + + //Confirm inputs are of the form s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42 + protected static boolean isValidIdentifier(String driverId, String storageId) { + String storageBucketAndId = storageId.substring(storageId.lastIndexOf("//") + 2); + String bucketName = getBucketName(driverId); + if(bucketName==null) { + logger.warning("No bucket defined for " + driverId); + return false; + } + int index = storageBucketAndId.lastIndexOf(":"); + if(index<=0) { + logger.warning("No bucket defined in submitted identifier: " + storageId); + return false; + } + String idBucket = storageBucketAndId.substring(0, index); + String id = storageBucketAndId.substring(index+1); + logger.fine(id); + if(!bucketName.equals(idBucket)) { + logger.warning("Incorrect bucket in submitted identifier: " + storageId); + return false; + } + if (!usesStandardNamePattern(id)) { + logger.warning("Unacceptable identifier pattern in submitted identifier: " + storageId); + return false; + } + return true; + } + + } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java index b0e9648285c..90e4a54dbe8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java @@ -30,16 +30,17 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.net.URL; import java.nio.channels.Channel; import java.nio.channels.ReadableByteChannel; import java.nio.channels.WritableByteChannel; import java.nio.file.Path; +import java.util.HashMap; import java.util.Iterator; import java.util.List; - - -//import org.apache.commons.httpclient.Header; -//import org.apache.commons.httpclient.methods.GetMethod; +import java.util.Map; +import java.util.regex.Matcher; 
+import java.util.regex.Pattern; /** @@ -66,7 +67,7 @@ public StorageIO(T dvObject, DataAccessRequest req, String driverId) { this.req = new DataAccessRequest(); } if (this.driverId == null) { - this.driverId = "file"; + this.driverId = DataAccess.FILE; } } @@ -78,7 +79,10 @@ public StorageIO(T dvObject, DataAccessRequest req, String driverId) { protected boolean isReadAccess = false; protected boolean isWriteAccess = false; - + //A public store is one in which files may be accessible outside Dataverse and therefore accessible without regard to Dataverse's access controls related to restriction and embargoes. + //Currently, this is just used to warn users at upload time rather than disable restriction/embargo. + static protected Map driverPublicAccessMap = new HashMap(); + public boolean canRead() { return isReadAccess; } @@ -183,7 +187,7 @@ public boolean canWrite() { public abstract void deleteAllAuxObjects() throws IOException; private DataAccessRequest req; - private InputStream in; + private InputStream in = null; private OutputStream out; protected Channel channel; protected DvObject dvObject; @@ -222,6 +226,8 @@ public boolean canWrite() { private String swiftFileName; private String remoteUrl; + protected String remoteStoreName = null; + protected URL remoteStoreUrl = null; // For HTTP-based downloads: /*private GetMethod method = null; @@ -330,6 +336,14 @@ public String getSwiftContainerName(){ return swiftContainerName; } + public String getRemoteStoreName() { + return remoteStoreName; + } + + public URL getRemoteStoreUrl() { + return remoteStoreUrl; + } + /*public GetMethod getHTTPMethod() { return method; } @@ -564,4 +578,48 @@ public boolean isBelowIngestSizeLimit() { return true; } } + + public boolean downloadRedirectEnabled() { + return false; + } + + public boolean downloadRedirectEnabled(String auxObjectTag) { + return false; + } + + public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliaryType, String auxiliaryFileName) 
throws IOException { + throw new UnsupportedDataAccessOperationException("Direct download not implemented for this storage type"); + } + + + public static boolean isPublicStore(String driverId) { + //Read once and cache + if(!driverPublicAccessMap.containsKey(driverId)) { + driverPublicAccessMap.put(driverId, Boolean.parseBoolean(System.getProperty("dataverse.files." + driverId + ".public"))); + } + return driverPublicAccessMap.get(driverId); + } + + public static String getDriverPrefix(String driverId) { + return driverId+ DataAccess.SEPARATOR; + } + + public static boolean isDirectUploadEnabled(String driverId) { + return Boolean.parseBoolean(System.getProperty("dataverse.files." + driverId + ".upload-redirect")); + } + + //Check that storageIdentifier is consistent with store's config + //False will prevent direct uploads + protected static boolean isValidIdentifier(String driverId, String storageId) { + return false; + } + + //Utility to verify the standard UUID pattern for stored files. + protected static boolean usesStandardNamePattern(String identifier) { + + Pattern r = Pattern.compile("^[a-f,0-9]{11}-[a-f,0-9]{12}$"); + Matcher m = r.matcher(identifier); + return m.find(); + } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java index 3bc29cb9836..b1725b040a3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java @@ -508,7 +508,7 @@ private StoredObject initializeSwiftFileObject(boolean writeAccess, String auxIt if (dvObject instanceof DataFile) { Dataset owner = this.getDataFile().getOwner(); - if (storageIdentifier.startsWith(this.driverId + "://")) { + if (storageIdentifier.startsWith(this.driverId + DataAccess.SEPARATOR)) { // This is a call on an already existing swift object. 
String[] swiftStorageTokens = storageIdentifier.substring(8).split(":", 3); @@ -552,14 +552,14 @@ private StoredObject initializeSwiftFileObject(boolean writeAccess, String auxIt //setSwiftContainerName(swiftFolderPath); //swiftFileName = dataFile.getDisplayName(); //Storage Identifier is now updated after the object is uploaded on Swift. - dvObject.setStorageIdentifier(this.driverId + "://" + swiftDefaultEndpoint + ":" + swiftFolderPath + ":" + swiftFileName); + dvObject.setStorageIdentifier(this.driverId + DataAccess.SEPARATOR + swiftDefaultEndpoint + ":" + swiftFolderPath + ":" + swiftFileName); } else { throw new IOException("SwiftAccessIO: unknown access mode."); } } else if (dvObject instanceof Dataset) { Dataset dataset = this.getDataset(); - if (storageIdentifier.startsWith(this.driverId + "://")) { + if (storageIdentifier.startsWith(this.driverId + DataAccess.SEPARATOR)) { // This is a call on an already existing swift object. //TODO: determine how storage identifier will give us info @@ -601,7 +601,7 @@ private StoredObject initializeSwiftFileObject(boolean writeAccess, String auxIt swiftPseudoFolderPathSeparator + dataset.getIdentifierForFileStorage(); swiftFileName = auxItemTag; - dvObject.setStorageIdentifier(this.driverId + "://" + swiftEndPoint + ":" + swiftFolderPath); + dvObject.setStorageIdentifier(this.driverId + DataAccess.SEPARATOR + swiftEndPoint + ":" + swiftFolderPath); } else { throw new IOException("SwiftAccessIO: unknown access mode."); } @@ -628,7 +628,7 @@ private StoredObject initializeSwiftFileObject(boolean writeAccess, String auxIt other swiftContainerName Object Store pseudo-folder can be created, which is not provide by the joss Java swift library as of yet. */ - if (storageIdentifier.startsWith(this.driverId + "://")) { + if (storageIdentifier.startsWith(this.driverId + DataAccess.SEPARATOR)) { // An existing swift object; the container must already exist as well. 
this.swiftContainer = account.getContainer(swiftContainerName); } else { @@ -874,7 +874,7 @@ public String getSwiftContainerName() { } return null; } - + //https://gist.github.com/ishikawa/88599 public static String toHexString(byte[] bytes) { Formatter formatter = new Formatter(); diff --git a/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java b/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java index ccf947b8868..7683aab7dfa 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java @@ -5,6 +5,7 @@ import edu.harvard.iq.dataverse.DatasetField; import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.FileMetadata; +import edu.harvard.iq.dataverse.TermsOfUseAndAccess; import edu.harvard.iq.dataverse.authorization.groups.impl.ipaddress.ip.IpAddress; import edu.harvard.iq.dataverse.dataaccess.DataAccess; import static edu.harvard.iq.dataverse.dataaccess.DataAccess.getStorageIO; @@ -36,6 +37,7 @@ import edu.harvard.iq.dataverse.datasetutility.FileSizeChecker; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.license.License; +import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.StringUtil; import static edu.harvard.iq.dataverse.util.json.JsonPrinter.json; import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder; @@ -456,13 +458,13 @@ public static List getDatasetSummaryFields(DatasetVersion datasetV return datasetFields; } - public static boolean isAppropriateStorageDriver(Dataset dataset){ - // ToDo - rsync was written before multiple store support and currently is hardcoded to use the "s3" store. + public static boolean isRsyncAppropriateStorageDriver(Dataset dataset){ + // ToDo - rsync was written before multiple store support and currently is hardcoded to use the DataAccess.S3 store. 
// When those restrictions are lifted/rsync can be configured per store, this test should check that setting // instead of testing for the 's3" store, //This method is used by both the dataset and edit files page so one change here //will fix both - return dataset.getEffectiveStorageDriverId().equals("s3"); + return dataset.getEffectiveStorageDriverId().equals(DataAccess.S3); } /** @@ -476,16 +478,16 @@ public static boolean isAppropriateStorageDriver(Dataset dataset){ public static String getDownloadSize(DatasetVersion dsv, boolean original) { return FileSizeChecker.bytesToHumanReadable(getDownloadSizeNumeric(dsv, original)); } - + public static Long getDownloadSizeNumeric(DatasetVersion dsv, boolean original) { return getDownloadSizeNumericBySelectedFiles(dsv.getFileMetadatas(), original); } - + public static Long getDownloadSizeNumericBySelectedFiles(List fileMetadatas, boolean original) { long bytes = 0l; for (FileMetadata fileMetadata : fileMetadatas) { DataFile dataFile = fileMetadata.getDataFile(); - if (original && dataFile.isTabularData()) { + if (original && dataFile.isTabularData()) { bytes += dataFile.getOriginalFileSize() == null ? 0 : dataFile.getOriginalFileSize(); } else { bytes += dataFile.getFilesize(); @@ -538,14 +540,23 @@ public static boolean validateDatasetMetadataExternally(Dataset ds, String execu } + public static License getLicense(DatasetVersion dsv) { + License license = null; + TermsOfUseAndAccess tua = dsv.getTermsOfUseAndAccess(); + if(tua!=null) { + license = tua.getLicense(); + } + return license; + } + public static String getLicenseName(DatasetVersion dsv) { - License license = dsv.getTermsOfUseAndAccess().getLicense(); + License license = DatasetUtil.getLicense(dsv); return license != null ? 
license.getName() : BundleUtil.getStringFromBundle("license.custom"); } public static String getLicenseURI(DatasetVersion dsv) { - License license = dsv.getTermsOfUseAndAccess().getLicense(); + License license = DatasetUtil.getLicense(dsv); // Return the URI // For standard licenses, just return the stored URI return (license != null) ? license.getUri().toString() @@ -560,12 +571,12 @@ public static String getLicenseURI(DatasetVersion dsv) { } public static String getLicenseIcon(DatasetVersion dsv) { - License license = dsv.getTermsOfUseAndAccess().getLicense(); + License license = DatasetUtil.getLicense(dsv); return license != null && license.getIconUrl() != null ? license.getIconUrl().toString() : null; } public static String getLicenseDescription(DatasetVersion dsv) { - License license = dsv.getTermsOfUseAndAccess().getLicense(); + License license = DatasetUtil.getLicense(dsv); return license != null ? license.getShortDescription() : BundleUtil.getStringFromBundle("license.custom.description"); } diff --git a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java index b270393e5e1..8e7922fd83b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java +++ b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java @@ -19,12 +19,10 @@ import edu.harvard.iq.dataverse.api.Files; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.authorization.users.User; +import edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.engine.command.Command; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; -import edu.harvard.iq.dataverse.engine.command.impl.AbstractCreateDatasetCommand; -import 
edu.harvard.iq.dataverse.engine.command.impl.CreateNewDatasetCommand; -import edu.harvard.iq.dataverse.engine.command.impl.DeleteDataFileCommand; import edu.harvard.iq.dataverse.engine.command.impl.RestrictFileCommand; import edu.harvard.iq.dataverse.engine.command.impl.UpdateDatasetVersionCommand; import edu.harvard.iq.dataverse.ingest.IngestServiceBean; @@ -43,7 +41,6 @@ import java.util.Iterator; import java.util.List; import java.util.Objects; -import java.util.ResourceBundle; import java.util.Set; import java.util.logging.Level; import java.util.logging.Logger; @@ -59,7 +56,6 @@ import javax.ws.rs.core.Response; import edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder; -import org.apache.commons.lang3.StringUtils; import org.apache.commons.io.IOUtils; import org.ocpsoft.common.util.Strings; @@ -619,7 +615,7 @@ private boolean runAddReplacePhase1(Dataset owner, if (!this.step_050_checkForConstraintViolations()){ return false; } - + msgt("step_055_loadOptionalFileParams"); if (!this.step_055_loadOptionalFileParams(optionalFileParams)){ return false; @@ -778,7 +774,7 @@ private boolean runAddReplacePhase2(boolean tabIngest){ } } } - + msgt("step_090_notifyUser"); if (!this.step_090_notifyUser()){ return false; @@ -1379,7 +1375,7 @@ private boolean step_040_auto_checkForDuplicates(){ String fileType = fileToReplace.getOriginalFileFormat() != null ? fileToReplace.getOriginalFileFormat() : fileToReplace.getContentType(); if (!finalFileList.get(0).getContentType().equalsIgnoreCase(fileType)) { String friendlyType = fileToReplace.getOriginalFormatLabel() != null ? 
fileToReplace.getOriginalFormatLabel() : fileToReplace.getFriendlyType(); - + List errParams = Arrays.asList(friendlyType, finalFileList.get(0).getFriendlyType()); @@ -1519,8 +1515,16 @@ private boolean step_050_checkForConstraintViolations(){ // violations found: gather all error messages // ----------------------------------------------------------- List errMsgs = new ArrayList<>(); - for (ConstraintViolation violation : constraintViolations){ - this.addError(violation.getMessage()); + for (ConstraintViolation violation : constraintViolations) { + /* + for 8859 return conflict response status if the validation fails + due to terms of use/access out of compliance + */ + if (workingVersion.getTermsOfUseAndAccess().getValidationMessage() != null) { + addError(Response.Status.CONFLICT,workingVersion.getTermsOfUseAndAccess().getValidationMessage()); + } else { + this.addError(violation.getMessage()); + } } return this.hasError(); @@ -2049,6 +2053,10 @@ public Response addFiles(String jsonData, Dataset dataset, User authUser) { String newStorageIdentifier = null; if (optionalFileParams.hasStorageIdentifier()) { newStorageIdentifier = optionalFileParams.getStorageIdentifier(); + newStorageIdentifier = DataAccess.expandStorageIdentifierIfNeeded(newStorageIdentifier); + if(!DataAccess.uploadToDatasetAllowed(dataset, newStorageIdentifier)) { + addErrorSevere("Dataset store configuration does not allow provided storageIdentifier."); + } if (optionalFileParams.hasFileName()) { newFilename = optionalFileParams.getFileName(); if (optionalFileParams.hasMimetype()) { @@ -2057,14 +2065,10 @@ public Response addFiles(String jsonData, Dataset dataset, User authUser) { } msgt("ADD! 
= " + newFilename); - - runAddFileByDataset(dataset, - newFilename, - newFileContentType, - newStorageIdentifier, - null, - optionalFileParams, true); - + if (!hasError()) { + runAddFileByDataset(dataset, newFilename, newFileContentType, newStorageIdentifier, + null, optionalFileParams, true); + } if (hasError()) { JsonObjectBuilder fileoutput = Json.createObjectBuilder() .add("storageIdentifier", newStorageIdentifier) @@ -2088,8 +2092,8 @@ public Response addFiles(String jsonData, Dataset dataset, User authUser) { .add("fileDetails", successresult.getJsonArray("files").getJsonObject(0)); jarr.add(fileoutput); } - } successNumberofFiles = successNumberofFiles + 1; + } } else { JsonObjectBuilder fileoutput = Json.createObjectBuilder() .add("errorMessage", "You must provide a storageidentifier, filename, and mimetype.") diff --git a/src/main/java/edu/harvard/iq/dataverse/datasetutility/OptionalFileParams.java b/src/main/java/edu/harvard/iq/dataverse/datasetutility/OptionalFileParams.java index 35687151090..959dbc4e262 100644 --- a/src/main/java/edu/harvard/iq/dataverse/datasetutility/OptionalFileParams.java +++ b/src/main/java/edu/harvard/iq/dataverse/datasetutility/OptionalFileParams.java @@ -16,6 +16,7 @@ import edu.harvard.iq.dataverse.DataFileTag; import edu.harvard.iq.dataverse.FileMetadata; import edu.harvard.iq.dataverse.api.Util; +import edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.util.BundleUtil; import java.lang.reflect.Type; @@ -371,8 +372,15 @@ private void loadParamsFromJson(String jsonData) throws DataFileTagException{ // get storage identifier as string // ------------------------------- if ((jsonObj.has(STORAGE_IDENTIFIER_ATTR_NAME)) && (!jsonObj.get(STORAGE_IDENTIFIER_ATTR_NAME).isJsonNull())){ + // Basic sanity check that driver specified is defined and the overall + // identifier is consistent with that store's config. 
Note that being able to + // specify a driver that does not support direct uploads is currently used with + // out-of-band uploads, e.g. for bulk migration. + String storageId = jsonObj.get(STORAGE_IDENTIFIER_ATTR_NAME).getAsString(); + if (DataAccess.isValidDirectStorageIdentifier(storageId)) { + this.storageIdentifier = storageId; + } - this.storageIdentifier = jsonObj.get(STORAGE_IDENTIFIER_ATTR_NAME).getAsString(); } // ------------------------------- @@ -396,7 +404,7 @@ private void loadParamsFromJson(String jsonData) throws DataFileTagException{ // ------------------------------- if ((jsonObj.has(LEGACY_CHECKSUM_ATTR_NAME)) && (!jsonObj.get(LEGACY_CHECKSUM_ATTR_NAME).isJsonNull())){ - this.checkSumValue = jsonObj.get(LEGACY_CHECKSUM_ATTR_NAME).getAsString(); + this.checkSumValue = jsonObj.get(LEGACY_CHECKSUM_ATTR_NAME).getAsString().toLowerCase(); this.checkSumType= ChecksumType.MD5; } // ------------------------------- @@ -404,7 +412,7 @@ private void loadParamsFromJson(String jsonData) throws DataFileTagException{ // ------------------------------- else if ((jsonObj.has(CHECKSUM_OBJECT_NAME)) && (!jsonObj.get(CHECKSUM_OBJECT_NAME).isJsonNull())){ - this.checkSumValue = ((JsonObject) jsonObj.get(CHECKSUM_OBJECT_NAME)).get(CHECKSUM_OBJECT_VALUE).getAsString(); + this.checkSumValue = ((JsonObject) jsonObj.get(CHECKSUM_OBJECT_NAME)).get(CHECKSUM_OBJECT_VALUE).getAsString().toLowerCase(); this.checkSumType = ChecksumType.fromString(((JsonObject) jsonObj.get(CHECKSUM_OBJECT_NAME)).get(CHECKSUM_OBJECT_TYPE).getAsString()); } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractCreateDatasetCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractCreateDatasetCommand.java index ec544d9490a..1465cbd74e2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractCreateDatasetCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractCreateDatasetCommand.java @@ -102,7 
+102,7 @@ public Dataset execute(CommandContext ctxt) throws CommandException { } if (theDataset.getStorageIdentifier() == null) { String driverId = theDataset.getEffectiveStorageDriverId(); - theDataset.setStorageIdentifier(driverId + "://" + theDataset.getAuthorityForFileStorage() + "/" + theDataset.getIdentifierForFileStorage()); + theDataset.setStorageIdentifier(driverId + DataAccess.SEPARATOR + theDataset.getAuthorityForFileStorage() + "/" + theDataset.getIdentifierForFileStorage()); } if (theDataset.getIdentifier()==null) { theDataset.setIdentifier(ctxt.datasets().generateDatasetIdentifier(theDataset, idServiceBean)); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractDatasetCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractDatasetCommand.java index 02e00e36855..f3b75d23c63 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractDatasetCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractDatasetCommand.java @@ -114,6 +114,12 @@ protected void validateOrDie(DatasetVersion dsv, Boolean lenient) throws Command validationMessage += constraintViolations.stream() .filter(cv -> cv.getRootBean() instanceof TermsOfUseAndAccess) .map(cv -> cv.toString()); + + for (ConstraintViolation cv : constraintViolations){ + if (cv.getRootBean() instanceof TermsOfUseAndAccess){ + throw new IllegalCommandException(validationMessage, this); + } + } throw new IllegalCommandException(validationMessage, this); } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java index 4fa0961d134..b988fd05f03 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java @@ -1,7 +1,11 @@ 
package edu.harvard.iq.dataverse.engine.command.impl; +import edu.harvard.iq.dataverse.DOIDataCiteRegisterService; +import edu.harvard.iq.dataverse.DataCitation; +import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.DvObject; +import edu.harvard.iq.dataverse.SettingsWrapper; import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.users.ApiToken; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; @@ -12,9 +16,13 @@ import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.bagit.BagGenerator; +import edu.harvard.iq.dataverse.util.bagit.OREMap; import edu.harvard.iq.dataverse.workflow.step.WorkflowStepResult; -import java.util.Date; +import java.io.IOException; +import java.io.PipedInputStream; +import java.io.PipedOutputStream; +import java.security.DigestInputStream; import java.util.HashMap; import java.util.Map; import java.util.logging.Logger; @@ -24,7 +32,9 @@ public abstract class AbstractSubmitToArchiveCommand extends AbstractCommand requestedSettings = new HashMap(); + protected boolean success=false; private static final Logger logger = Logger.getLogger(AbstractSubmitToArchiveCommand.class.getName()); + private static final int MAX_ZIP_WAIT = 20000; private static final int DEFAULT_THREADS = 2; public AbstractSubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersion version) { @@ -86,5 +96,91 @@ public String describe() { return super.describe() + "DatasetVersion: [" + version.getId() + " (v" + version.getFriendlyVersionNumber()+")]"; } + + String getDataCiteXml(DatasetVersion dv) { + DataCitation dc = new DataCitation(dv); + Map metadata = dc.getDataCiteMetadata(); + return DOIDataCiteRegisterService.getMetadataFromDvObject(dv.getDataset().getGlobalId().asString(), metadata, + dv.getDataset()); + } + + 
public Thread startBagThread(DatasetVersion dv, PipedInputStream in, DigestInputStream digestInputStream2, + String dataciteXml, ApiToken token) throws IOException, InterruptedException { + Thread bagThread = new Thread(new Runnable() { + public void run() { + try (PipedOutputStream out = new PipedOutputStream(in)) { + // Generate bag + BagGenerator bagger = new BagGenerator(new OREMap(dv, false), dataciteXml); + bagger.setNumConnections(getNumberOfBagGeneratorThreads()); + bagger.setAuthenticationKey(token.getTokenString()); + bagger.generateBag(out); + success = true; + } catch (Exception e) { + logger.severe("Error creating bag: " + e.getMessage()); + // TODO Auto-generated catch block + e.printStackTrace(); + try { + digestInputStream2.close(); + } catch (Exception ex) { + logger.warning(ex.getLocalizedMessage()); + } + throw new RuntimeException("Error creating bag: " + e.getMessage()); + } + } + }); + bagThread.start(); + /* + * The following loop handles two issues. First, with no delay, the + * bucket.create() call below can get started before the piped streams are set + * up, causing a failure (seen when triggered in a PostPublishDataset workflow). + * A minimal initial wait, e.g. until some bytes are available, would address + * this. Second, the BagGenerator class, due to it's use of parallel streaming + * creation of the zip file, has the characteristic that it makes a few bytes + * available - from setting up the directory structure for the zip file - + * significantly earlier than it is ready to stream file content (e.g. for + * thousands of files and GB of content). If, for these large datasets, + * the transfer is started as soon as bytes are available, the call can + * timeout before the bytes for all the zipped files are available. 
To manage + * this, the loop waits until more than 90K bytes are available (larger than any expected + * directory structure for the zip, and implying that the main zipped content is + * available), or until the thread terminates with all of its content written to + * the pipe. (Note the PipedInputStream buffer is set at 100K by the caller - I didn't + * want to test whether that means that exactly 100K bytes will be available() + * for large datasets or not, so the test below is at 90K.) + * + * An additional sanity check limits the wait to MAX_ZIP_WAIT (20,000) seconds, i.e. about 5.5 hours. The BagGenerator + * has been used to archive >120K files, 2K directories, and ~600GB of content on the + * SEAD project (streaming content to disk rather than over an internet + * connection), which would take longer than 20K seconds (even 10+ hours) and might + * produce an initial set of bytes for directories > 90K. If Dataverse ever + * needs to support datasets of this size, the numbers here would need to be + * increased, and/or a change in how archives are sent to Google (e.g. as + * multiple blobs that get aggregated) would be required. 
+ */ + int i = 0; + while (digestInputStream2.available() <= 90000 && i < MAX_ZIP_WAIT && bagThread.isAlive()) { + Thread.sleep(1000); + logger.fine("avail: " + digestInputStream2.available() + " : " + bagThread.getState().toString()); + i++; + } + logger.fine("Bag: transfer started, i=" + i + ", avail = " + digestInputStream2.available()); + if(i==MAX_ZIP_WAIT) { + throw new IOException("Stream not available"); + } + return bagThread; + } + public static boolean isArchivable(Dataset dataset, SettingsWrapper settingsWrapper) { + return true; + } + + //Check if the chosen archiver imposes single-version-only archiving - in a View context + public static boolean isSingleVersion(SettingsWrapper settingsWrapper) { + return false; + } + + //Check if the chosen archiver imposes single-version-only archiving - in the API + public static boolean isSingleVersion(SettingsServiceBean settingsService) { + return false; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDatasetVersionCommand.java index dbd2cfa9df8..72439d4ba4a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDatasetVersionCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDatasetVersionCommand.java @@ -44,9 +44,7 @@ public DatasetVersion execute(CommandContext ctxt) throws CommandException { throw new IllegalCommandException("Latest version is already a draft. 
package edu.harvard.iq.dataverse.engine.command.impl;

import edu.harvard.iq.dataverse.Dataset;
import edu.harvard.iq.dataverse.DatasetVersion;
import edu.harvard.iq.dataverse.Dataverse;
import edu.harvard.iq.dataverse.SettingsWrapper;
import edu.harvard.iq.dataverse.authorization.Permission;
import edu.harvard.iq.dataverse.authorization.users.ApiToken;
import edu.harvard.iq.dataverse.branding.BrandingUtil;
import edu.harvard.iq.dataverse.engine.command.Command;
import edu.harvard.iq.dataverse.engine.command.DataverseRequest;
import edu.harvard.iq.dataverse.engine.command.RequiredPermissions;
import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
import edu.harvard.iq.dataverse.util.json.JsonUtil;
import edu.harvard.iq.dataverse.workflow.step.Failure;
import edu.harvard.iq.dataverse.workflow.step.WorkflowStepResult;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.security.KeyFactory;
import java.security.KeyManagementException;
import java.security.KeyStoreException;
import java.security.NoSuchAlgorithmException;
import java.security.interfaces.RSAPrivateKey;
import java.security.spec.InvalidKeySpecException;
import java.security.spec.PKCS8EncodedKeySpec;
import java.time.Instant;
import java.util.Base64;
import java.util.Date;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;

import javax.json.Json;
import javax.json.JsonObject;
import javax.json.JsonObjectBuilder;
import javax.json.JsonValue;
import javax.net.ssl.SSLContext;

import org.apache.commons.codec.digest.DigestUtils;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.conn.ssl.NoopHostnameVerifier;
import org.apache.http.conn.ssl.TrustAllStrategy;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.ssl.SSLContextBuilder;

import org.erdtman.jcs.JsonCanonicalizer;

import com.auth0.jwt.JWT;
import com.auth0.jwt.algorithms.Algorithm;
import com.auth0.jwt.exceptions.JWTCreationException;

/**
 * Archiver that runs the inherited S3 submission and, when that succeeds,
 * POSTs an ingest request to the DRS endpoint, authenticated with an
 * RS256-signed JWT.
 *
 * <p>Configuration is read from the {@code :DRSArchiverConfig} setting, a JSON
 * object with these keys:
 * <ul>
 * <li>{@code DRS_endpoint} - URL the ingest request is POSTed to</li>
 * <li>{@code trust_cert} - boolean, default false; see
 * {@link #createHttpClient(boolean)}</li>
 * <li>{@code timeout} - JWT lifetime in minutes, default 5</li>
 * <li>{@code single_version} - boolean, default false; when true, submission
 * fails if any version of the dataset already has an archival copy
 * location</li>
 * <li>{@code admin_metadata} - defaults sent to DRS, including
 * {@code s3_bucket_name} and a {@code collections} object keyed by collection
 * alias whose entries override the defaults</li>
 * </ul>
 * The private signing key is read from the JVM system property
 * {@code dataverse.archiver.drs.rsa_key} as Base64-encoded PKCS#8.
 *
 * <p>NOTE(review): the type argument on {@code Command} is presumed to be
 * {@code DatasetVersion} - confirm against S3SubmitToArchiveCommand.
 */
@RequiredPermissions(Permission.PublishDataset)
public class DRSSubmitToArchiveCommand extends S3SubmitToArchiveCommand implements Command<DatasetVersion> {

    private static final Logger logger = Logger.getLogger(DRSSubmitToArchiveCommand.class.getName());
    private static final String DRS_CONFIG = ":DRSArchiverConfig";
    private static final String ADMIN_METADATA = "admin_metadata";
    private static final String S3_BUCKET_NAME = "s3_bucket_name";
    private static final String S3_PATH = "s3_path";
    private static final String COLLECTIONS = "collections";
    private static final String PACKAGE_ID = "package_id";
    private static final String SINGLE_VERSION = "single_version";
    private static final String DRS_ENDPOINT = "DRS_endpoint";

    private static final String RSA_KEY = "dataverse.archiver.drs.rsa_key";

    private static final String TRUST_CERT = "trust_cert";
    private static final String TIMEOUT = "timeout";

    public DRSSubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersion version) {
        super(aRequest, version);
    }

    /**
     * Runs the inherited S3 submission and then asks DRS to ingest the bag.
     *
     * <p>Once the S3 step has been attempted, a JSON status object is always
     * stored as the version's archival copy location, whether the DRS call
     * succeeds or fails.
     *
     * @param dv                the dataset version to archive
     * @param token             API token passed through to the S3 submission
     * @param requestedSettings settings map; must contain
     *                          {@code :DRSArchiverConfig}
     * @return {@link WorkflowStepResult#OK} when DRS accepted the request, or
     *         when no ancestor collection of the dataset is configured for DRS
     *         (nothing is archived in that case); a {@link Failure} otherwise
     */
    @Override
    public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken token,
            Map<String, String> requestedSettings) {
        logger.fine("In DRSSubmitToArchiveCommand...");

        JsonObject drsConfigObject = parseConfig(requestedSettings.get(DRS_CONFIG));
        if (drsConfigObject == null) {
            logger.warning(DRS_CONFIG + " not found");
            return new Failure("DRS Submission not configured - no " + DRS_CONFIG + " found.");
        }

        // Guard against an incomplete config instead of failing with a NullPointerException
        JsonObject adminMetadata = drsConfigObject.getJsonObject(ADMIN_METADATA);
        JsonObject collectionsConfig = (adminMetadata == null) ? null : adminMetadata.getJsonObject(COLLECTIONS);
        if (collectionsConfig == null) {
            logger.warning(DRS_CONFIG + " does not define " + ADMIN_METADATA + "." + COLLECTIONS);
            return new Failure("DRS Submission not configured - no " + ADMIN_METADATA + "." + COLLECTIONS
                    + " found in " + DRS_CONFIG + ".");
        }

        Dataset dataset = dv.getDataset();
        String alias = getArchivableAncestor(dataset.getOwner(), collectionsConfig.keySet());
        String spaceName = getSpaceName(dataset);
        String packageId = getFileName(spaceName, dv);

        if (alias == null) {
            logger.fine("DRS Archiver: No matching collection found - will not archive: " + packageId);
            return WorkflowStepResult.OK;
        }

        if (drsConfigObject.getBoolean(SINGLE_VERSION, false)) {
            for (DatasetVersion version : dataset.getVersions()) {
                if (version.getArchivalCopyLocation() != null) {
                    return new Failure("DRS Archiver fail: version " + version.getFriendlyVersionNumber()
                            + " already archived.");
                }
            }
        }

        JsonObject collectionConfig = collectionsConfig.getJsonObject(alias);

        WorkflowStepResult s3Result = super.performArchiveSubmission(dv, token, requestedSettings);

        JsonObjectBuilder statusObject = Json.createObjectBuilder();
        statusObject.add(DatasetVersion.ARCHIVAL_STATUS, DatasetVersion.ARCHIVAL_STATUS_FAILURE);
        statusObject.add(DatasetVersion.ARCHIVAL_STATUS_MESSAGE, "Bag not transferred");

        if (s3Result != WorkflowStepResult.OK) {
            logger.warning("DRS: S3 archiving failed - will not call ingest: " + packageId);
            dv.setArchivalCopyLocation(statusObject.build().toString());
            return new Failure("DRS Archiver fail in initial S3 Archiver transfer");
        }

        // This will be overwritten if the further steps are successful
        statusObject.add(DatasetVersion.ARCHIVAL_STATUS_MESSAGE, "Bag transferred, DRS ingest call failed");
        try {
            return requestIngest(drsConfigObject, adminMetadata, collectionConfig, packageId, spaceName,
                    statusObject);
        } finally {
            // Set status after success or failure
            dv.setArchivalCopyLocation(statusObject.build().toString());
        }
    }

    /**
     * Builds, signs and POSTs the ingest request. Every error path returns a
     * {@link Failure}, so a failed call is never reported as OK.
     */
    private WorkflowStepResult requestIngest(JsonObject drsConfigObject, JsonObject adminMetadata,
            JsonObject collectionConfig, String packageId, String spaceName, JsonObjectBuilder statusObject) {

        // Allow override of bucket name at the collection level
        JsonValue bucketName = collectionConfig.containsKey(S3_BUCKET_NAME)
                ? collectionConfig.get(S3_BUCKET_NAME)
                : adminMetadata.get(S3_BUCKET_NAME);
        if (bucketName == null) {
            logger.severe("DRS: no " + S3_BUCKET_NAME + " configured for: " + packageId);
            return new Failure("DRS Archiver fail: no " + S3_BUCKET_NAME + " found in " + DRS_CONFIG);
        }

        String endpoint = drsConfigObject.getString(DRS_ENDPOINT, null);
        if (endpoint == null) {
            return new Failure("DRS Archiver workflow step failed: unable to parse " + DRS_ENDPOINT);
        }

        String rsaKey = System.getProperty(RSA_KEY);
        if (rsaKey == null) {
            logger.severe("DRS: JVM option " + RSA_KEY + " is not set");
            return new Failure("DRS Archiver fail: signing key not configured - no " + RSA_KEY + " found.");
        }

        String body = JsonUtil.prettyPrint(
                buildIngestRequest(bucketName, adminMetadata, collectionConfig, packageId, spaceName));

        CloseableHttpClient client = createHttpClient(drsConfigObject.getBoolean(TRUST_CERT, false));
        try {
            HttpPost ingestPost = new HttpPost();
            ingestPost.setURI(new URI(endpoint));

            // Only the private key is needed to sign; no public key is supplied
            Algorithm algorithmRSA = Algorithm.RSA256(null, loadPrivateKey(rsaKey));
            String jwtString = createJWTString(algorithmRSA, BrandingUtil.getInstallationBrandName(), body,
                    drsConfigObject.getInt(TIMEOUT, 5));
            // The JWT is a bearer credential, so it is deliberately not logged
            ingestPost.setHeader("Authorization", "Bearer " + jwtString);

            logger.fine("Body: " + body);
            ingestPost.setEntity(new StringEntity(body, "utf-8"));
            ingestPost.setHeader("Content-Type", "application/json");

            try (CloseableHttpResponse response = client.execute(ingestPost)) {
                return handleIngestResponse(response, packageId, statusObject);
            }
        } catch (URISyntaxException e) {
            return new Failure("DRS Archiver workflow step failed: unable to parse " + DRS_ENDPOINT);
        } catch (JWTCreationException exception) {
            // Invalid Signing configuration / Couldn't convert Claims.
            return new Failure("DRS Archiver JWT Creation failure: " + exception.getMessage());
        } catch (InvalidKeySpecException | NoSuchAlgorithmException e) {
            logger.log(Level.SEVERE, "DRS: unable to load the RSA key from " + RSA_KEY, e);
            return new Failure("DRS Archiver fail: unable to load the RSA key from " + RSA_KEY);
        } catch (IOException e) {
            logger.log(Level.SEVERE, "DRS Ingest call failed for: " + packageId, e);
            return new Failure("DRS Archiver fail in Ingest call: " + e.getMessage());
        } finally {
            try {
                client.close();
            } catch (IOException e) {
                logger.log(Level.WARNING, "DRS: unable to close HTTP client", e);
            }
        }
    }

    /**
     * Assembles the JSON body of the ingest request: bucket name, package id,
     * S3 path, and the admin metadata with collection-level overrides applied.
     */
    private static JsonObject buildIngestRequest(JsonValue bucketName, JsonObject adminMetadata,
            JsonObject collectionConfig, String packageId, String spaceName) {
        JsonObjectBuilder job = Json.createObjectBuilder();
        job.add(S3_BUCKET_NAME, bucketName);
        job.add(PACKAGE_ID, packageId);
        job.add(S3_PATH, spaceName);

        // We start with the default admin_metadata
        JsonObjectBuilder amob = Json.createObjectBuilder(adminMetadata);
        // Remove collections and then override any params for the given alias
        amob.remove(COLLECTIONS);
        for (Entry<String, JsonValue> entry : collectionConfig.entrySet()) {
            // A bucket override is only applied to the top-level bucket name
            if (!entry.getKey().equals(S3_BUCKET_NAME)) {
                amob.add(entry.getKey(), entry.getValue());
            }
        }
        job.add(ADMIN_METADATA, amob);
        return job.build();
    }

    /**
     * Creates the HTTP client used for the ingest call.
     *
     * @param trustCert when true, the client is built with
     *                  {@link TrustAllStrategy} and {@link NoopHostnameVerifier},
     *                  i.e. server certificates and host names are not
     *                  verified. If that client cannot be built, the default
     *                  client is used instead.
     */
    private static CloseableHttpClient createHttpClient(boolean trustCert) {
        if (trustCert) {
            try {
                SSLContext sslContext = SSLContextBuilder.create().loadTrustMaterial(new TrustAllStrategy())
                        .build();
                return HttpClients.custom().setSSLContext(sslContext)
                        .setSSLHostnameVerifier(NoopHostnameVerifier.INSTANCE).build();
            } catch (KeyManagementException | NoSuchAlgorithmException | KeyStoreException e) {
                logger.log(Level.WARNING, "DRS: unable to create a " + TRUST_CERT
                        + " HTTP client, using the default client", e);
            }
        }
        return HttpClients.createDefault();
    }

    /**
     * Decodes a Base64 PKCS#8 RSA private key; CR and LF characters are
     * stripped before decoding.
     *
     * @throws InvalidKeySpecException  if the value is not valid Base64 or not
     *                                  a valid PKCS#8 RSA key
     * @throws NoSuchAlgorithmException if no RSA KeyFactory is available
     */
    private static RSAPrivateKey loadPrivateKey(String base64Key)
            throws InvalidKeySpecException, NoSuchAlgorithmException {
        byte[] encoded;
        try {
            encoded = Base64.getDecoder().decode(base64Key.replaceAll("[\\r\\n]", ""));
        } catch (IllegalArgumentException e) {
            throw new InvalidKeySpecException(RSA_KEY + " is not valid Base64", e);
        }
        KeyFactory keyFactory = KeyFactory.getInstance("RSA");
        return (RSAPrivateKey) keyFactory.generatePrivate(new PKCS8EncodedKeySpec(encoded));
    }

    /**
     * Interprets the DRS response. Only HTTP 202 with a JSON body containing
     * both the archival status and status message keys is accepted; in that
     * case the response entries are merged into {@code statusObject}.
     *
     * @throws IOException if the response body cannot be read
     */
    private static WorkflowStepResult handleIngestResponse(CloseableHttpResponse response, String packageId,
            JsonObjectBuilder statusObject) throws IOException {
        int code = response.getStatusLine().getStatusCode();
        // A response without an entity is treated as an empty body
        String responseBody = (response.getEntity() == null) ? ""
                : new String(response.getEntity().getContent().readAllBytes(), StandardCharsets.UTF_8);

        if (code != 202) {
            logger.severe("DRS Ingest Failed for: " + packageId + " with status code: " + code);
            logger.fine("Response" + responseBody);
            return new Failure("DRS Archiver fail in Ingest call with status code: " + code);
        }
        logger.fine("Status: " + code);
        logger.fine("Response" + responseBody);

        JsonObject responseObject;
        try {
            responseObject = JsonUtil.getJsonObject(responseBody);
        } catch (Exception e) {
            logger.severe("DRS Ingest Failed for: " + packageId + " - response is not a Json object");
            return new Failure("DRS Archiver fail in Ingest call - response is not a Json object");
        }

        if (!responseObject.containsKey(DatasetVersion.ARCHIVAL_STATUS)
                || !responseObject.containsKey(DatasetVersion.ARCHIVAL_STATUS_MESSAGE)) {
            logger.severe("DRS Ingest Failed for: " + packageId
                    + " - response does not include status and message");
            return new Failure("DRS Archiver fail in Ingest call - response does not include status and message");
        }

        String status = responseObject.getString(DatasetVersion.ARCHIVAL_STATUS);
        switch (status) {
        case DatasetVersion.ARCHIVAL_STATUS_PENDING:
            statusObject.addAll(Json.createObjectBuilder(responseObject));
            logger.info("DRS Ingest successfully started for: " + packageId + " : " + responseObject.toString());
            return WorkflowStepResult.OK;
        case DatasetVersion.ARCHIVAL_STATUS_FAILURE:
            statusObject.addAll(Json.createObjectBuilder(responseObject));
            logger.severe("DRS Ingest Failed for: " + packageId + " : " + responseObject.toString());
            return new Failure("DRS Archiver fail in Ingest call");
        case DatasetVersion.ARCHIVAL_STATUS_SUCCESS:
            // We don't expect this from DRS
            statusObject.addAll(Json.createObjectBuilder(responseObject));
            logger.warning("Unexpected Status: " + status);
            return WorkflowStepResult.OK;
        default:
            logger.severe("DRS Ingest Failed for: " + packageId + " with returned status: " + status);
            return new Failure("DRS Archiver fail in Ingest call with returned status: " + status);
        }
    }

    /**
     * Returns the space name followed by a {@code .v<friendly version number>}
     * suffix in which every '.' (including the leading one) is replaced by '_'.
     */
    @Override
    protected String getFileName(String spaceName, DatasetVersion dv) {
        return spaceName + (".v" + dv.getFriendlyVersionNumber()).replace('.', '_');
    }

    /**
     * Returns the space name followed by a
     * {@code _datacite.v<friendly version number>} suffix in which every '.'
     * is replaced by '_'.
     */
    @Override
    protected String getDataCiteFileName(String spaceName, DatasetVersion dv) {
        return spaceName + ("_datacite.v" + dv.getFriendlyVersionNumber()).replace('.', '_');
    }

    /**
     * Creates a signed JWT whose {@code bodySHA256Hash} claim is the SHA-256
     * hex digest of the canonicalized JSON body.
     *
     * @param algorithmRSA          algorithm (with key) used to sign the token
     * @param installationBrandName value of the issuer claim
     * @param body                  JSON request body the token vouches for
     * @param expirationInMinutes   token lifetime, counted from the issued-at
     *                              time
     * @return the serialized, signed JWT
     * @throws IOException if the body cannot be canonicalized
     */
    public static String createJWTString(Algorithm algorithmRSA, String installationBrandName, String body,
            int expirationInMinutes) throws IOException {
        String canonicalBody = new JsonCanonicalizer(body).getEncodedString();
        logger.fine("Canonical body: " + canonicalBody);
        String digest = DigestUtils.sha256Hex(canonicalBody);
        // One instant for both claims, so the lifetime is exactly expirationInMinutes
        Instant issuedAt = Instant.now();
        return JWT.create().withIssuer(installationBrandName).withIssuedAt(Date.from(issuedAt))
                .withExpiresAt(Date.from(issuedAt.plusSeconds(60L * expirationInMinutes)))
                .withKeyId("defaultDataverse").withClaim("bodySHA256Hash", digest).sign(algorithmRSA);
    }

    /**
     * Walks up the owner chain, starting at {@code ancestor} itself, and
     * returns the alias of the first collection contained in
     * {@code collections}.
     *
     * @return the matching alias, or null when there is no match or
     *         {@code ancestor} is null
     */
    private static String getArchivableAncestor(Dataverse ancestor, Set<String> collections) {
        for (Dataverse current = ancestor; current != null; current = current.getOwner()) {
            String alias = current.getAlias();
            if (collections.contains(alias)) {
                return alias;
            }
        }
        return null;
    }

    /**
     * Parses the {@code :DRSArchiverConfig} setting value.
     *
     * @return the parsed object, or null when the value is null or cannot be
     *         parsed (a warning is logged in the latter case)
     */
    private static JsonObject parseConfig(String config) {
        if (config == null) {
            return null;
        }
        try {
            return JsonUtil.getJsonObject(config);
        } catch (Exception e) {
            logger.warning("Unable to parse " + DRS_CONFIG + " setting as a Json object");
            return null;
        }
    }

    // Overrides inherited method to also check whether the dataset is in a
    // collection for which the DRS Archiver is configured
    public static boolean isArchivable(Dataset d, SettingsWrapper sw) {
        JsonObject drsConfigObject = parseConfig(sw.get(DRS_CONFIG, null));
        if (drsConfigObject == null) {
            return false;
        }
        JsonObject adminMetadata = drsConfigObject.getJsonObject(ADMIN_METADATA);
        if (adminMetadata == null) {
            return false;
        }
        JsonObject collectionObj = adminMetadata.getJsonObject(COLLECTIONS);
        if (collectionObj == null) {
            return false;
        }
        return getArchivableAncestor(d.getOwner(), collectionObj.keySet()) != null;
    }

    // DRS Archiver supports single-version semantics if the SINGLE_VERSION key in
    // the DRS_CONFIG is true
    // These methods make that choice visible on the page (cached via
    // SettingsWrapper) or in the API (using SettingsServiceBean), both using the
    // same underlying logic

    public static boolean isSingleVersion(SettingsWrapper sw) {
        return isSingleVersion(sw.get(DRS_CONFIG, null));
    }

    public static boolean isSingleVersion(SettingsServiceBean ss) {
        return isSingleVersion(ss.get(DRS_CONFIG, null));
    }

    private static boolean isSingleVersion(String config) {
        JsonObject drsConfigObject = parseConfig(config);
        return drsConfigObject != null && drsConfigObject.getBoolean(SINGLE_VERSION, false);
    }
}
requestedSettings.get(DURACLOUD_PORT) != null ? requestedSettings.get(DURACLOUD_PORT) : DEFAULT_PORT; - String dpnContext = requestedSettings.get(DURACLOUD_CONTEXT) != null ? requestedSettings.get(DURACLOUD_CONTEXT) : DEFAULT_CONTEXT; + String port = requestedSettings.get(DURACLOUD_PORT) != null ? requestedSettings.get(DURACLOUD_PORT) + : DEFAULT_PORT; + String dpnContext = requestedSettings.get(DURACLOUD_CONTEXT) != null ? requestedSettings.get(DURACLOUD_CONTEXT) + : DEFAULT_CONTEXT; String host = requestedSettings.get(DURACLOUD_HOST); + if (host != null) { Dataset dataset = dv.getDataset(); + // ToDo - change after HDC 3A changes to status reporting + // This will make the archivalCopyLocation non-null after a failure which should + // stop retries + if (dataset.getLockFor(Reason.finalizePublication) == null && dataset.getLockFor(Reason.FileValidationFailed) == null) { // Use Duracloud client classes to login @@ -62,11 +69,31 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t Credential credential = new Credential(System.getProperty("duracloud.username"), System.getProperty("duracloud.password")); storeManager.login(credential); - - String spaceName = dataset.getGlobalId().asString().replace(':', '-').replace('/', '-') - .replace('.', '-').toLowerCase(); + /* + * Aliases can contain upper case characters which are not allowed in space + * names. Similarly, aliases can contain '_' which isn't allowed in a space + * name. The line below replaces any upper case chars with lowercase and + * replaces any '_' with '.-' . The '-' after the dot assures we don't break the + * rule that + * "The last period in a aspace may not immediately be followed by a number". + * (Although we could check, it seems better to just add '.-' all the time.As + * written the replaceAll will also change any chars not valid in a spaceName to + * '.' which would avoid code breaking if the alias constraints change. 
That + * said, this line may map more than one alias to the same spaceName, e.g. + * "test" and "Test" aliases both map to the "test" space name. This does not + * break anything but does potentially put bags from more than one collection in + * the same space. + */ + String spaceName = dataset.getOwner().getAlias().toLowerCase().replaceAll("[^a-z0-9-]", ".dcsafe"); + String baseFileName = dataset.getGlobalId().asString().replace(':', '-').replace('/', '-') + .replace('.', '-').toLowerCase() + "_v" + dv.getFriendlyVersionNumber(); ContentStore store; + //Set a failure status that will be updated if we succeed + JsonObjectBuilder statusObject = Json.createObjectBuilder(); + statusObject.add(DatasetVersion.ARCHIVAL_STATUS, DatasetVersion.ARCHIVAL_STATUS_FAILURE); + statusObject.add(DatasetVersion.ARCHIVAL_STATUS_MESSAGE, "Bag not transferred"); + try { /* * If there is a failure in creating a space, it is likely that a prior version @@ -76,88 +103,85 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t */ store = storeManager.getPrimaryContentStore(); // Create space to copy archival files to - store.createSpace(spaceName); - DataCitation dc = new DataCitation(dv); - Map metadata = dc.getDataCiteMetadata(); - String dataciteXml = DOIDataCiteRegisterService.getMetadataFromDvObject( - dv.getDataset().getGlobalId().asString(), metadata, dv.getDataset()); + if (!store.spaceExists(spaceName)) { + store.createSpace(spaceName); + } + String dataciteXml = getDataCiteXml(dv); MessageDigest messageDigest = MessageDigest.getInstance("MD5"); - try (PipedInputStream dataciteIn = new PipedInputStream(); DigestInputStream digestInputStream = new DigestInputStream(dataciteIn, messageDigest)) { + try (PipedInputStream dataciteIn = new PipedInputStream(); + DigestInputStream digestInputStream = new DigestInputStream(dataciteIn, messageDigest)) { // Add datacite.xml file - new Thread(new Runnable() { + Thread dcThread = new Thread(new Runnable() { public 
void run() { try (PipedOutputStream dataciteOut = new PipedOutputStream(dataciteIn)) { dataciteOut.write(dataciteXml.getBytes(Charset.forName("utf-8"))); dataciteOut.close(); + success=true; } catch (Exception e) { logger.severe("Error creating datacite.xml: " + e.getMessage()); // TODO Auto-generated catch block e.printStackTrace(); - throw new RuntimeException("Error creating datacite.xml: " + e.getMessage()); } } - }).start(); - //Have seen Pipe Closed errors for other archivers when used as a workflow without this delay loop - int i=0; - while(digestInputStream.available()<=0 && i<100) { + }); + dcThread.start(); + // Have seen Pipe Closed errors for other archivers when used as a workflow + // without this delay loop + int i = 0; + while (digestInputStream.available() <= 0 && i < 100) { Thread.sleep(10); i++; } - String checksum = store.addContent(spaceName, "datacite.xml", digestInputStream, -1l, null, null, - null); + String checksum = store.addContent(spaceName, baseFileName + "_datacite.xml", digestInputStream, + -1l, null, null, null); logger.fine("Content: datacite.xml added with checksum: " + checksum); + dcThread.join(); String localchecksum = Hex.encodeHexString(digestInputStream.getMessageDigest().digest()); - if (!checksum.equals(localchecksum)) { - logger.severe(checksum + " not equal to " + localchecksum); + if (!success || !checksum.equals(localchecksum)) { + logger.severe("Failure on " + baseFileName); + logger.severe(success ? 
checksum + " not equal to " + localchecksum : "failed to transfer to DuraCloud"); + try { + store.deleteContent(spaceName, baseFileName + "_datacite.xml"); + } catch (ContentStoreException cse) { + logger.warning(cse.getMessage()); + } return new Failure("Error in transferring DataCite.xml file to DuraCloud", "DuraCloud Submission Failure: incomplete metadata transfer"); } // Store BagIt file - String fileName = spaceName + "v" + dv.getFriendlyVersionNumber() + ".zip"; + success = false; + String fileName = baseFileName + ".zip"; // Add BagIt ZIP file // Although DuraCloud uses SHA-256 internally, it's API uses MD5 to verify the // transfer + messageDigest = MessageDigest.getInstance("MD5"); - try (PipedInputStream in = new PipedInputStream(); DigestInputStream digestInputStream2 = new DigestInputStream(in, messageDigest)) { - new Thread(new Runnable() { - public void run() { - try (PipedOutputStream out = new PipedOutputStream(in)){ - // Generate bag - BagGenerator bagger = new BagGenerator(new OREMap(dv, false), dataciteXml); - bagger.setNumConnections(getNumberOfBagGeneratorThreads()); - bagger.setAuthenticationKey(token.getTokenString()); - bagger.generateBag(out); - } catch (Exception e) { - logger.severe("Error creating bag: " + e.getMessage()); - // TODO Auto-generated catch block - e.printStackTrace(); - throw new RuntimeException("Error creating bag: " + e.getMessage()); - } - } - }).start(); - i=0; - while(digestInputStream.available()<=0 && i<100) { - Thread.sleep(10); - i++; + try (PipedInputStream in = new PipedInputStream(100000); + DigestInputStream digestInputStream2 = new DigestInputStream(in, messageDigest)) { + Thread bagThread = startBagThread(dv, in, digestInputStream2, dataciteXml, token); + checksum = store.addContent(spaceName, fileName, digestInputStream2, -1l, null, null, null); + bagThread.join(); + if (success) { + logger.fine("Content: " + fileName + " added with checksum: " + checksum); + localchecksum = 
Hex.encodeHexString(digestInputStream2.getMessageDigest().digest()); } - checksum = store.addContent(spaceName, fileName, digestInputStream2, -1l, null, null, - null); - logger.fine("Content: " + fileName + " added with checksum: " + checksum); - localchecksum = Hex.encodeHexString(digestInputStream2.getMessageDigest().digest()); - if (!checksum.equals(localchecksum)) { - logger.severe(checksum + " not equal to " + localchecksum); + if (!success || !checksum.equals(localchecksum)) { + logger.severe("Failure on " + fileName); + logger.severe(success ? checksum + " not equal to " + localchecksum : "failed to transfer to DuraCloud"); + try { + store.deleteContent(spaceName, fileName); + store.deleteContent(spaceName, baseFileName + "_datacite.xml"); + } catch (ContentStoreException cse) { + logger.warning(cse.getMessage()); + } return new Failure("Error in transferring Zip file to DuraCloud", "DuraCloud Submission Failure: incomplete archive transfer"); } - } catch (RuntimeException rte) { - logger.severe(rte.getMessage()); - return new Failure("Error in generating Bag", - "DuraCloud Submission Failure: archive file not created"); } logger.fine("DuraCloud Submission step: Content Transferred"); @@ -173,7 +197,9 @@ public void run() { sb.append("/duradmin/spaces/sm/"); sb.append(store.getStoreId()); sb.append("/" + spaceName + "/" + fileName); - dv.setArchivalCopyLocation(sb.toString()); + statusObject.add(DatasetVersion.ARCHIVAL_STATUS, DatasetVersion.ARCHIVAL_STATUS_SUCCESS); + statusObject.add(DatasetVersion.ARCHIVAL_STATUS_MESSAGE, sb.toString()); + logger.fine("DuraCloud Submission step complete: " + sb.toString()); } catch (ContentStoreException | IOException e) { // TODO Auto-generated catch block @@ -181,10 +207,6 @@ public void run() { e.printStackTrace(); return new Failure("Error in transferring file to DuraCloud", "DuraCloud Submission Failure: archive file not transferred"); - } catch (RuntimeException rte) { - logger.severe(rte.getMessage()); - return new 
Failure("Error in generating datacite.xml file", - "DuraCloud Submission Failure: metadata file not created"); } catch (InterruptedException e) { logger.warning(e.getLocalizedMessage()); e.printStackTrace(); @@ -196,12 +218,16 @@ public void run() { if (!(1 == dv.getVersion()) || !(0 == dv.getMinorVersionNumber())) { mesg = mesg + ": Prior Version archiving not yet complete?"; } - return new Failure("Unable to create DuraCloud space with name: " + spaceName, mesg); + return new Failure("Unable to create DuraCloud space with name: " + baseFileName, mesg); } catch (NoSuchAlgorithmException e) { logger.severe("MD5 MessageDigest not available!"); } + finally { + dv.setArchivalCopyLocation(statusObject.build().toString()); + } } else { - logger.warning("DuraCloud Submision Workflow aborted: Dataset locked for finalizePublication, or because file validation failed"); + logger.warning( + "DuraCloud Submision Workflow aborted: Dataset locked for finalizePublication, or because file validation failed"); return new Failure("Dataset locked"); } return WorkflowStepResult.OK; @@ -209,5 +235,4 @@ public void run() { return new Failure("DuraCloud Submission not configured - no \":DuraCloudHost\"."); } } - } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java index 52b7e1c5376..12bb3fb6a0a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java @@ -37,6 +37,9 @@ import java.util.concurrent.Future; import org.apache.solr.client.solrj.SolrServerException; +import javax.ejb.EJB; +import javax.inject.Inject; + /** * @@ -48,7 +51,9 @@ public class FinalizeDatasetPublicationCommand extends AbstractPublishDatasetCommand { private static final Logger logger = 
Logger.getLogger(FinalizeDatasetPublicationCommand.class.getName()); - + + + /** * mirror field from {@link PublishDatasetCommand} of same name */ @@ -256,30 +261,23 @@ public boolean onSuccess(CommandContext ctxt, Object r) { } } - exportMetadata(dataset); - - ctxt.datasets().updateLastExportTimeStamp(dataset.getId()); - - return retVal; - } - - /** - * Attempting to run metadata export, for all the formats for which we have - * metadata Exporters. - */ - private void exportMetadata(Dataset dataset) { - + // Metadata export: + try { ExportService instance = ExportService.getInstance(); instance.exportAllFormats(dataset); - + dataset = ctxt.datasets().merge(dataset); } catch (Exception ex) { // Something went wrong! // Just like with indexing, a failure to export is not a fatal // condition. We'll just log the error as a warning and keep // going: - logger.log(Level.WARNING, "Dataset publication finalization: exception while exporting:{0}", ex.getMessage()); - } + logger.warning("Finalization: exception caught while exporting: "+ex.getMessage()); + // ... but it is important to only update the export time stamp if the + // export was indeed successful. 
+ } + + return retVal; } /** diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java index af4c960c2d6..5d017173685 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java @@ -1,7 +1,5 @@ package edu.harvard.iq.dataverse.engine.command.impl; -import edu.harvard.iq.dataverse.DOIDataCiteRegisterService; -import edu.harvard.iq.dataverse.DataCitation; import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.DatasetLock.Reason; @@ -10,29 +8,28 @@ import edu.harvard.iq.dataverse.engine.command.Command; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; -import edu.harvard.iq.dataverse.util.bagit.BagGenerator; -import edu.harvard.iq.dataverse.util.bagit.OREMap; import edu.harvard.iq.dataverse.workflow.step.Failure; import edu.harvard.iq.dataverse.workflow.step.WorkflowStepResult; -import java.io.BufferedInputStream; import java.io.FileInputStream; -import java.io.FileNotFoundException; import java.io.IOException; import java.io.PipedInputStream; import java.io.PipedOutputStream; import java.nio.charset.Charset; import java.security.DigestInputStream; import java.security.MessageDigest; -import java.security.NoSuchAlgorithmException; import java.util.Map; import java.util.logging.Logger; +import javax.json.Json; +import javax.json.JsonObjectBuilder; + import org.apache.commons.codec.binary.Hex; import com.google.auth.oauth2.ServiceAccountCredentials; import com.google.cloud.storage.Blob; import com.google.cloud.storage.Bucket; import com.google.cloud.storage.Storage; +import com.google.cloud.storage.StorageException; 
import com.google.cloud.storage.StorageOptions; @RequiredPermissions(Permission.PublishDataset) @@ -54,8 +51,13 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t logger.fine("Project: " + projectName + " Bucket: " + bucketName); if (bucketName != null && projectName != null) { Storage storage; + //Set a failure status that will be updated if we succeed + JsonObjectBuilder statusObject = Json.createObjectBuilder(); + statusObject.add(DatasetVersion.ARCHIVAL_STATUS, DatasetVersion.ARCHIVAL_STATUS_FAILURE); + statusObject.add(DatasetVersion.ARCHIVAL_STATUS_MESSAGE, "Bag not transferred"); + try { - FileInputStream fis = new FileInputStream(System.getProperty("dataverse.files.directory") + System.getProperty("file.separator")+ "googlecloudkey.json"); + FileInputStream fis = new FileInputStream(System.getProperty("dataverse.files.directory") + System.getProperty("file.separator") + "googlecloudkey.json"); storage = StorageOptions.newBuilder() .setCredentials(ServiceAccountCredentials.fromStream(fis)) .setProjectId(projectName) @@ -69,145 +71,98 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t String spaceName = dataset.getGlobalId().asString().replace(':', '-').replace('/', '-') .replace('.', '-').toLowerCase(); - DataCitation dc = new DataCitation(dv); - Map metadata = dc.getDataCiteMetadata(); - String dataciteXml = DOIDataCiteRegisterService.getMetadataFromDvObject( - dv.getDataset().getGlobalId().asString(), metadata, dv.getDataset()); - String blobIdString = null; + String dataciteXml = getDataCiteXml(dv); MessageDigest messageDigest = MessageDigest.getInstance("MD5"); - try (PipedInputStream dataciteIn = new PipedInputStream(); DigestInputStream digestInputStream = new DigestInputStream(dataciteIn, messageDigest)) { + try (PipedInputStream dataciteIn = new PipedInputStream(); + DigestInputStream digestInputStream = new DigestInputStream(dataciteIn, messageDigest)) { // Add datacite.xml file - new 
Thread(new Runnable() { + Thread dcThread = new Thread(new Runnable() { public void run() { try (PipedOutputStream dataciteOut = new PipedOutputStream(dataciteIn)) { dataciteOut.write(dataciteXml.getBytes(Charset.forName("utf-8"))); dataciteOut.close(); + success = true; } catch (Exception e) { logger.severe("Error creating datacite.xml: " + e.getMessage()); // TODO Auto-generated catch block e.printStackTrace(); - throw new RuntimeException("Error creating datacite.xml: " + e.getMessage()); + // throw new RuntimeException("Error creating datacite.xml: " + e.getMessage()); } } - }).start(); - //Have seen broken pipe in PostPublishDataset workflow without this delay - int i=0; - while(digestInputStream.available()<=0 && i<100) { + }); + dcThread.start(); + // Have seen Pipe Closed errors for other archivers when used as a workflow + // without this delay loop + int i = 0; + while (digestInputStream.available() <= 0 && i < 100) { Thread.sleep(10); i++; } - Blob dcXml = bucket.create(spaceName + "/datacite.v" + dv.getFriendlyVersionNumber()+".xml", digestInputStream, "text/xml", Bucket.BlobWriteOption.doesNotExist()); + Blob dcXml = bucket.create(spaceName + "/datacite.v" + dv.getFriendlyVersionNumber() + ".xml", digestInputStream, "text/xml", Bucket.BlobWriteOption.doesNotExist()); + + dcThread.join(); String checksum = dcXml.getMd5ToHexString(); logger.fine("Content: datacite.xml added with checksum: " + checksum); String localchecksum = Hex.encodeHexString(digestInputStream.getMessageDigest().digest()); - if (!checksum.equals(localchecksum)) { - logger.severe(checksum + " not equal to " + localchecksum); + if (!success || !checksum.equals(localchecksum)) { + logger.severe("Failure on " + spaceName); + logger.severe(success ? 
checksum + " not equal to " + localchecksum : "datacite.xml transfer did not succeed"); + try { + dcXml.delete(Blob.BlobSourceOption.generationMatch()); + } catch (StorageException se) { + logger.warning(se.getMessage()); + } return new Failure("Error in transferring DataCite.xml file to GoogleCloud", "GoogleCloud Submission Failure: incomplete metadata transfer"); } // Store BagIt file + success = false; String fileName = spaceName + ".v" + dv.getFriendlyVersionNumber() + ".zip"; // Add BagIt ZIP file // Google uses MD5 as one way to verify the // transfer messageDigest = MessageDigest.getInstance("MD5"); - try (PipedInputStream in = new PipedInputStream(100000); DigestInputStream digestInputStream2 = new DigestInputStream(in, messageDigest);) { - Thread writeThread = new Thread(new Runnable() { - public void run() { - try (PipedOutputStream out = new PipedOutputStream(in)) { - // Generate bag - BagGenerator bagger = new BagGenerator(new OREMap(dv, false), dataciteXml); - bagger.setNumConnections(getNumberOfBagGeneratorThreads()); - bagger.setAuthenticationKey(token.getTokenString()); - bagger.generateBag(out); - } catch (Exception e) { - logger.severe("Error creating bag: " + e.getMessage()); - // TODO Auto-generated catch block - e.printStackTrace(); - try { - digestInputStream2.close(); - } catch(Exception ex) { - logger.warning(ex.getLocalizedMessage()); - } - throw new RuntimeException("Error creating bag: " + e.getMessage()); - } - } - }); - writeThread.start(); - /* - * The following loop handles two issues. First, with no delay, the - * bucket.create() call below can get started before the piped streams are set - * up, causing a failure (seen when triggered in a PostPublishDataset workflow). - * A minimal initial wait, e.g. until some bytes are available, would address - * this. 
Second, the BagGenerator class, due to it's use of parallel streaming - * creation of the zip file, has the characteristic that it makes a few bytes - * available - from setting up the directory structure for the zip file - - * significantly earlier than it is ready to stream file content (e.g. for - * thousands of files and GB of content). If, for these large datasets, - * bucket.create() is called as soon as bytes are available, the call can - * timeout before the bytes for all the zipped files are available. To manage - * this, the loop waits until 90K bytes are available, larger than any expected - * dir structure for the zip and implying that the main zipped content is - * available, or until the thread terminates, with all of its content written to - * the pipe. (Note the PipedInputStream buffer is set at 100K above - I didn't - * want to test whether that means that exactly 100K bytes will be available() - * for large datasets or not, so the test below is at 90K.) - * - * An additional sanity check limits the wait to 2K seconds. The BagGenerator - * has been used to archive >120K files, 2K directories, and ~600GB files on the - * SEAD project (streaming content to disk rather than over an internet - * connection) which would take longer than 2K seconds (10+ hours) and might - * produce an initial set of bytes for directories > 90K. If Dataverse ever - * needs to support datasets of this size, the numbers here would need to be - * increased, and/or a change in how archives are sent to google (e.g. as - * multiple blobs that get aggregated) would be required. 
- */ - i=0; - while(digestInputStream2.available()<=90000 && i<2000 && writeThread.isAlive()) { - Thread.sleep(1000); - logger.fine("avail: " + digestInputStream2.available() + " : " + writeThread.getState().toString()); - i++; - } - logger.fine("Bag: transfer started, i=" + i + ", avail = " + digestInputStream2.available()); - if(i==2000) { - throw new IOException("Stream not available"); - } - Blob bag = bucket.create(spaceName + "/" + fileName, digestInputStream2, "application/zip", Bucket.BlobWriteOption.doesNotExist()); - if(bag.getSize()==0) { + try (PipedInputStream in = new PipedInputStream(100000); + DigestInputStream digestInputStream2 = new DigestInputStream(in, messageDigest)) { + Thread bagThread = startBagThread(dv, in, digestInputStream2, dataciteXml, token); + Blob bag = bucket.create(spaceName + "/" + fileName, digestInputStream2, "application/zip", + Bucket.BlobWriteOption.doesNotExist()); + if (bag.getSize() == 0) { throw new IOException("Empty Bag"); } - blobIdString = bag.getBlobId().getBucket() + "/" + bag.getBlobId().getName(); + bagThread.join(); + checksum = bag.getMd5ToHexString(); logger.fine("Bag: " + fileName + " added with checksum: " + checksum); localchecksum = Hex.encodeHexString(digestInputStream2.getMessageDigest().digest()); - if (!checksum.equals(localchecksum)) { - logger.severe(checksum + " not equal to " + localchecksum); + if (!success || !checksum.equals(localchecksum)) { + logger.severe(success ? 
checksum + " not equal to " + localchecksum + : "bag transfer did not succeed"); + try { + bag.delete(Blob.BlobSourceOption.generationMatch()); + } catch (StorageException se) { + logger.warning(se.getMessage()); + } return new Failure("Error in transferring Zip file to GoogleCloud", "GoogleCloud Submission Failure: incomplete archive transfer"); } - } catch (RuntimeException rte) { - logger.severe("Error creating Bag during GoogleCloud archiving: " + rte.getMessage()); - return new Failure("Error in generating Bag", - "GoogleCloud Submission Failure: archive file not created"); } logger.fine("GoogleCloud Submission step: Content Transferred"); // Document the location of dataset archival copy location (actually the URL - // where you can - // view it as an admin) + // where you can view it as an admin) + // Changed to point at bucket where the zip and datacite.xml are visible StringBuffer sb = new StringBuffer("https://console.cloud.google.com/storage/browser/"); - sb.append(blobIdString); - dv.setArchivalCopyLocation(sb.toString()); - } catch (RuntimeException rte) { - logger.severe("Error creating datacite xml file during GoogleCloud Archiving: " + rte.getMessage()); - return new Failure("Error in generating datacite.xml file", - "GoogleCloud Submission Failure: metadata file not created"); + sb.append(bucketName + "/" + spaceName); + statusObject.add(DatasetVersion.ARCHIVAL_STATUS, DatasetVersion.ARCHIVAL_STATUS_SUCCESS); + statusObject.add(DatasetVersion.ARCHIVAL_STATUS_MESSAGE, sb.toString()); + } } else { logger.warning("GoogleCloud Submision Workflow aborted: Dataset locked for pidRegister"); @@ -219,6 +174,8 @@ public void run() { return new Failure("GoogleCloud Submission Failure", e.getLocalizedMessage() + ": check log for details"); + } finally { + dv.setArchivalCopyLocation(statusObject.build().toString()); } return WorkflowStepResult.OK; } else { diff --git 
a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ListMetadataBlockFacetsCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ListMetadataBlockFacetsCommand.java new file mode 100644 index 00000000000..abc444dc538 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ListMetadataBlockFacetsCommand.java @@ -0,0 +1,40 @@ +package edu.harvard.iq.dataverse.engine.command.impl; + +import edu.harvard.iq.dataverse.Dataverse; +import edu.harvard.iq.dataverse.DataverseMetadataBlockFacet; +import edu.harvard.iq.dataverse.authorization.Permission; +import edu.harvard.iq.dataverse.engine.command.AbstractCommand; +import edu.harvard.iq.dataverse.engine.command.CommandContext; +import edu.harvard.iq.dataverse.engine.command.DataverseRequest; +import edu.harvard.iq.dataverse.engine.command.exception.CommandException; + +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Set; + +/** + * + * @author adaybujeda + */ +public class ListMetadataBlockFacetsCommand extends AbstractCommand> { + + private final Dataverse dv; + + public ListMetadataBlockFacetsCommand(DataverseRequest aRequest, Dataverse aDataverse) { + super(aRequest, aDataverse); + dv = aDataverse; + } + + @Override + public List execute(CommandContext ctxt) throws CommandException { + return dv.getMetadataBlockFacets(); + } + + @Override + public Map> getRequiredPermissions() { + return Collections.singletonMap("", + dv.isReleased() ? 
Collections.emptySet() + : Collections.singleton(Permission.ViewUnpublishedDataverse)); + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LocalSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LocalSubmitToArchiveCommand.java index b336d9a77f9..c7e91b2967b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LocalSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LocalSubmitToArchiveCommand.java @@ -1,7 +1,5 @@ package edu.harvard.iq.dataverse.engine.command.impl; -import edu.harvard.iq.dataverse.DOIDataCiteRegisterService; -import edu.harvard.iq.dataverse.DataCitation; import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.DatasetLock.Reason; @@ -19,6 +17,9 @@ import java.util.Map; import java.util.logging.Logger; +import javax.json.Json; +import javax.json.JsonObjectBuilder; + import java.io.File; import java.io.FileOutputStream; @@ -39,6 +40,12 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t logger.fine("In LocalCloudSubmitToArchive..."); String localPath = requestedSettings.get(":BagItLocalPath"); String zipName = null; + + //Set a failure status that will be updated if we succeed + JsonObjectBuilder statusObject = Json.createObjectBuilder(); + statusObject.add(DatasetVersion.ARCHIVAL_STATUS, DatasetVersion.ARCHIVAL_STATUS_FAILURE); + statusObject.add(DatasetVersion.ARCHIVAL_STATUS_MESSAGE, "Bag not transferred"); + try { Dataset dataset = dv.getDataset(); @@ -49,11 +56,8 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t String spaceName = dataset.getGlobalId().asString().replace(':', '-').replace('/', '-') .replace('.', '-').toLowerCase(); - DataCitation dc = new DataCitation(dv); - Map metadata = dc.getDataCiteMetadata(); - String dataciteXml = DOIDataCiteRegisterService - 
.getMetadataFromDvObject(dv.getDataset().getGlobalId().asString(), metadata, dv.getDataset()); - + String dataciteXml = getDataCiteXml(dv); + FileUtils.writeStringToFile( new File(localPath + "/" + spaceName + "-datacite.v" + dv.getFriendlyVersionNumber() + ".xml"), dataciteXml, StandardCharsets.UTF_8); @@ -61,6 +65,7 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t bagger.setNumConnections(getNumberOfBagGeneratorThreads()); bagger.setAuthenticationKey(token.getTokenString()); zipName = localPath + "/" + spaceName + "v" + dv.getFriendlyVersionNumber() + ".zip"; + //ToDo: generateBag(File f, true) seems to do the same thing (with a .tmp extension) - since we don't have to use a stream here, could probably just reuse the existing code? bagger.generateBag(new FileOutputStream(zipName + ".partial")); File srcFile = new File(zipName + ".partial"); @@ -68,7 +73,8 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t if (srcFile.renameTo(destFile)) { logger.fine("Localhost Submission step: Content Transferred"); - dv.setArchivalCopyLocation("file://" + zipName); + statusObject.add(DatasetVersion.ARCHIVAL_STATUS, DatasetVersion.ARCHIVAL_STATUS_SUCCESS); + statusObject.add(DatasetVersion.ARCHIVAL_STATUS_MESSAGE, "file://" + zipName); } else { logger.warning("Unable to move " + zipName + ".partial to " + zipName); } @@ -80,7 +86,10 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t } catch (Exception e) { logger.warning("Failed to archive " + zipName + " : " + e.getLocalizedMessage()); e.printStackTrace(); + } finally { + dv.setArchivalCopyLocation(statusObject.build().toString()); } + return WorkflowStepResult.OK; } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RedetectFileTypeCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RedetectFileTypeCommand.java index 8eeca0cb4cd..286b107a5fd 100644 --- 
a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RedetectFileTypeCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RedetectFileTypeCommand.java @@ -12,7 +12,8 @@ import edu.harvard.iq.dataverse.export.ExportException; import edu.harvard.iq.dataverse.export.ExportService; import edu.harvard.iq.dataverse.util.EjbUtil; -import edu.harvard.iq.dataverse.util.FileTypeDetection; +import edu.harvard.iq.dataverse.util.FileUtil; + import java.io.File; import java.io.FileOutputStream; import java.io.IOException; @@ -62,7 +63,7 @@ public DataFile execute(CommandContext ctxt) throws CommandException { } logger.fine("target file: " + localFile); - String newlyDetectedContentType = FileTypeDetection.determineFileType(localFile); + String newlyDetectedContentType = FileUtil.determineFileType(localFile, fileToRedetect.getDisplayName()); fileToRedetect.setContentType(newlyDetectedContentType); } catch (IOException ex) { throw new CommandException("Exception while attempting to get the bytes of the file during file type redetection: " + ex.getLocalizedMessage(), this); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java new file mode 100644 index 00000000000..f24d956e9d7 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java @@ -0,0 +1,269 @@ +package edu.harvard.iq.dataverse.engine.command.impl; + +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.DatasetVersion; +import edu.harvard.iq.dataverse.DatasetLock.Reason; +import edu.harvard.iq.dataverse.authorization.Permission; +import edu.harvard.iq.dataverse.authorization.users.ApiToken; +import edu.harvard.iq.dataverse.engine.command.Command; +import edu.harvard.iq.dataverse.engine.command.DataverseRequest; +import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; 
+import edu.harvard.iq.dataverse.util.bagit.BagGenerator; +import edu.harvard.iq.dataverse.util.bagit.OREMap; +import edu.harvard.iq.dataverse.util.json.JsonUtil; +import edu.harvard.iq.dataverse.workflow.step.Failure; +import edu.harvard.iq.dataverse.workflow.step.WorkflowStepResult; + +import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.util.Map; +import java.util.logging.Logger; + +import javax.json.Json; +import javax.json.JsonObject; +import javax.json.JsonObjectBuilder; + +import org.eclipse.microprofile.config.Config; +import org.eclipse.microprofile.config.ConfigProvider; + +import com.amazonaws.ClientConfiguration; +import com.amazonaws.auth.AWSCredentialsProviderChain; +import com.amazonaws.auth.AWSStaticCredentialsProvider; +import com.amazonaws.auth.BasicAWSCredentials; +import com.amazonaws.auth.profile.ProfileCredentialsProvider; +import com.amazonaws.client.builder.AwsClientBuilder; +import com.amazonaws.services.s3.AmazonS3; +import com.amazonaws.services.s3.AmazonS3ClientBuilder; +import com.amazonaws.services.s3.model.ObjectMetadata; +import com.amazonaws.services.s3.model.PutObjectRequest; +import com.amazonaws.services.s3.transfer.TransferManager; +import com.amazonaws.services.s3.transfer.TransferManagerBuilder; + +@RequiredPermissions(Permission.PublishDataset) +public class S3SubmitToArchiveCommand extends AbstractSubmitToArchiveCommand implements Command { + + private static final Logger logger = Logger.getLogger(S3SubmitToArchiveCommand.class.getName()); + private static final String S3_CONFIG = ":S3ArchiverConfig"; + + private static final Config config = ConfigProvider.getConfig(); + protected AmazonS3 s3 = null; + protected TransferManager tm = null; + private String spaceName = null; + protected String bucketName = null; + + public S3SubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersion version) { + super(aRequest, version); + } + + @Override + public WorkflowStepResult 
performArchiveSubmission(DatasetVersion dv, ApiToken token, + Map requestedSettings) { + logger.fine("In S3SubmitToArchiveCommand..."); + JsonObject configObject = null; + + try { + configObject = JsonUtil.getJsonObject(requestedSettings.get(S3_CONFIG)); + logger.fine("Config: " + configObject); + bucketName = configObject.getString("s3_bucket_name", null); + } catch (Exception e) { + logger.warning("Unable to parse " + S3_CONFIG + " setting as a Json object"); + } + if (configObject != null && bucketName != null) { + + s3 = createClient(configObject); + tm = TransferManagerBuilder.standard().withS3Client(s3).build(); + + //Set a failure status that will be updated if we succeed + JsonObjectBuilder statusObject = Json.createObjectBuilder(); + statusObject.add(DatasetVersion.ARCHIVAL_STATUS, DatasetVersion.ARCHIVAL_STATUS_FAILURE); + statusObject.add(DatasetVersion.ARCHIVAL_STATUS_MESSAGE, "Bag not transferred"); + + try { + + Dataset dataset = dv.getDataset(); + if (dataset.getLockFor(Reason.finalizePublication) == null) { + + spaceName = getSpaceName(dataset); + String dataciteXml = getDataCiteXml(dv); + try (ByteArrayInputStream dataciteIn = new ByteArrayInputStream(dataciteXml.getBytes("UTF-8"))) { + // Add datacite.xml file + ObjectMetadata om = new ObjectMetadata(); + om.setContentLength(dataciteIn.available()); + String dcKey = spaceName + "/" + getDataCiteFileName(spaceName, dv) + ".xml"; + tm.upload(new PutObjectRequest(bucketName, dcKey, dataciteIn, om)).waitForCompletion(); + om = s3.getObjectMetadata(bucketName, dcKey); + if (om == null) { + logger.warning("Could not write datacite xml to S3"); + return new Failure("S3 Archiver failed writing datacite xml file"); + } + + // Store BagIt file + String fileName = getFileName(spaceName, dv); + + String bagKey = spaceName + "/" + fileName + ".zip"; + // Add BagIt ZIP file + // Google uses MD5 as one way to verify the + // transfer + + // Generate bag + BagGenerator bagger = new BagGenerator(new OREMap(dv, 
false), dataciteXml); + bagger.setAuthenticationKey(token.getTokenString()); + if (bagger.generateBag(fileName, false)) { + File bagFile = bagger.getBagFile(fileName); + + try (FileInputStream in = new FileInputStream(bagFile)) { + om = new ObjectMetadata(); + om.setContentLength(bagFile.length()); + + tm.upload(new PutObjectRequest(bucketName, bagKey, in, om)).waitForCompletion(); + om = s3.getObjectMetadata(bucketName, bagKey); + + if (om == null) { + logger.severe("Error sending file to S3: " + fileName); + return new Failure("Error in transferring Bag file to S3", + "S3 Submission Failure: incomplete transfer"); + } + } catch (RuntimeException rte) { + logger.severe("Error creating Bag during S3 archiving: " + rte.getMessage()); + return new Failure("Error in generating Bag", + "S3 Submission Failure: archive file not created"); + } + + logger.fine("S3 Submission step: Content Transferred"); + + // Document the location of dataset archival copy location (actually the URL + // where you can + // view it as an admin) + + // Unsigned URL - gives location but not access without creds + statusObject.add(DatasetVersion.ARCHIVAL_STATUS, DatasetVersion.ARCHIVAL_STATUS_SUCCESS); + statusObject.add(DatasetVersion.ARCHIVAL_STATUS_MESSAGE, s3.getUrl(bucketName, bagKey).toString()); + } else { + logger.warning("Could not write local Bag file " + fileName); + return new Failure("S3 Archiver fail writing temp local bag"); + } + + } + } else { + logger.warning( + "S3 Archiver Submision Workflow aborted: Dataset locked for publication/pidRegister"); + return new Failure("Dataset locked"); + } + } catch (Exception e) { + logger.warning(e.getLocalizedMessage()); + e.printStackTrace(); + return new Failure("S3 Archiver Submission Failure", + e.getLocalizedMessage() + ": check log for details"); + + } finally { + dv.setArchivalCopyLocation(statusObject.build().toString()); + } + return WorkflowStepResult.OK; + } else { + return new Failure( + "S3 Submission not configured - no 
\":S3ArchivalProfile\" and/or \":S3ArchivalConfig\" or no bucket-name defined in config."); + } + } + + protected String getDataCiteFileName(String spaceName, DatasetVersion dv) { + return spaceName + "_datacite.v" + dv.getFriendlyVersionNumber(); + } + + protected String getFileName(String spaceName, DatasetVersion dv) { + return spaceName + ".v" + dv.getFriendlyVersionNumber(); + } + + protected String getSpaceName(Dataset dataset) { + if (spaceName == null) { + spaceName = dataset.getGlobalId().asString().replace(':', '-').replace('/', '-').replace('.', '-') + .toLowerCase(); + } + return spaceName; + } + + private AmazonS3 createClient(JsonObject configObject) { + // get a standard client, using the standard way of configuration the + // credentials, etc. + AmazonS3ClientBuilder s3CB = AmazonS3ClientBuilder.standard(); + + ClientConfiguration cc = new ClientConfiguration(); + Integer poolSize = configObject.getInt("connection-pool-size", 256); + cc.setMaxConnections(poolSize); + s3CB.setClientConfiguration(cc); + + /** + * Pass in a URL pointing to your S3 compatible storage. For possible values see + * https://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/client/builder/AwsClientBuilder.EndpointConfiguration.html + */ + String s3CEUrl = configObject.getString("custom-endpoint-url", ""); + /** + * Pass in a region to use for SigV4 signing of requests. Defaults to + * "dataverse" as it is not relevant for custom S3 implementations. + */ + String s3CERegion = configObject.getString("custom-endpoint-region", "dataverse"); + + // if the admin has set a system property (see below) we use this endpoint URL + // instead of the standard ones. + if (!s3CEUrl.isEmpty()) { + s3CB.setEndpointConfiguration(new AwsClientBuilder.EndpointConfiguration(s3CEUrl, s3CERegion)); + } + /** + * Pass in a boolean value if path style access should be used within the S3 + * client. 
Anything but case-insensitive "true" will lead to value of false, + * which is default value, too. + */ + Boolean s3pathStyleAccess = configObject.getBoolean("path-style-access", false); + // some custom S3 implementations require "PathStyleAccess" as they us a path, + // not a subdomain. default = false + s3CB.withPathStyleAccessEnabled(s3pathStyleAccess); + + /** + * Pass in a boolean value if payload signing should be used within the S3 + * client. Anything but case-insensitive "true" will lead to value of false, + * which is default value, too. + */ + Boolean s3payloadSigning = configObject.getBoolean("payload-signing", false); + /** + * Pass in a boolean value if chunked encoding should not be used within the S3 + * client. Anything but case-insensitive "false" will lead to value of true, + * which is default value, too. + */ + Boolean s3chunkedEncoding = configObject.getBoolean("chunked-encoding", true); + // Openstack SWIFT S3 implementations require "PayloadSigning" set to true. + // default = false + s3CB.setPayloadSigningEnabled(s3payloadSigning); + // Openstack SWIFT S3 implementations require "ChunkedEncoding" set to false. + // default = true + // Boolean is inverted, otherwise setting + // dataverse.files..chunked-encoding=false would result in leaving Chunked + // Encoding enabled + s3CB.setChunkedEncodingDisabled(!s3chunkedEncoding); + + /** + * Pass in a string value if this archiver should use a non-default AWS S3 + * profile. The default is "default" which should work when only one profile + * exists. + */ + ProfileCredentialsProvider profileCredentials = new ProfileCredentialsProvider(configObject.getString("profile", "default")); + + // Try to retrieve credentials via Microprofile Config API, too. For production + // use, you should not use env + // vars or system properties to provide these, but use the secrets config source + // provided by Payara. 
+ AWSStaticCredentialsProvider staticCredentials = new AWSStaticCredentialsProvider(new BasicAWSCredentials( + config.getOptionalValue("dataverse.s3archiver.access-key", String.class).orElse(""), + config.getOptionalValue("dataverse.s3archiver.secret-key", String.class).orElse(""))); + + // Add both providers to chain - the first working provider will be used (so + // static credentials are the fallback) + AWSCredentialsProviderChain providerChain = new AWSCredentialsProviderChain(profileCredentials, + staticCredentials); + s3CB.setCredentials(providerChain); + + // let's build the client :-) + AmazonS3 client = s3CB.build(); + return client; + } + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SubmitDatasetForReviewCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SubmitDatasetForReviewCommand.java index ad290fe221b..e38f5bae8e0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SubmitDatasetForReviewCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SubmitDatasetForReviewCommand.java @@ -31,6 +31,8 @@ public SubmitDatasetForReviewCommand(DataverseRequest aRequest, Dataset dataset) @Override public Dataset execute(CommandContext ctxt) throws CommandException { + validateOrDie(getDataset().getLatestVersion(), false); + if (getDataset().getLatestVersion().isReleased()) { throw new IllegalCommandException(BundleUtil.getStringFromBundle("dataset.submit.failure.isReleased"), this); } @@ -47,7 +49,7 @@ public Dataset execute(CommandContext ctxt) throws CommandException { return updatedDataset; } - public Dataset save(CommandContext ctxt) throws CommandException { + private Dataset save(CommandContext ctxt) throws CommandException { getDataset().getEditVersion().setLastUpdateTime(getTimestamp()); getDataset().setModificationTime(getTimestamp()); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateMetadataBlockFacetRootCommand.java 
b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateMetadataBlockFacetRootCommand.java new file mode 100644 index 00000000000..2e5b6b59ebe --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateMetadataBlockFacetRootCommand.java @@ -0,0 +1,65 @@ +package edu.harvard.iq.dataverse.engine.command.impl; + +import edu.harvard.iq.dataverse.Dataverse; +import edu.harvard.iq.dataverse.DataverseMetadataBlockFacet; +import edu.harvard.iq.dataverse.authorization.Permission; +import edu.harvard.iq.dataverse.engine.command.AbstractCommand; +import edu.harvard.iq.dataverse.engine.command.CommandContext; +import edu.harvard.iq.dataverse.engine.command.DataverseRequest; +import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; +import edu.harvard.iq.dataverse.engine.command.exception.CommandException; + +import java.util.Collections; +import java.util.List; +import java.util.stream.Collectors; + +/** + * + * @author adaybujeda + */ +@RequiredPermissions( Permission.EditDataverse ) +public class UpdateMetadataBlockFacetRootCommand extends AbstractCommand { + + private final Dataverse editedDv; + private final boolean metadataBlockFacetRoot; + + public UpdateMetadataBlockFacetRootCommand(DataverseRequest aRequest, Dataverse editedDv, boolean metadataBlockFacetRoot) { + super(aRequest, editedDv); + this.editedDv = editedDv; + this.metadataBlockFacetRoot = metadataBlockFacetRoot; + } + + @Override + public Dataverse execute(CommandContext ctxt) throws CommandException { + if(editedDv.isMetadataBlockFacetRoot() != metadataBlockFacetRoot) { + // Update metadata block facets when root changes value + // if you set root to be false (i.e. inherit), it should clear the blocks. + // if you set to true (i.e. 
use your own), it should make a copy of what is in the parent + List newBlockFacets = Collections.emptyList(); + if (metadataBlockFacetRoot) { + newBlockFacets = editedDv.getMetadataBlockFacets().stream().map(blockFacet -> { + DataverseMetadataBlockFacet metadataBlockFacet = new DataverseMetadataBlockFacet(); + metadataBlockFacet.setDataverse(editedDv); + metadataBlockFacet.setMetadataBlock(blockFacet.getMetadataBlock()); + return metadataBlockFacet; + }).collect(Collectors.toList()); + } + editedDv.setMetadataBlockFacets(newBlockFacets); + + editedDv.setMetadataBlockFacetRoot(metadataBlockFacetRoot); + return ctxt.dataverses().save(editedDv); + } + + return editedDv; + } + + // Visible for testing + public Dataverse getEditedDataverse() { + return this.editedDv; + } + + // Visible for testing + public boolean getMetadataBlockFacetRoot() { + return metadataBlockFacetRoot; + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateMetadataBlockFacetsCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateMetadataBlockFacetsCommand.java new file mode 100644 index 00000000000..72a41f5cc3c --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateMetadataBlockFacetsCommand.java @@ -0,0 +1,52 @@ +package edu.harvard.iq.dataverse.engine.command.impl; + +import edu.harvard.iq.dataverse.Dataverse; +import edu.harvard.iq.dataverse.DataverseMetadataBlockFacet; +import edu.harvard.iq.dataverse.authorization.Permission; +import edu.harvard.iq.dataverse.engine.command.AbstractCommand; +import edu.harvard.iq.dataverse.engine.command.CommandContext; +import edu.harvard.iq.dataverse.engine.command.DataverseRequest; +import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; +import edu.harvard.iq.dataverse.engine.command.exception.CommandException; +import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; + +import java.util.List; + +/** + * + * @author adaybujeda + */ 
+@RequiredPermissions( Permission.EditDataverse ) +public class UpdateMetadataBlockFacetsCommand extends AbstractCommand { + + private final Dataverse editedDv; + private final List metadataBlockFacets; + + public UpdateMetadataBlockFacetsCommand(DataverseRequest aRequest, Dataverse editedDv, List metadataBlockFacets) { + super(aRequest, editedDv); + this.editedDv = editedDv; + this.metadataBlockFacets = metadataBlockFacets; + } + + @Override + public Dataverse execute(CommandContext ctxt) throws CommandException { + if (!editedDv.isMetadataBlockFacetRoot()) { + throw new IllegalCommandException("Cannot update metadata blocks facets when dataverse has metadata block facet root set to false", this); + } + + editedDv.setMetadataBlockFacets(metadataBlockFacets); + Dataverse updated = ctxt.dataverses().save(editedDv); + return updated; + } + + // Visible for testing + public Dataverse getEditedDataverse() { + return this.editedDv; + } + + // Visible for testing + public List getMetadataBlockFacets() { + return this.metadataBlockFacets; + } + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/export/DublinCoreExporter.java b/src/main/java/edu/harvard/iq/dataverse/export/DublinCoreExporter.java index 7c4ebfdd44d..113e669f511 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/DublinCoreExporter.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/DublinCoreExporter.java @@ -18,7 +18,7 @@ public class DublinCoreExporter implements Exporter { - + @Override public String getProviderName() { return "oai_dc"; diff --git a/src/main/java/edu/harvard/iq/dataverse/export/ExportService.java b/src/main/java/edu/harvard/iq/dataverse/export/ExportService.java index 27bfd03e460..ddc6296093c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/ExportService.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/ExportService.java @@ -213,16 +213,16 @@ public void exportAllFormats(Dataset dataset) throws ExportException { cacheExport(releasedVersion, formatName, 
datasetAsJson, e); } + // Finally, if we have been able to successfully export in all available + // formats, we'll increment the "last exported" time stamp: + dataset.setLastExportTime(new Timestamp(new Date().getTime())); + } catch (ServiceConfigurationError serviceError) { throw new ExportException("Service configuration error during export. " + serviceError.getMessage()); - } catch (Exception e) { - e.printStackTrace(); - logger.warning(e.getMessage()); + } catch (RuntimeException e) { + //e.printStackTrace(); + throw new ExportException("Unknown runtime exception exporting metadata. " + (e.getMessage() == null ? "" : e.getMessage())); } - // Finally, if we have been able to successfully export in all available - // formats, we'll increment the "last exported" time stamp: - - dataset.setLastExportTime(new Timestamp(new Date().getTime())); } diff --git a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java index 1952acb67a3..4bbcd653ac3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java @@ -129,7 +129,7 @@ private static void dtoddi(DatasetDTO datasetDto, OutputStream outputStream) thr xmlw.writeAttribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance"); xmlw.writeAttribute("xsi:schemaLocation", DDIExporter.DEFAULT_XML_NAMESPACE + " " + DDIExporter.DEFAULT_XML_SCHEMALOCATION); writeAttribute(xmlw, "version", DDIExporter.DEFAULT_XML_VERSION); - if(isMetadataLanguageSet(datasetDto.getMetadataLanguage())) { + if(DvObjectContainer.isMetadataLanguageSet(datasetDto.getMetadataLanguage())) { writeAttribute(xmlw, "xml:lang", datasetDto.getMetadataLanguage()); } createStdyDscr(xmlw, datasetDto); @@ -151,7 +151,7 @@ public static void datasetJson2ddi(JsonObject datasetDtoAsJson, DatasetVersion v xmlw.writeAttribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance"); 
xmlw.writeAttribute("xsi:schemaLocation", DDIExporter.DEFAULT_XML_NAMESPACE + " " + DDIExporter.DEFAULT_XML_SCHEMALOCATION); writeAttribute(xmlw, "version", DDIExporter.DEFAULT_XML_VERSION); - if(isMetadataLanguageSet(datasetDto.getMetadataLanguage())) { + if(DvObjectContainer.isMetadataLanguageSet(datasetDto.getMetadataLanguage())) { writeAttribute(xmlw, "xml:lang", datasetDto.getMetadataLanguage()); } createStdyDscr(xmlw, datasetDto); @@ -161,14 +161,6 @@ public static void datasetJson2ddi(JsonObject datasetDtoAsJson, DatasetVersion v xmlw.writeEndElement(); // codeBook xmlw.flush(); } - - - private static boolean isMetadataLanguageSet(String mdLang) { - if(mdLang!=null && !mdLang.equals(DvObjectContainer.UNDEFINED_METADATA_LANGUAGE_CODE)) { - return true; - } - return false; - } /** * @todo This is just a stub, copied from DDIExportServiceBean. It should @@ -944,7 +936,7 @@ private static void writeDistributorsElement(XMLStreamWriter xmlw, DatasetVersio } if (!distributorName.isEmpty()) { xmlw.writeStartElement("distrbtr"); - if(isMetadataLanguageSet(lang)) { + if(DvObjectContainer.isMetadataLanguageSet(lang)) { writeAttribute(xmlw, "xml:lang", lang); } if (!distributorAffiliation.isEmpty()) { @@ -1064,7 +1056,7 @@ private static void writeAbstractElement(XMLStreamWriter xmlw, DatasetVersionDTO if(!descriptionDate.isEmpty()){ writeAttribute(xmlw,"date",descriptionDate); } - if(isMetadataLanguageSet(lang)) { + if(DvObjectContainer.isMetadataLanguageSet(lang)) { writeAttribute(xmlw, "xml:lang", lang); } xmlw.writeCharacters(descriptionText); @@ -1538,7 +1530,7 @@ private static void writeFullElement (XMLStreamWriter xmlw, String name, String //For the simplest Elements we can if (!StringUtilisEmpty(value)) { xmlw.writeStartElement(name); - if(isMetadataLanguageSet(lang)) { + if(DvObjectContainer.isMetadataLanguageSet(lang)) { writeAttribute(xmlw, "xml:lang", lang); } xmlw.writeCharacters(value); diff --git 
a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalTool.java b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalTool.java index c996e332bdb..7f94b1bbbbf 100644 --- a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalTool.java +++ b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalTool.java @@ -276,63 +276,6 @@ public JsonObjectBuilder toJson() { return jab; } - public enum ReservedWord { - - // TODO: Research if a format like "{reservedWord}" is easily parse-able or if another format would be - // better. The choice of curly braces is somewhat arbitrary, but has been observed in documenation for - // various REST APIs. For example, "Variable substitutions will be made when a variable is named in {brackets}." - // from https://swagger.io/specification/#fixed-fields-29 but that's for URLs. - FILE_ID("fileId"), - FILE_PID("filePid"), - SITE_URL("siteUrl"), - API_TOKEN("apiToken"), - // datasetId is the database id - DATASET_ID("datasetId"), - // datasetPid is the DOI or Handle - DATASET_PID("datasetPid"), - DATASET_VERSION("datasetVersion"), - FILE_METADATA_ID("fileMetadataId"), - LOCALE_CODE("localeCode"); - - private final String text; - private final String START = "{"; - private final String END = "}"; - - private ReservedWord(final String text) { - this.text = START + text + END; - } - - /** - * This is a centralized method that enforces that only reserved words - * are allowed to be used by external tools. External tool authors - * cannot pass their own query parameters through Dataverse such as - * "mode=mode1". - * - * @throws IllegalArgumentException - */ - public static ReservedWord fromString(String text) throws IllegalArgumentException { - if (text != null) { - for (ReservedWord reservedWord : ReservedWord.values()) { - if (text.equals(reservedWord.text)) { - return reservedWord; - } - } - } - // TODO: Consider switching to a more informative message that enumerates the valid reserved words. 
- boolean moreInformativeMessage = false; - if (moreInformativeMessage) { - throw new IllegalArgumentException("Unknown reserved word: " + text + ". A reserved word must be one of these values: " + Arrays.asList(ReservedWord.values()) + "."); - } else { - throw new IllegalArgumentException("Unknown reserved word: " + text); - } - } - - @Override - public String toString() { - return text; - } - } - public String getDescriptionLang() { String description = ""; if (this.toolName != null) { diff --git a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java index a4a51666cc5..33d8c2d0d54 100644 --- a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java +++ b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java @@ -2,16 +2,14 @@ import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.Dataset; -import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.FileMetadata; -import edu.harvard.iq.dataverse.GlobalId; import edu.harvard.iq.dataverse.authorization.users.ApiToken; -import edu.harvard.iq.dataverse.externaltools.ExternalTool.ReservedWord; -import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.URLTokenUtil; + import java.io.StringReader; import java.util.ArrayList; import java.util.List; -import java.util.logging.Logger; + import javax.json.Json; import javax.json.JsonArray; import javax.json.JsonObject; @@ -22,18 +20,9 @@ * instantiated. Applies logic based on an {@link ExternalTool} specification, * such as constructing a URL to access that file. 
*/ -public class ExternalToolHandler { - - private static final Logger logger = Logger.getLogger(ExternalToolHandler.class.getCanonicalName()); +public class ExternalToolHandler extends URLTokenUtil { private final ExternalTool externalTool; - private final DataFile dataFile; - private final Dataset dataset; - private final FileMetadata fileMetadata; - - private ApiToken apiToken; - private String localeCode; - /** * File level tool * @@ -43,22 +32,8 @@ public class ExternalToolHandler { * used anonymously. */ public ExternalToolHandler(ExternalTool externalTool, DataFile dataFile, ApiToken apiToken, FileMetadata fileMetadata, String localeCode) { + super(dataFile, apiToken, fileMetadata, localeCode); this.externalTool = externalTool; - if (dataFile == null) { - String error = "A DataFile is required."; - logger.warning("Error in ExternalToolHandler constructor: " + error); - throw new IllegalArgumentException(error); - } - if (fileMetadata == null) { - String error = "A FileMetadata is required."; - logger.warning("Error in ExternalToolHandler constructor: " + error); - throw new IllegalArgumentException(error); - } - this.dataFile = dataFile; - this.apiToken = apiToken; - this.fileMetadata = fileMetadata; - dataset = fileMetadata.getDatasetVersion().getDataset(); - this.localeCode = localeCode; } /** @@ -70,33 +45,8 @@ public ExternalToolHandler(ExternalTool externalTool, DataFile dataFile, ApiToke * used anonymously. 
*/ public ExternalToolHandler(ExternalTool externalTool, Dataset dataset, ApiToken apiToken, String localeCode) { + super(dataset, apiToken, localeCode); this.externalTool = externalTool; - if (dataset == null) { - String error = "A Dataset is required."; - logger.warning("Error in ExternalToolHandler constructor: " + error); - throw new IllegalArgumentException(error); - } - this.dataset = dataset; - this.apiToken = apiToken; - this.dataFile = null; - this.fileMetadata = null; - this.localeCode = localeCode; - } - - public DataFile getDataFile() { - return dataFile; - } - - public FileMetadata getFileMetadata() { - return fileMetadata; - } - - public ApiToken getApiToken() { - return apiToken; - } - - public String getLocaleCode() { - return localeCode; } // TODO: rename to handleRequest() to someday handle sending headers as well as query parameters. @@ -130,60 +80,6 @@ public String getQueryParametersForUrl(boolean preview) { } } - private String getQueryParam(String key, String value) { - ReservedWord reservedWord = ReservedWord.fromString(value); - switch (reservedWord) { - case FILE_ID: - // getDataFile is never null for file tools because of the constructor - return key + "=" + getDataFile().getId(); - case FILE_PID: - GlobalId filePid = getDataFile().getGlobalId(); - if (filePid != null) { - return key + "=" + getDataFile().getGlobalId(); - } - break; - case SITE_URL: - return key + "=" + SystemConfig.getDataverseSiteUrlStatic(); - case API_TOKEN: - String apiTokenString = null; - ApiToken theApiToken = getApiToken(); - if (theApiToken != null) { - apiTokenString = theApiToken.getTokenString(); - return key + "=" + apiTokenString; - } - break; - case DATASET_ID: - return key + "=" + dataset.getId(); - case DATASET_PID: - return key + "=" + dataset.getGlobalId().asString(); - case DATASET_VERSION: - String versionString = null; - if(fileMetadata!=null) { //true for file case - versionString = fileMetadata.getDatasetVersion().getFriendlyVersionNumber(); - } 
else { //Dataset case - return the latest visible version (unless/until the dataset case allows specifying a version) - if (getApiToken() != null) { - versionString = dataset.getLatestVersion().getFriendlyVersionNumber(); - } else { - versionString = dataset.getLatestVersionForCopy().getFriendlyVersionNumber(); - } - } - if (("DRAFT").equals(versionString)) { - versionString = ":draft"; // send the token needed in api calls that can be substituted for a numeric - // version. - } - return key + "=" + versionString; - case FILE_METADATA_ID: - if(fileMetadata!=null) { //true for file case - return key + "=" + fileMetadata.getId(); - } - case LOCALE_CODE: - return key + "=" + getLocaleCode(); - default: - break; - } - return null; - } - public String getToolUrlWithQueryParams() { return externalTool.getToolUrl() + getQueryParametersForUrl(); } @@ -200,4 +96,14 @@ public void setApiToken(ApiToken apiToken) { this.apiToken = apiToken; } + /** + * @return Returns Javascript that opens the explore tool in a new browser + * tab if the browser allows it.If not, it shows an alert that popups must + * be enabled in the browser. 
+ */ + public String getExploreScript() { + String toolUrl = this.getToolUrlWithQueryParams(); + logger.fine("Exploring with " + toolUrl); + return getScriptForUrl(toolUrl); + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBean.java index 95fd900e4d2..d49d66c26f7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBean.java @@ -3,8 +3,9 @@ import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.DataFileServiceBean; import edu.harvard.iq.dataverse.authorization.users.ApiToken; -import edu.harvard.iq.dataverse.externaltools.ExternalTool.ReservedWord; import edu.harvard.iq.dataverse.externaltools.ExternalTool.Type; +import edu.harvard.iq.dataverse.util.URLTokenUtil; +import edu.harvard.iq.dataverse.util.URLTokenUtil.ReservedWord; import edu.harvard.iq.dataverse.externaltools.ExternalTool.Scope; import java.io.StringReader; diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/AccessList.java b/src/main/java/edu/harvard/iq/dataverse/globus/AccessList.java new file mode 100644 index 00000000000..9a963000541 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/globus/AccessList.java @@ -0,0 +1,33 @@ +package edu.harvard.iq.dataverse.globus; + +import java.util.ArrayList; + +public class AccessList { + private int length; + private String endpoint; + private ArrayList DATA; + + public void setDATA(ArrayList DATA) { + this.DATA = DATA; + } + + public void setEndpoint(String endpoint) { + this.endpoint = endpoint; + } + + public void setLength(int length) { + this.length = length; + } + + public String getEndpoint() { + return endpoint; + } + + public ArrayList getDATA() { + return DATA; + } + + public int getLength() { + return length; + } +} diff --git 
a/src/main/java/edu/harvard/iq/dataverse/globus/AccessToken.java b/src/main/java/edu/harvard/iq/dataverse/globus/AccessToken.java new file mode 100644 index 00000000000..877fc68e4a1 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/globus/AccessToken.java @@ -0,0 +1,88 @@ +package edu.harvard.iq.dataverse.globus; + +import java.util.ArrayList; + +public class AccessToken implements java.io.Serializable { + + private String accessToken; + private String idToken; + private Long expiresIn; + private String resourceServer; + private String tokenType; + private String state; + private String scope; + private String refreshToken; + private ArrayList otherTokens; + + public String getAccessToken() { + return accessToken; + } + + String getIdToken() { + return idToken; + } + + Long getExpiresIn() { + return expiresIn; + } + + String getResourceServer() { + return resourceServer; + } + + String getTokenType() { + return tokenType; + } + + String getState() { + return state; + } + + String getScope() { + return scope; + } + + String getRefreshToken() { + return refreshToken; + } + + ArrayList getOtherTokens() { + return otherTokens; + } + + public void setAccessToken(String accessToken) { + this.accessToken = accessToken; + } + + public void setExpiresIn(Long expiresIn) { + this.expiresIn = expiresIn; + } + + public void setIdToken(String idToken) { + this.idToken = idToken; + } + + public void setOtherTokens(ArrayList otherTokens) { + this.otherTokens = otherTokens; + } + + public void setRefreshToken(String refreshToken) { + this.refreshToken = refreshToken; + } + + public void setResourceServer(String resourceServer) { + this.resourceServer = resourceServer; + } + + public void setScope(String scope) { + this.scope = scope; + } + + public void setState(String state) { + this.state = state; + } + + public void setTokenType(String tokenType) { + this.tokenType = tokenType; + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/FileDetailsHolder.java 
b/src/main/java/edu/harvard/iq/dataverse/globus/FileDetailsHolder.java new file mode 100644 index 00000000000..0b8373cba09 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/globus/FileDetailsHolder.java @@ -0,0 +1,29 @@ +package edu.harvard.iq.dataverse.globus; + +public class FileDetailsHolder { + + private String hash; + private String mime; + private String storageID; + + public FileDetailsHolder(String id, String hash, String mime) { + + this.storageID = id; + this.hash = hash; + this.mime = mime; + + } + + public String getStorageID() { + return this.storageID; + } + + public String getHash() { + return hash; + } + + public String getMime() { + return mime; + } + +} \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java new file mode 100644 index 00000000000..9d80c5cc280 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -0,0 +1,1265 @@ +package edu.harvard.iq.dataverse.globus; + +import com.google.gson.FieldNamingPolicy; +import com.google.gson.GsonBuilder; +import edu.harvard.iq.dataverse.*; + +import javax.ejb.Asynchronous; +import javax.ejb.EJB; +import javax.ejb.Stateless; +import javax.ejb.TransactionAttribute; +import javax.ejb.TransactionAttributeType; +import javax.inject.Inject; +import javax.inject.Named; +import javax.json.Json; +import javax.json.JsonArray; +import javax.json.JsonArrayBuilder; +import javax.json.JsonObject; +import javax.json.JsonPatch; +import javax.servlet.http.HttpServletRequest; + +import static edu.harvard.iq.dataverse.util.json.JsonPrinter.json; +import static edu.harvard.iq.dataverse.util.json.JsonPrinter.toJsonArray; + +import java.io.*; + +import java.net.HttpURLConnection; +import java.net.MalformedURLException; +import java.net.URL; +import java.net.URLEncoder; +import java.sql.Timestamp; +import java.text.SimpleDateFormat; +import java.util.*; 
+import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.Executor; +import java.util.concurrent.Executors; +import java.util.logging.FileHandler; +import java.util.logging.Level; +import java.util.logging.Logger; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +import com.google.gson.Gson; +import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; +import edu.harvard.iq.dataverse.authorization.users.ApiToken; +import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; +import edu.harvard.iq.dataverse.authorization.users.User; +import edu.harvard.iq.dataverse.dataaccess.DataAccess; +import edu.harvard.iq.dataverse.dataaccess.StorageIO; +import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import edu.harvard.iq.dataverse.util.FileUtil; +import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.URLTokenUtil; +import edu.harvard.iq.dataverse.util.json.JsonUtil; + +@Stateless +@Named("GlobusServiceBean") +public class GlobusServiceBean implements java.io.Serializable { + + @EJB + protected DatasetServiceBean datasetSvc; + + @EJB + protected SettingsServiceBean settingsSvc; + + @Inject + DataverseSession session; + + @EJB + protected AuthenticationServiceBean authSvc; + + @EJB + EjbDataverseEngine commandEngine; + + @EJB + UserNotificationServiceBean userNotificationService; + + private static final Logger logger = Logger.getLogger(GlobusServiceBean.class.getCanonicalName()); + private static final SimpleDateFormat logFormatter = new SimpleDateFormat("yyyy-MM-dd'T'HH-mm-ss"); + + private String code; + private String userTransferToken; + private String state; + + public String getState() { + return state; + } + + public void setState(String state) { + this.state = state; + } + + public String getCode() { + return code; + } + + public void setCode(String code) { + this.code = code; + } + + public String 
getUserTransferToken() { + return userTransferToken; + } + + public void setUserTransferToken(String userTransferToken) { + this.userTransferToken = userTransferToken; + } + + ArrayList checkPermisions(AccessToken clientTokenUser, String directory, String globusEndpoint, + String principalType, String principal) throws MalformedURLException { + URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint + "/access_list"); + MakeRequestResponse result = makeRequest(url, "Bearer", + clientTokenUser.getOtherTokens().get(0).getAccessToken(), "GET", null); + ArrayList ids = new ArrayList(); + if (result.status == 200) { + AccessList al = parseJson(result.jsonResponse, AccessList.class, false); + + for (int i = 0; i < al.getDATA().size(); i++) { + Permissions pr = al.getDATA().get(i); + if ((pr.getPath().equals(directory + "/") || pr.getPath().equals(directory)) + && pr.getPrincipalType().equals(principalType) + && ((principal == null) || (principal != null && pr.getPrincipal().equals(principal)))) { + ids.add(pr.getId()); + } else { + logger.info(pr.getPath() + " === " + directory + " == " + pr.getPrincipalType()); + continue; + } + } + } + + return ids; + } + + public void updatePermision(AccessToken clientTokenUser, String directory, String principalType, String perm) + throws MalformedURLException { + if (directory != null && !directory.equals("")) { + directory = directory + "/"; + } + logger.info("Start updating permissions." 
+ " Directory is " + directory); + String globusEndpoint = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusEndpoint, ""); + ArrayList rules = checkPermisions(clientTokenUser, directory, globusEndpoint, principalType, null); + logger.info("Size of rules " + rules.size()); + int count = 0; + while (count < rules.size()) { + logger.info("Start removing rules " + rules.get(count)); + Permissions permissions = new Permissions(); + permissions.setDATA_TYPE("access"); + permissions.setPermissions(perm); + permissions.setPath(directory); + + Gson gson = new GsonBuilder().create(); + URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint + "/access/" + + rules.get(count)); + logger.info("https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint + "/access/" + + rules.get(count)); + MakeRequestResponse result = makeRequest(url, "Bearer", + clientTokenUser.getOtherTokens().get(0).getAccessToken(), "PUT", gson.toJson(permissions)); + if (result.status != 200) { + logger.warning("Cannot update access rule " + rules.get(count)); + } else { + logger.info("Access rule " + rules.get(count) + " was updated"); + } + count++; + } + } + + public void deletePermision(String ruleId, Logger globusLogger) throws MalformedURLException { + + if (ruleId.length() > 0) { + AccessToken clientTokenUser = getClientToken(); + globusLogger.info("Start deleting permissions."); + String globusEndpoint = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusEndpoint, ""); + + URL url = new URL( + "https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint + "/access/" + ruleId); + MakeRequestResponse result = makeRequest(url, "Bearer", + clientTokenUser.getOtherTokens().get(0).getAccessToken(), "DELETE", null); + if (result.status != 200) { + globusLogger.warning("Cannot delete access rule " + ruleId); + } else { + globusLogger.info("Access rule " + ruleId + " was deleted successfully"); + } + } + + } + + public int 
givePermission(String principalType, String principal, String perm, AccessToken clientTokenUser, + String directory, String globusEndpoint) throws MalformedURLException { + + ArrayList rules = checkPermisions(clientTokenUser, directory, globusEndpoint, principalType, principal); + + Permissions permissions = new Permissions(); + permissions.setDATA_TYPE("access"); + permissions.setPrincipalType(principalType); + permissions.setPrincipal(principal); + permissions.setPath(directory + "/"); + permissions.setPermissions(perm); + + Gson gson = new GsonBuilder().create(); + MakeRequestResponse result = null; + if (rules.size() == 0) { + logger.info("Start creating the rule"); + URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint + "/access"); + result = makeRequest(url, "Bearer", clientTokenUser.getOtherTokens().get(0).getAccessToken(), "POST", + gson.toJson(permissions)); + + if (result.status == 400) { + logger.severe("Path " + permissions.getPath() + " is not valid"); + } else if (result.status == 409) { + logger.warning("ACL already exists or Endpoint ACL already has the maximum number of access rules"); + } + + return result.status; + } else { + logger.info("Start Updating the rule"); + URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint + "/access/" + + rules.get(0)); + result = makeRequest(url, "Bearer", clientTokenUser.getOtherTokens().get(0).getAccessToken(), "PUT", + gson.toJson(permissions)); + + if (result.status == 400) { + logger.severe("Path " + permissions.getPath() + " is not valid"); + } else if (result.status == 409) { + logger.warning("ACL already exists or Endpoint ACL already has the maximum number of access rules"); + } + logger.info("Result status " + result.status); + } + + return result.status; + } + + public boolean getSuccessfulTransfers(AccessToken clientTokenUser, String taskId) throws MalformedURLException { + + URL url = new 
URL("https://transfer.api.globusonline.org/v0.10/endpoint_manager/task/" + taskId + + "/successful_transfers"); + + MakeRequestResponse result = makeRequest(url, "Bearer", + clientTokenUser.getOtherTokens().get(0).getAccessToken(), "GET", null); + + if (result.status == 200) { + logger.info(" SUCCESS ====== "); + return true; + } + return false; + } + + public GlobusTask getTask(AccessToken clientTokenUser, String taskId, Logger globusLogger) throws MalformedURLException { + + URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint_manager/task/" + taskId); + + MakeRequestResponse result = makeRequest(url, "Bearer", + clientTokenUser.getOtherTokens().get(0).getAccessToken(), "GET", null); + + GlobusTask task = null; + + if (result.status == 200) { + task = parseJson(result.jsonResponse, GlobusTask.class, false); + } + if (result.status != 200) { + globusLogger.warning("Cannot find information for the task " + taskId + " : Reason : " + + result.jsonResponse.toString()); + } + + return task; + } + + public AccessToken getClientToken() throws MalformedURLException { + String globusBasicToken = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusBasicToken, ""); + URL url = new URL( + "https://auth.globus.org/v2/oauth2/token?scope=openid+email+profile+urn:globus:auth:scope:transfer.api.globus.org:all&grant_type=client_credentials"); + + MakeRequestResponse result = makeRequest(url, "Basic", globusBasicToken, "POST", null); + AccessToken clientTokenUser = null; + if (result.status == 200) { + clientTokenUser = parseJson(result.jsonResponse, AccessToken.class, true); + } + return clientTokenUser; + } + + public AccessToken getAccessToken(HttpServletRequest origRequest, String globusBasicToken) + throws UnsupportedEncodingException, MalformedURLException { + String serverName = origRequest.getServerName(); + if (serverName.equals("localhost")) { + logger.severe("Changing localhost to utoronto"); + serverName = "utl-192-123.library.utoronto.ca"; + } + 
+ String redirectURL = "https://" + serverName + "/globus.xhtml"; + + redirectURL = URLEncoder.encode(redirectURL, "UTF-8"); + + URL url = new URL("https://auth.globus.org/v2/oauth2/token?code=" + code + "&redirect_uri=" + redirectURL + + "&grant_type=authorization_code"); + logger.info(url.toString()); + + MakeRequestResponse result = makeRequest(url, "Basic", globusBasicToken, "POST", null); + AccessToken accessTokenUser = null; + + if (result.status == 200) { + logger.info("Access Token: \n" + result.toString()); + accessTokenUser = parseJson(result.jsonResponse, AccessToken.class, true); + logger.info(accessTokenUser.getAccessToken()); + } + + return accessTokenUser; + + } + + public MakeRequestResponse makeRequest(URL url, String authType, String authCode, String method, + String jsonString) { + String str = null; + HttpURLConnection connection = null; + int status = 0; + try { + connection = (HttpURLConnection) url.openConnection(); + // Basic + // NThjMGYxNDQtN2QzMy00ZTYzLTk3MmUtMjljNjY5YzJjNGJiOktzSUVDMDZtTUxlRHNKTDBsTmRibXBIbjZvaWpQNGkwWVVuRmQyVDZRSnc9 + logger.info(authType + " " + authCode); + connection.setRequestProperty("Authorization", authType + " " + authCode); + // connection.setRequestProperty("Content-Type", + // "application/x-www-form-urlencoded"); + connection.setRequestMethod(method); + if (jsonString != null) { + connection.setRequestProperty("Content-Type", "application/json"); + connection.setRequestProperty("Accept", "application/json"); + logger.info(jsonString); + connection.setDoOutput(true); + OutputStreamWriter wr = new OutputStreamWriter(connection.getOutputStream()); + wr.write(jsonString); + wr.flush(); + } + + status = connection.getResponseCode(); + logger.info("Status now " + status); + InputStream result = connection.getInputStream(); + if (result != null) { + logger.info("Result is not null"); + str = readResultJson(result).toString(); + logger.info("str is "); + logger.info(result.toString()); + } else { + 
logger.info("Result is null"); + str = null; + } + + logger.info("status: " + status); + } catch (IOException ex) { + logger.info("IO"); + logger.severe(ex.getMessage()); + logger.info(ex.getCause().toString()); + logger.info(ex.getStackTrace().toString()); + } finally { + if (connection != null) { + connection.disconnect(); + } + } + MakeRequestResponse r = new MakeRequestResponse(str, status); + return r; + + } + + private StringBuilder readResultJson(InputStream in) { + StringBuilder sb = null; + try { + + BufferedReader br = new BufferedReader(new InputStreamReader(in)); + sb = new StringBuilder(); + String line; + while ((line = br.readLine()) != null) { + sb.append(line + "\n"); + } + br.close(); + logger.info(sb.toString()); + } catch (IOException e) { + sb = null; + logger.severe(e.getMessage()); + } + return sb; + } + + private T parseJson(String sb, Class jsonParserClass, boolean namingPolicy) { + if (sb != null) { + Gson gson = null; + if (namingPolicy) { + gson = new GsonBuilder().setFieldNamingPolicy(FieldNamingPolicy.LOWER_CASE_WITH_UNDERSCORES).create(); + + } else { + gson = new GsonBuilder().create(); + } + T jsonClass = gson.fromJson(sb, jsonParserClass); + return jsonClass; + } else { + logger.severe("Bad respond from token rquest"); + return null; + } + } + + public String getDirectory(String datasetId) { + Dataset dataset = null; + String directory = null; + try { + dataset = datasetSvc.find(Long.parseLong(datasetId)); + if (dataset == null) { + logger.severe("Dataset not found " + datasetId); + return null; + } + String storeId = dataset.getStorageIdentifier(); + storeId.substring(storeId.indexOf("//") + 1); + directory = storeId.substring(storeId.indexOf("//") + 1); + logger.info(storeId); + logger.info(directory); + logger.info("Storage identifier:" + dataset.getIdentifierForFileStorage()); + return directory; + + } catch (NumberFormatException nfe) { + logger.severe(nfe.getMessage()); + + return null; + } + + } + + class MakeRequestResponse 
{ + public String jsonResponse; + public int status; + + MakeRequestResponse(String jsonResponse, int status) { + this.jsonResponse = jsonResponse; + this.status = status; + } + + } + + private MakeRequestResponse findDirectory(String directory, AccessToken clientTokenUser, String globusEndpoint) + throws MalformedURLException { + URL url = new URL(" https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint + "/ls?path=" + + directory + "/"); + + MakeRequestResponse result = makeRequest(url, "Bearer", + clientTokenUser.getOtherTokens().get(0).getAccessToken(), "GET", null); + logger.info("find directory status:" + result.status); + + return result; + } + + public boolean giveGlobusPublicPermissions(String datasetId) + throws UnsupportedEncodingException, MalformedURLException { + + String globusEndpoint = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusEndpoint, ""); + String globusBasicToken = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusBasicToken, ""); + if (globusEndpoint.equals("") || globusBasicToken.equals("")) { + return false; + } + AccessToken clientTokenUser = getClientToken(); + if (clientTokenUser == null) { + logger.severe("Cannot get client token "); + return false; + } + + String directory = getDirectory(datasetId); + logger.info(directory); + + MakeRequestResponse status = findDirectory(directory, clientTokenUser, globusEndpoint); + + if (status.status == 200) { + + /* + * FilesList fl = parseJson(status.jsonResponse, FilesList.class, false); + * ArrayList files = fl.getDATA(); if (files != null) { for (FileG file: + * files) { if (!file.getName().contains("cached") && + * !file.getName().contains(".thumb")) { int perStatus = + * givePermission("all_authenticated_users", "", "r", clientTokenUser, directory + * + "/" + file.getName(), globusEndpoint); logger.info("givePermission status " + * + perStatus + " for " + file.getName()); if (perStatus == 409) { + * logger.info("Permissions already exist or limit was 
reached for " + + * file.getName()); } else if (perStatus == 400) { + * logger.info("No file in Globus " + file.getName()); } else if (perStatus != + * 201) { logger.info("Cannot get permission for " + file.getName()); } } } } + */ + + int perStatus = givePermission("all_authenticated_users", "", "r", clientTokenUser, directory, + globusEndpoint); + logger.info("givePermission status " + perStatus); + if (perStatus == 409) { + logger.info("Permissions already exist or limit was reached"); + } else if (perStatus == 400) { + logger.info("No directory in Globus"); + } else if (perStatus != 201 && perStatus != 200) { + logger.info("Cannot give read permission"); + return false; + } + + } else if (status.status == 404) { + logger.info("There is no globus directory"); + } else { + logger.severe("Cannot find directory in globus, status " + status); + return false; + } + + return true; + } + + // Generates the URL to launch the Globus app + public String getGlobusAppUrlForDataset(Dataset d) { + return getGlobusAppUrlForDataset(d, true, null); + } + + public String getGlobusAppUrlForDataset(Dataset d, boolean upload, DataFile df) { + String localeCode = session.getLocaleCode(); + ApiToken apiToken = null; + User user = session.getUser(); + + if (user instanceof AuthenticatedUser) { + apiToken = authSvc.findApiTokenByUser((AuthenticatedUser) user); + + if ((apiToken == null) || (apiToken.getExpireTime().before(new Date()))) { + logger.fine("Created apiToken for user: " + user.getIdentifier()); + apiToken = authSvc.generateApiTokenForUser((AuthenticatedUser) user); + } + } + String storePrefix = ""; + String driverId = d.getEffectiveStorageDriverId(); + try { + storePrefix = DataAccess.getDriverPrefix(driverId); + } catch (Exception e) { + logger.warning("GlobusAppUrlForDataset: Failed to get storePrefix for " + driverId); + } + //Use URLTokenUtil for params currently in common with external tools. 
+ URLTokenUtil tokenUtil = new URLTokenUtil(d, df, apiToken, localeCode); + String appUrl; + if (upload) { + appUrl = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusAppUrl, "http://localhost") + + "/upload?datasetPid={datasetPid}&siteUrl={siteUrl}&apiToken={apiToken}&datasetId={datasetId}&datasetVersion={datasetVersion}&dvLocale={localeCode}"; + } else { + if (df == null) { + appUrl = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusAppUrl, "http://localhost") + + "/download?datasetPid={datasetPid}&siteUrl={siteUrl}" + + ((apiToken != null) ? "&apiToken={apiToken}" : "") + + "&datasetId={datasetId}&datasetVersion={datasetVersion}&dvLocale={localeCode}"; + } else { + String rawStorageId = df.getStorageIdentifier(); + rawStorageId=rawStorageId.substring(rawStorageId.lastIndexOf(":")+1); + appUrl = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusAppUrl, "http://localhost") + + "/download-file?datasetPid={datasetPid}&siteUrl={siteUrl}" + + ((apiToken != null) ? "&apiToken={apiToken}" : "") + + "&datasetId={datasetId}&datasetVersion={datasetVersion}&dvLocale={localeCode}&fileId={fileId}&storageIdentifier=" + + rawStorageId + "&fileName=" + df.getCurrentName(); + } + } + return tokenUtil.replaceTokensWithValues(appUrl) + "&storePrefix=" + storePrefix; + } + + public String getGlobusDownloadScript(Dataset dataset, ApiToken apiToken) { + return URLTokenUtil.getScriptForUrl(getGlobusAppUrlForDataset(dataset, false, null)); + + } + + @Asynchronous + @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) + public void globusUpload(String jsonData, ApiToken token, Dataset dataset, String httpRequestUrl, + AuthenticatedUser authUser) throws ExecutionException, InterruptedException, MalformedURLException { + + Integer countAll = 0; + Integer countSuccess = 0; + Integer countError = 0; + String logTimestamp = logFormatter.format(new Date()); + Logger globusLogger = Logger.getLogger( + "edu.harvard.iq.dataverse.upload.client.DatasetServiceBean." 
+ "GlobusUpload" + logTimestamp); + String logFileName = "../logs" + File.separator + "globusUpload_id_" + dataset.getId() + "_" + logTimestamp + + ".log"; + FileHandler fileHandler; + boolean fileHandlerSuceeded; + try { + fileHandler = new FileHandler(logFileName); + globusLogger.setUseParentHandlers(false); + fileHandlerSuceeded = true; + } catch (IOException | SecurityException ex) { + Logger.getLogger(DatasetServiceBean.class.getName()).log(Level.SEVERE, null, ex); + return; + } + + if (fileHandlerSuceeded) { + globusLogger.addHandler(fileHandler); + } else { + globusLogger = logger; + } + + globusLogger.info("Starting an globusUpload "); + + String datasetIdentifier = dataset.getStorageIdentifier(); + + // ToDo - use DataAccess methods? + String storageType = datasetIdentifier.substring(0, datasetIdentifier.indexOf("://") + 3); + datasetIdentifier = datasetIdentifier.substring(datasetIdentifier.indexOf("://") + 3); + + Thread.sleep(5000); + + JsonObject jsonObject = null; + try (StringReader rdr = new StringReader(jsonData)) { + jsonObject = Json.createReader(rdr).readObject(); + } catch (Exception jpe) { + jpe.printStackTrace(); + logger.log(Level.SEVERE, "Error parsing dataset json. 
Json: {0}"); + } + logger.info("json: " + JsonUtil.prettyPrint(jsonObject)); + + String taskIdentifier = jsonObject.getString("taskIdentifier"); + + String ruleId = ""; + try { + ruleId = jsonObject.getString("ruleId"); + } catch (NullPointerException npe) { + logger.warning("NPE for jsonData object"); + } + + // globus task status check + GlobusTask task = globusStatusCheck(taskIdentifier, globusLogger); + String taskStatus = getTaskStatus(task); + + if (ruleId.length() > 0) { + deletePermision(ruleId, globusLogger); + } + + // If success, switch to an EditInProgress lock - do this before removing the + // GlobusUpload lock + // Keeping a lock through the add datafiles API call avoids a conflicting edit + // and keeps any open dataset page refreshing until the datafile appears + if (!(taskStatus.startsWith("FAILED") || taskStatus.startsWith("INACTIVE"))) { + datasetSvc.addDatasetLock(dataset, + new DatasetLock(DatasetLock.Reason.EditInProgress, authUser, "Completing Globus Upload")); + } + + DatasetLock gLock = dataset.getLockFor(DatasetLock.Reason.GlobusUpload); + if (gLock == null) { + logger.log(Level.WARNING, "No lock found for dataset"); + } else { + logger.log(Level.FINE, "Removing GlobusUpload lock " + gLock.getId()); + /* + * Note: This call to remove a lock only works immediately because it is in + * another service bean. Despite the removeDatasetLocks method having the + * REQUIRES_NEW transaction annotation, when the globusUpload method and that + * method were in the same bean (globusUpload was in the DatasetServiceBean to + * start), the globus lock was still seen in the API call initiated in the + * addFilesAsync method called within the globusUpload method. I.e. it appeared + * that the lock removal was not committed/visible outside this method until + * globusUpload itself ended. 
+ */ + datasetSvc.removeDatasetLocks(dataset, DatasetLock.Reason.GlobusUpload); + } + + if (taskStatus.startsWith("FAILED") || taskStatus.startsWith("INACTIVE")) { + String comment = "Reason : " + taskStatus.split("#")[1] + "
        Short Description : " + + taskStatus.split("#")[2]; + userNotificationService.sendNotification((AuthenticatedUser) authUser, new Timestamp(new Date().getTime()), + UserNotification.Type.GLOBUSUPLOADCOMPLETEDWITHERRORS, dataset.getId(), comment, true); + globusLogger.info("Globus task failed "); + + } else { + try { + // + + List inputList = new ArrayList(); + JsonArray filesJsonArray = jsonObject.getJsonArray("files"); + + if (filesJsonArray != null) { + + for (JsonObject fileJsonObject : filesJsonArray.getValuesAs(JsonObject.class)) { + + // storageIdentifier s3://gcs5-bucket1:1781cfeb8a7-748c270a227c from + // externalTool + String storageIdentifier = fileJsonObject.getString("storageIdentifier"); + String[] bits = storageIdentifier.split(":"); + String bucketName = bits[1].replace("/", ""); + String fileId = bits[bits.length - 1]; + + // fullpath s3://gcs5-bucket1/10.5072/FK2/3S6G2E/1781cfeb8a7-4ad9418a5873 + String fullPath = storageType + bucketName + "/" + datasetIdentifier + "/" + fileId; + String fileName = fileJsonObject.getString("fileName"); + + inputList.add(fileId + "IDsplit" + fullPath + "IDsplit" + fileName); + } + + // calculateMissingMetadataFields: checksum, mimetype + JsonObject newfilesJsonObject = calculateMissingMetadataFields(inputList, globusLogger); + JsonArray newfilesJsonArray = newfilesJsonObject.getJsonArray("files"); + + JsonArrayBuilder jsonDataSecondAPI = Json.createArrayBuilder(); + + for (JsonObject fileJsonObject : filesJsonArray.getValuesAs(JsonObject.class)) { + + countAll++; + String storageIdentifier = fileJsonObject.getString("storageIdentifier"); + String fileName = fileJsonObject.getString("fileName"); + String directoryLabel = fileJsonObject.getString("directoryLabel"); + String[] bits = storageIdentifier.split(":"); + String fileId = bits[bits.length - 1]; + + List newfileJsonObject = IntStream.range(0, newfilesJsonArray.size()) + .mapToObj(index -> ((JsonObject) 
newfilesJsonArray.get(index)).getJsonObject(fileId)) + .filter(Objects::nonNull).collect(Collectors.toList()); + + if (newfileJsonObject != null) { + if (!newfileJsonObject.get(0).getString("hash").equalsIgnoreCase("null")) { + JsonPatch path = Json.createPatchBuilder() + .add("/md5Hash", newfileJsonObject.get(0).getString("hash")).build(); + fileJsonObject = path.apply(fileJsonObject); + path = Json.createPatchBuilder() + .add("/mimeType", newfileJsonObject.get(0).getString("mime")).build(); + fileJsonObject = path.apply(fileJsonObject); + jsonDataSecondAPI.add(fileJsonObject); + countSuccess++; + } else { + globusLogger.info(fileName + + " will be skipped from adding to dataset by second API due to missing values "); + countError++; + } + } else { + globusLogger.info(fileName + + " will be skipped from adding to dataset by second API due to missing values "); + countError++; + } + } + + String newjsonData = jsonDataSecondAPI.build().toString(); + + globusLogger.info("Successfully generated new JsonData for Second API call"); + + String command = "curl -H \"X-Dataverse-key:" + token.getTokenString() + "\" -X POST " + + httpRequestUrl + "/api/datasets/:persistentId/addFiles?persistentId=doi:" + + datasetIdentifier + " -F jsonData='" + newjsonData + "'"; + System.out.println("*******====command ==== " + command); + + String output = addFilesAsync(command, globusLogger); + if (output.equalsIgnoreCase("ok")) { + // if(!taskSkippedFiles) + if (countError == 0) { + userNotificationService.sendNotification((AuthenticatedUser) authUser, + new Timestamp(new Date().getTime()), UserNotification.Type.GLOBUSUPLOADCOMPLETED, + dataset.getId(), countSuccess + " files added out of " + countAll, true); + } else { + userNotificationService.sendNotification((AuthenticatedUser) authUser, + new Timestamp(new Date().getTime()), + UserNotification.Type.GLOBUSUPLOADCOMPLETEDWITHERRORS, dataset.getId(), + countSuccess + " files added out of " + countAll, true); + } + 
globusLogger.info("Successfully completed api/datasets/:persistentId/addFiles call "); + } else { + globusLogger.log(Level.SEVERE, + "******* Error while executing api/datasets/:persistentId/add call ", command); + } + + } + + globusLogger.info("Files processed: " + countAll.toString()); + globusLogger.info("Files added successfully: " + countSuccess.toString()); + globusLogger.info("Files failures: " + countError.toString()); + globusLogger.info("Finished upload via Globus job."); + + if (fileHandlerSuceeded) { + fileHandler.close(); + } + + } catch (Exception e) { + logger.info("Exception from globusUpload call "); + e.printStackTrace(); + globusLogger.info("Exception from globusUpload call " + e.getMessage()); + datasetSvc.removeDatasetLocks(dataset, DatasetLock.Reason.EditInProgress); + } + } + } + + public String addFilesAsync(String curlCommand, Logger globusLogger) + throws ExecutionException, InterruptedException { + CompletableFuture addFilesFuture = CompletableFuture.supplyAsync(() -> { + try { + Thread.sleep(2000); + } catch (InterruptedException e) { + e.printStackTrace(); + } + return (addFiles(curlCommand, globusLogger)); + }, executor).exceptionally(ex -> { + globusLogger.fine("Something went wrong : " + ex.getLocalizedMessage()); + ex.printStackTrace(); + return null; + }); + + String result = addFilesFuture.get(); + + return result; + } + + private String addFiles(String curlCommand, Logger globusLogger) { + ProcessBuilder processBuilder = new ProcessBuilder(); + Process process = null; + String line; + String status = ""; + + try { + globusLogger.info("Call to : " + curlCommand); + processBuilder.command("bash", "-c", curlCommand); + process = processBuilder.start(); + process.waitFor(); + + BufferedReader br = new BufferedReader(new InputStreamReader(process.getInputStream())); + + StringBuilder sb = new StringBuilder(); + while ((line = br.readLine()) != null) + sb.append(line); + globusLogger.info(" API Output : " + sb.toString()); + JsonObject 
jsonObject = null; + try (StringReader rdr = new StringReader(sb.toString())) { + jsonObject = Json.createReader(rdr).readObject(); + } catch (Exception jpe) { + jpe.printStackTrace(); + globusLogger.log(Level.SEVERE, "Error parsing dataset json."); + } + + status = jsonObject.getString("status"); + } catch (Exception ex) { + globusLogger.log(Level.SEVERE, + "******* Unexpected Exception while executing api/datasets/:persistentId/add call ", ex); + } + + return status; + } + + @Asynchronous + public void globusDownload(String jsonData, Dataset dataset, User authUser) throws MalformedURLException { + + String logTimestamp = logFormatter.format(new Date()); + Logger globusLogger = Logger.getLogger( + "edu.harvard.iq.dataverse.upload.client.DatasetServiceBean." + "GlobusDownload" + logTimestamp); + + String logFileName = "../logs" + File.separator + "globusDownload_id_" + dataset.getId() + "_" + logTimestamp + + ".log"; + FileHandler fileHandler; + boolean fileHandlerSuceeded; + try { + fileHandler = new FileHandler(logFileName); + globusLogger.setUseParentHandlers(false); + fileHandlerSuceeded = true; + } catch (IOException | SecurityException ex) { + Logger.getLogger(DatasetServiceBean.class.getName()).log(Level.SEVERE, null, ex); + return; + } + + if (fileHandlerSuceeded) { + globusLogger.addHandler(fileHandler); + } else { + globusLogger = logger; + } + + globusLogger.info("Starting an globusDownload "); + + JsonObject jsonObject = null; + try (StringReader rdr = new StringReader(jsonData)) { + jsonObject = Json.createReader(rdr).readObject(); + } catch (Exception jpe) { + jpe.printStackTrace(); + globusLogger.log(Level.SEVERE, "Error parsing dataset json. 
Json: {0}"); + } + + String taskIdentifier = jsonObject.getString("taskIdentifier"); + String ruleId = ""; + + try { + jsonObject.getString("ruleId"); + } catch (NullPointerException npe) { + + } + + // globus task status check + GlobusTask task = globusStatusCheck(taskIdentifier, globusLogger); + String taskStatus = getTaskStatus(task); + + if (ruleId.length() > 0) { + deletePermision(ruleId, globusLogger); + } + + if (taskStatus.startsWith("FAILED") || taskStatus.startsWith("INACTIVE")) { + String comment = "Reason : " + taskStatus.split("#")[1] + "
        Short Description : " + + taskStatus.split("#")[2]; + userNotificationService.sendNotification((AuthenticatedUser) authUser, new Timestamp(new Date().getTime()), + UserNotification.Type.GLOBUSDOWNLOADCOMPLETEDWITHERRORS, dataset.getId(), comment, true); + globusLogger.info("Globus task failed during download process"); + } else { + boolean taskSkippedFiles = (task.getSkip_source_errors() == null) ? false : task.getSkip_source_errors(); + if (!taskSkippedFiles) { + userNotificationService.sendNotification((AuthenticatedUser) authUser, + new Timestamp(new Date().getTime()), UserNotification.Type.GLOBUSDOWNLOADCOMPLETED, + dataset.getId()); + } else { + userNotificationService.sendNotification((AuthenticatedUser) authUser, + new Timestamp(new Date().getTime()), UserNotification.Type.GLOBUSDOWNLOADCOMPLETEDWITHERRORS, + dataset.getId(), ""); + } + } + } + + Executor executor = Executors.newFixedThreadPool(10); + + private GlobusTask globusStatusCheck(String taskId, Logger globusLogger) throws MalformedURLException { + boolean taskCompletion = false; + String status = ""; + GlobusTask task = null; + int pollingInterval = SystemConfig.getIntLimitFromStringOrDefault(settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusPollingInterval), 50); + do { + try { + globusLogger.info("checking globus transfer task " + taskId); + Thread.sleep(pollingInterval * 1000); + AccessToken clientTokenUser = getClientToken(); + // success = globusServiceBean.getSuccessfulTransfers(clientTokenUser, taskId); + task = getTask(clientTokenUser, taskId, globusLogger); + if (task != null) { + status = task.getStatus(); + if (status != null) { + // The task is in progress. 
+ if (status.equalsIgnoreCase("ACTIVE")) { + if (task.getNice_status().equalsIgnoreCase("ok") + || task.getNice_status().equalsIgnoreCase("queued")) { + taskCompletion = false; + } else { + taskCompletion = true; + // status = "FAILED" + "#" + task.getNice_status() + "#" + + // task.getNice_status_short_description(); + } + } else { + // The task is either succeeded, failed or inactive. + taskCompletion = true; + // status = status + "#" + task.getNice_status() + "#" + + // task.getNice_status_short_description(); + } + } else { + // status = "FAILED"; + taskCompletion = true; + } + } else { + // status = "FAILED"; + taskCompletion = true; + } + } catch (Exception ex) { + ex.printStackTrace(); + } + + } while (!taskCompletion); + + globusLogger.info("globus transfer task completed successfully"); + return task; + } + + private String getTaskStatus(GlobusTask task) { + String status = null; + if (task != null) { + status = task.getStatus(); + if (status != null) { + // The task is in progress. + if (status.equalsIgnoreCase("ACTIVE")) { + status = "FAILED" + "#" + task.getNice_status() + "#" + task.getNice_status_short_description(); + } else { + // The task is either succeeded, failed or inactive. 
+ status = status + "#" + task.getNice_status() + "#" + task.getNice_status_short_description(); + } + } else { + status = "FAILED"; + } + } else { + status = "FAILED"; + } + return status; + } + + public JsonObject calculateMissingMetadataFields(List inputList, Logger globusLogger) + throws InterruptedException, ExecutionException, IOException { + + List> hashvalueCompletableFutures = inputList.stream() + .map(iD -> calculateDetailsAsync(iD, globusLogger)).collect(Collectors.toList()); + + CompletableFuture allFutures = CompletableFuture + .allOf(hashvalueCompletableFutures.toArray(new CompletableFuture[hashvalueCompletableFutures.size()])); + + CompletableFuture> allCompletableFuture = allFutures.thenApply(future -> { + return hashvalueCompletableFutures.stream().map(completableFuture -> completableFuture.join()) + .collect(Collectors.toList()); + }); + + CompletableFuture completableFuture = allCompletableFuture.thenApply(files -> { + return files.stream().map(d -> json(d)).collect(toJsonArray()); + }); + + JsonArrayBuilder filesObject = (JsonArrayBuilder) completableFuture.get(); + + JsonObject output = Json.createObjectBuilder().add("files", filesObject).build(); + + return output; + + } + + private CompletableFuture calculateDetailsAsync(String id, Logger globusLogger) { + // logger.info(" calcualte additional details for these globus id ==== " + id); + + return CompletableFuture.supplyAsync(() -> { + try { + Thread.sleep(2000); + } catch (InterruptedException e) { + e.printStackTrace(); + } + try { + return (calculateDetails(id, globusLogger)); + } catch (InterruptedException | IOException e) { + e.printStackTrace(); + } + return null; + }, executor).exceptionally(ex -> { + return null; + }); + } + + private FileDetailsHolder calculateDetails(String id, Logger globusLogger) + throws InterruptedException, IOException { + int count = 0; + String checksumVal = ""; + InputStream in = null; + String fileId = id.split("IDsplit")[0]; + String fullPath = 
id.split("IDsplit")[1]; + String fileName = id.split("IDsplit")[2]; + + // ToDo: what if the file doesnot exists in s3 + // ToDo: what if checksum calculation failed + + do { + try { + StorageIO dataFileStorageIO = DataAccess.getDirectStorageIO(fullPath); + in = dataFileStorageIO.getInputStream(); + checksumVal = FileUtil.calculateChecksum(in, DataFile.ChecksumType.MD5); + count = 3; + } catch (IOException ioex) { + count = 3; + logger.info(ioex.getMessage()); + globusLogger.info("S3AccessIO: DataFile (fullPAth " + fullPath + + ") does not appear to be an S3 object associated with driver: "); + } catch (Exception ex) { + count = count + 1; + ex.printStackTrace(); + logger.info(ex.getMessage()); + Thread.sleep(5000); + } + + } while (count < 3); + + if (checksumVal.length() == 0) { + checksumVal = "NULL"; + } + + String mimeType = calculatemime(fileName); + globusLogger.info(" File Name " + fileName + " File Details " + fileId + " checksum = " + checksumVal + + " mimeType = " + mimeType); + return new FileDetailsHolder(fileId, checksumVal, mimeType); + // getBytes(in)+"" ); + // calculatemime(fileName)); + } + + public String calculatemime(String fileName) throws InterruptedException { + + String finalType = FileUtil.MIME_TYPE_UNDETERMINED_DEFAULT; + String type = FileUtil.determineFileTypeByNameAndExtension(fileName); + + if (type!=null && !type.isBlank()) { + if (FileUtil.useRecognizedType(finalType, type)) { + finalType = type; + } + } + + return finalType; + } + /* + * public boolean globusFinishTransfer(Dataset dataset, AuthenticatedUser user) + * throws MalformedURLException { + * + * logger.info("=====Tasklist == dataset id :" + dataset.getId()); String + * directory = null; + * + * try { + * + * List fileMetadatas = new ArrayList<>(); + * + * StorageIO datasetSIO = DataAccess.getStorageIO(dataset); + * + * + * + * DatasetVersion workingVersion = dataset.getEditVersion(); + * + * if (workingVersion.getCreateTime() != null) { + * 
workingVersion.setCreateTime(new Timestamp(new Date().getTime())); } + * + * directory = dataset.getAuthorityForFileStorage() + "/" + + * dataset.getIdentifierForFileStorage(); + * + * System.out.println("======= directory ==== " + directory + + * " ==== datasetId :" + dataset.getId()); Map checksumMapOld + * = new HashMap<>(); + * + * Iterator fmIt = workingVersion.getFileMetadatas().iterator(); + * + * while (fmIt.hasNext()) { FileMetadata fm = fmIt.next(); if (fm.getDataFile() + * != null && fm.getDataFile().getId() != null) { String chksum = + * fm.getDataFile().getChecksumValue(); if (chksum != null) { + * checksumMapOld.put(chksum, 1); } } } + * + * List dFileList = new ArrayList<>(); boolean update = false; for + * (S3ObjectSummary s3ObjectSummary : datasetSIO.listAuxObjects("")) { + * + * String s3ObjectKey = s3ObjectSummary.getKey(); + * + * + * String t = s3ObjectKey.replace(directory, ""); + * + * if (t.indexOf(".") > 0) { long totalSize = s3ObjectSummary.getSize(); String + * filePath = s3ObjectKey; String fileName = + * filePath.split("/")[filePath.split("/").length - 1]; String fullPath = + * datasetSIO.getStorageLocation() + "/" + fileName; + * + * logger.info("Full path " + fullPath); StorageIO dataFileStorageIO = + * DataAccess.getDirectStorageIO(fullPath); InputStream in = + * dataFileStorageIO.getInputStream(); + * + * String checksumVal = FileUtil.calculateChecksum(in, + * DataFile.ChecksumType.MD5); //String checksumVal = s3ObjectSummary.getETag(); + * logger.info("The checksum is " + checksumVal); if + * ((checksumMapOld.get(checksumVal) != null)) { logger.info("datasetId :" + + * dataset.getId() + "======= filename ==== " + filePath + + * " == file already exists "); } else if (filePath.contains("cached") || + * filePath.contains(".thumb")) { logger.info(filePath + " is ignored"); } else + * { update = true; logger.info("datasetId :" + dataset.getId() + + * "======= filename ==== " + filePath + " == new file "); try { + * + * DataFile 
datafile = new DataFile(DataFileServiceBean.MIME_TYPE_GLOBUS_FILE); + * //MIME_TYPE_GLOBUS datafile.setModificationTime(new Timestamp(new + * Date().getTime())); datafile.setCreateDate(new Timestamp(new + * Date().getTime())); datafile.setPermissionModificationTime(new Timestamp(new + * Date().getTime())); + * + * FileMetadata fmd = new FileMetadata(); + * + * + * fmd.setLabel(fileName); fmd.setDirectoryLabel(filePath.replace(directory, + * "").replace(File.separator + fileName, "")); + * + * fmd.setDataFile(datafile); + * + * datafile.getFileMetadatas().add(fmd); + * + * FileUtil.generateS3PackageStorageIdentifierForGlobus(datafile); + * logger.info("==== datasetId :" + dataset.getId() + "======= filename ==== " + * + filePath + " == added to datafile, filemetadata "); + * + * try { // We persist "SHA1" rather than "SHA-1". + * //datafile.setChecksumType(DataFile.ChecksumType.SHA1); + * datafile.setChecksumType(DataFile.ChecksumType.MD5); + * datafile.setChecksumValue(checksumVal); } catch (Exception cksumEx) { + * logger.info("==== datasetId :" + dataset.getId() + + * "======Could not calculate checksumType signature for the new file "); } + * + * datafile.setFilesize(totalSize); + * + * dFileList.add(datafile); + * + * } catch (Exception ioex) { logger.info("datasetId :" + dataset.getId() + + * "======Failed to process and/or save the file " + ioex.getMessage()); return + * false; + * + * } } } } if (update) { + * + * List filesAdded = new ArrayList<>(); + * + * if (dFileList != null && dFileList.size() > 0) { + * + * // Dataset dataset = version.getDataset(); + * + * for (DataFile dataFile : dFileList) { + * + * if (dataFile.getOwner() == null) { dataFile.setOwner(dataset); + * + * workingVersion.getFileMetadatas().add(dataFile.getFileMetadata()); + * dataFile.getFileMetadata().setDatasetVersion(workingVersion); + * dataset.getFiles().add(dataFile); + * + * } + * + * filesAdded.add(dataFile); + * + * } + * + * logger.info("==== datasetId :" + dataset.getId() + 
+ * " ===== Done! Finished saving new files to the dataset."); } + * + * fileMetadatas.clear(); for (DataFile addedFile : filesAdded) { + * fileMetadatas.add(addedFile.getFileMetadata()); } filesAdded = null; + * + * if (workingVersion.isDraft()) { + * + * logger.info("Async: ==== datasetId :" + dataset.getId() + + * " ==== inside draft version "); + * + * Timestamp updateTime = new Timestamp(new Date().getTime()); + * + * workingVersion.setLastUpdateTime(updateTime); + * dataset.setModificationTime(updateTime); + * + * + * for (FileMetadata fileMetadata : fileMetadatas) { + * + * if (fileMetadata.getDataFile().getCreateDate() == null) { + * fileMetadata.getDataFile().setCreateDate(updateTime); + * fileMetadata.getDataFile().setCreator((AuthenticatedUser) user); } + * fileMetadata.getDataFile().setModificationTime(updateTime); } + * + * + * } else { logger.info("datasetId :" + dataset.getId() + + * " ==== inside released version "); + * + * for (int i = 0; i < workingVersion.getFileMetadatas().size(); i++) { for + * (FileMetadata fileMetadata : fileMetadatas) { if + * (fileMetadata.getDataFile().getStorageIdentifier() != null) { + * + * if (fileMetadata.getDataFile().getStorageIdentifier().equals(workingVersion. 
+ * getFileMetadatas().get(i).getDataFile().getStorageIdentifier())) { + * workingVersion.getFileMetadatas().set(i, fileMetadata); } } } } + * + * + * } + * + * + * try { Command cmd; logger.info("Async: ==== datasetId :" + + * dataset.getId() + + * " ======= UpdateDatasetVersionCommand START in globus function "); cmd = new + * UpdateDatasetVersionCommand(dataset, new DataverseRequest(user, + * (HttpServletRequest) null)); ((UpdateDatasetVersionCommand) + * cmd).setValidateLenient(true); //new DataverseRequest(authenticatedUser, + * (HttpServletRequest) null) //dvRequestService.getDataverseRequest() + * commandEngine.submit(cmd); } catch (CommandException ex) { + * logger.log(Level.WARNING, "==== datasetId :" + dataset.getId() + + * "======CommandException updating DatasetVersion from batch job: " + + * ex.getMessage()); return false; } + * + * logger.info("==== datasetId :" + dataset.getId() + + * " ======= GLOBUS CALL COMPLETED SUCCESSFULLY "); + * + * //return true; } + * + * } catch (Exception e) { String message = e.getMessage(); + * + * logger.info("==== datasetId :" + dataset.getId() + + * " ======= GLOBUS CALL Exception ============== " + message); + * e.printStackTrace(); return false; //return + * error(Response.Status.INTERNAL_SERVER_ERROR, + * "Uploaded files have passed checksum validation but something went wrong while attempting to move the files into Dataverse. 
Message was '" + * + message + "'."); } + * + * String globusBasicToken = + * settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusBasicToken, ""); + * AccessToken clientTokenUser = getClientToken(globusBasicToken); + * updatePermision(clientTokenUser, directory, "identity", "r"); return true; } + * + */ +} diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusTask.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusTask.java new file mode 100644 index 00000000000..c2b01779f4a --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusTask.java @@ -0,0 +1,92 @@ +package edu.harvard.iq.dataverse.globus; + +public class GlobusTask { + + private String DATA_TYPE; + private String type; + private String status; + private String owner_id; + private String request_time; + private String task_id; + private String destination_endpoint_display_name; + private boolean skip_source_errors; + private String nice_status; + private String nice_status_short_description; + + public String getDestination_endpoint_display_name() { + return destination_endpoint_display_name; + } + + public void setDestination_endpoint_display_name(String destination_endpoint_display_name) { + this.destination_endpoint_display_name = destination_endpoint_display_name; + } + + public void setRequest_time(String request_time) { + this.request_time = request_time; + } + + public String getRequest_time() { + return request_time; + } + + public String getTask_id() { + return task_id; + } + + public void setTask_id(String task_id) { + this.task_id = task_id; + } + + public String getDATA_TYPE() { + return DATA_TYPE; + } + + public void setDATA_TYPE(String DATA_TYPE) { + this.DATA_TYPE = DATA_TYPE; + } + + public String getType() { + return type; + } + + public void setType(String type) { + this.type = type; + } + + public String getStatus() { + return status; + } + + public void setStatus(String status) { + this.status = status; + } + + public String getOwner_id() { + 
return owner_id; + } + + public void setOwner_id(String owner_id) { + this.owner_id = owner_id; + } + + public Boolean getSkip_source_errors() { + return skip_source_errors; + } + + public void setSkip_source_errors(Boolean skip_source_errors) { + this.skip_source_errors = skip_source_errors; + } + + public String getNice_status() { + return nice_status; + } + + public void setNice_status(String nice_status) { + this.nice_status = nice_status; + } + + public String getNice_status_short_description() { + return nice_status_short_description; + } + +} \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/Permissions.java b/src/main/java/edu/harvard/iq/dataverse/globus/Permissions.java new file mode 100644 index 00000000000..b8bb5193fa4 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/globus/Permissions.java @@ -0,0 +1,58 @@ +package edu.harvard.iq.dataverse.globus; + +public class Permissions { + private String DATA_TYPE; + private String principal_type; + private String principal; + private String id; + private String path; + private String permissions; + + public void setPath(String path) { + this.path = path; + } + + public void setDATA_TYPE(String DATA_TYPE) { + this.DATA_TYPE = DATA_TYPE; + } + + public void setPermissions(String permissions) { + this.permissions = permissions; + } + + public void setPrincipal(String principal) { + this.principal = principal; + } + + public void setPrincipalType(String principalType) { + this.principal_type = principalType; + } + + public String getPath() { + return path; + } + + public String getDATA_TYPE() { + return DATA_TYPE; + } + + public String getPermissions() { + return permissions; + } + + public String getPrincipal() { + return principal; + } + + public String getPrincipalType() { + return principal_type; + } + + public void setId(String id) { + this.id = id; + } + + public String getId() { + return id; + } +} diff --git 
a/src/main/java/edu/harvard/iq/dataverse/harvest/server/OAIRecordServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/harvest/server/OAIRecordServiceBean.java index 02e7675a776..057903d506a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/server/OAIRecordServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/server/OAIRecordServiceBean.java @@ -232,7 +232,7 @@ public void exportAllFormats(Dataset dataset) { ExportService exportServiceInstance = ExportService.getInstance(); logger.log(Level.FINE, "Attempting to run export on dataset {0}", dataset.getGlobalId()); exportServiceInstance.exportAllFormats(dataset); - datasetService.updateLastExportTimeStamp(dataset.getId()); + dataset = datasetService.merge(dataset); } catch (ExportException ee) {logger.fine("Caught export exception while trying to export. (ignoring)");} catch (Exception e) {logger.fine("Caught unknown exception while trying to export (ignoring)");} } @@ -242,7 +242,7 @@ public void exportAllFormatsInNewTransaction(Dataset dataset) throws ExportExcep try { ExportService exportServiceInstance = ExportService.getInstance(); exportServiceInstance.exportAllFormats(dataset); - datasetService.updateLastExportTimeStamp(dataset.getId()); + dataset = datasetService.merge(dataset); } catch (Exception e) { logger.fine("Caught unknown exception while trying to export"); throw new ExportException(e.getMessage()); diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java index 1add8e53ef0..b03bae618a4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java @@ -290,10 +290,6 @@ public List saveAndAddFilesToDataset(DatasetVersion version, } } } - - if (unattached) { - dataFile.setOwner(null); - } // Any necessary post-processing: // performPostProcessingTasks(dataFile); } else { @@ -302,6 +298,7 
@@ public List saveAndAddFilesToDataset(DatasetVersion version, //Populate metadata dataAccess.open(DataAccessOption.READ_ACCESS); //set file size + logger.fine("Setting file size: " + dataAccess.getSize()); dataFile.setFilesize(dataAccess.getSize()); if(dataAccess instanceof S3AccessIO) { ((S3AccessIO)dataAccess).removeTempTag(); @@ -311,18 +308,20 @@ public List saveAndAddFilesToDataset(DatasetVersion version, + ioex.getMessage() + ")"); } savedSuccess = true; - dataFile.setOwner(null); } logger.fine("Done! Finished saving new files in permanent storage and adding them to the dataset."); boolean belowLimit = false; try { + //getting StorageIO may require knowing the owner (so this must come before owner is potentially set back to null belowLimit = dataFile.getStorageIO().isBelowIngestSizeLimit(); } catch (IOException e) { logger.warning("Error getting ingest limit for file: " + dataFile.getIdentifier() + " : " + e.getMessage()); } - + if (unattached) { + dataFile.setOwner(null); + } if (savedSuccess && belowLimit) { // These are all brand new files, so they should all have // one filemetadata total. -- L.A. 
diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index bd66e822c20..484e5768eb1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -49,6 +49,7 @@ import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.Optional; import java.util.Set; import java.util.concurrent.Future; import java.util.function.Function; @@ -816,6 +817,7 @@ public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, Set langs = settingsService.getConfiguredLanguages(); Map cvocMap = datasetFieldService.getCVocConf(false); + Set metadataBlocksWithValue = new HashSet<>(); for (DatasetField dsf : datasetVersion.getFlatDatasetFields()) { DatasetFieldType dsfType = dsf.getDatasetFieldType(); @@ -823,6 +825,11 @@ public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, Set dataversePaths = retrieveDVOPaths(dataset); diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchFields.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchFields.java index 63b5a777b0e..2e75a81ed5f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchFields.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchFields.java @@ -206,6 +206,7 @@ public class SearchFields { * A dataverse, a dataset, or a file. */ public static final String TYPE = "dvObjectType"; + public static final String METADATA_TYPES = "metadata_type_ss"; public static final String NAME_SORT = "nameSort"; // PUBLICATION_YEAR used to be called PUBLICATION_DATE. 
public static final String PUBLICATION_YEAR = "publicationDate"; diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java index 6da4960679d..9bb83c88add 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java @@ -32,6 +32,7 @@ import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.Set; import java.util.logging.Logger; import javax.ejb.EJB; @@ -1150,9 +1151,20 @@ public List getFriendlyNamesFromFilterQuery(String filterQuery) { friendlyNames.add(key); } } + String noLeadingQuote = value.replaceAll("^\"", ""); String noTrailingQuote = noLeadingQuote.replaceAll("\"$", ""); String valueWithoutQuotes = noTrailingQuote; + + if (key.equals(SearchFields.METADATA_TYPES) && getDataverse() != null && getDataverse().getMetadataBlockFacets() != null) { + Optional friendlyName = getDataverse().getMetadataBlockFacets().stream().filter(block -> block.getMetadataBlock().getName().equals(valueWithoutQuotes)).findFirst().map(block -> block.getMetadataBlock().getLocaleDisplayFacet()); + logger.fine(String.format("action=getFriendlyNamesFromFilterQuery key=%s value=%s friendlyName=%s", key, value, friendlyName)); + if(friendlyName.isPresent()) { + friendlyNames.add(friendlyName.get()); + return friendlyNames; + } + } + friendlyNames.add(valueWithoutQuotes); return friendlyNames; } diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java index 8dc367ec5c9..ca158198204 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java @@ -7,6 +7,7 @@ import edu.harvard.iq.dataverse.DatasetVersionServiceBean; import 
edu.harvard.iq.dataverse.Dataverse; import edu.harvard.iq.dataverse.DataverseFacet; +import edu.harvard.iq.dataverse.DataverseMetadataBlockFacet; import edu.harvard.iq.dataverse.DvObjectServiceBean; import edu.harvard.iq.dataverse.authorization.groups.Group; import edu.harvard.iq.dataverse.authorization.groups.GroupServiceBean; @@ -26,9 +27,11 @@ import java.util.Collections; import java.util.Date; import java.util.HashMap; +import java.util.LinkedList; import java.util.List; import java.util.ListIterator; import java.util.Map; +import java.util.Optional; import java.util.Set; import java.util.MissingResourceException; import java.util.logging.Level; @@ -207,6 +210,7 @@ public SolrQueryResponse search(DataverseRequest dataverseRequest, List metadataBlockFacets = new LinkedList<>(); //I'm not sure if just adding null here is good for hte permissions system... i think it needs something if(dataverses != null) { for(Dataverse dataverse : dataverses) { @@ -244,6 +249,8 @@ public SolrQueryResponse search(DataverseRequest dataverseRequest, List 0) { if(metadataBlockName.length() > 0 ) { localefriendlyName = getLocaleTitle(datasetFieldName,facetFieldCount.getName(), metadataBlockName); - } else { + } else if (facetField.getName().equals(SearchFields.METADATA_TYPES)) { + Optional metadataBlockFacet = metadataBlockFacets.stream().filter(blockFacet -> blockFacet.getMetadataBlock().getName().equals(facetFieldCount.getName())).findFirst(); + if (metadataBlockFacet.isEmpty()) { + // metadata block facet is not configured to be displayed => ignore + continue; + } + + localefriendlyName = metadataBlockFacet.get().getMetadataBlock().getLocaleDisplayFacet(); + } else { try { localefriendlyName = BundleUtil.getStringFromPropertyFile(facetFieldCount.getName(), "Bundle"); } catch (Exception e) { @@ -694,7 +709,7 @@ public SolrQueryResponse search(DataverseRequest dataverseRequest, List 0) { facetCategory.setFriendlyName(friendlyName); } else { @@ -749,7 +764,7 @@ public 
SolrQueryResponse search(DataverseRequest dataverseRequest, List 0) { - FacetLabel facetLabel = new FacetLabel(start + "-" + end, new Long(rangeFacetCount.getCount())); + FacetLabel facetLabel = new FacetLabel(start + "-" + end, Long.valueOf(rangeFacetCount.getCount())); // special [12 TO 34] syntax for range facets facetLabel.setFilterQuery(rangeFacet.getName() + ":" + "[" + start + " TO " + end + "]"); facetLabelList.add(facetLabel); diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java new file mode 100644 index 00000000000..223e4b86da9 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java @@ -0,0 +1,352 @@ +package edu.harvard.iq.dataverse.settings; + +import org.eclipse.microprofile.config.ConfigProvider; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Optional; +import java.util.regex.Pattern; +import java.util.stream.Collectors; + +/** + * Enum to store each and every JVM-based setting as a reference, + * much like the enum {@link SettingsServiceBean.Key} for DB settings. + * + * To be able to have more control over JVM settings names, + * avoid typos, maybe create lists of settings and so on, + * this enum will provide the place to add any old and new + * settings that are destined to be made at the JVM level. + * + * Further future extensions of this enum class include + * - adding predicates for validation and + * - adding data manipulation for aliased config names. 
+ * + * To create a setting, simply add it within a scope: + * {@link JvmSettings#JvmSettings(JvmSettings, String)} + * + * Settings that might get renamed may provide their old names as aliases: + * {@link JvmSettings#JvmSettings(JvmSettings, String, String...)} + * + * Some scopes or settings may need one or more placeholders, simply don't give + * a key in these cases: + * {@link JvmSettings#JvmSettings(JvmSettings)} + * + */ +public enum JvmSettings { + // the topmost root scope - every setting shall start with it. + PREFIX("dataverse"), + + // GENERAL SETTINGS + VERSION(PREFIX, "version"), + BUILD(PREFIX, "build"), + + ; + + private static final String SCOPE_SEPARATOR = "."; + public static final String PLACEHOLDER_KEY = "%s"; + private static final Pattern OLD_NAME_PLACEHOLDER_PATTERN = Pattern.compile("%(\\d\\$)?s"); + + private final String key; + private final String scopedKey; + private final JvmSettings parent; + private final List oldNames; + private final int placeholders; + + /** + * Create a root scope. + * @param key The scope's name. + */ + JvmSettings(String key) { + this.key = key; + this.scopedKey = key; + this.parent = null; + this.oldNames = List.of(); + this.placeholders = 0; + } + + /** + * Create a scope or setting with a placeholder for a variable argument in it. + * Used to create "configurable objects" with certain attributes using dynamic, programmatic lookup. + * + * Any placeholder present in a setting's fully scoped key will be replaced when looked up + * via {@link #lookup(Class, String...)}. + * + * @param scope The parent scope. + */ + JvmSettings(JvmSettings scope) { + this.key = PLACEHOLDER_KEY; + this.scopedKey = scope.scopedKey + SCOPE_SEPARATOR + this.key; + this.parent = scope; + this.oldNames = List.of(); + this.placeholders = scope.placeholders + 1; + } + + /** + * Create a scope or setting with a name and associate it with a parent scope. + * @param scope The parent scope. + * @param key The name of this scope or setting.
+ */ + JvmSettings(JvmSettings scope, String key) { + this.key = key; + this.scopedKey = scope.scopedKey + SCOPE_SEPARATOR + key; + this.parent = scope; + this.oldNames = List.of(); + this.placeholders = scope.placeholders; + } + + /** + * Create a setting with a name and associate it with a parent scope. + * (Could also be a scope, but old names for scopes aren't the way this is designed.) + * + * When old names are given, these need to be given as fully scoped setting names! (Otherwise + * it would not be possible to switch between completely different scopes.) + * + * @param scope The parent scope of this setting. + * @param key The name of the setting. + * @param oldNames Any previous names this setting was known as. + * Must be given as fully scoped names, not just the old unscoped key/name. + * Used by {@link edu.harvard.iq.dataverse.settings.source.AliasConfigSource} to allow backward + * compatible, non-breaking deprecation and switching to new setting names. + */ + JvmSettings(JvmSettings scope, String key, String... oldNames) { + this.key = key; + this.scopedKey = scope.scopedKey + SCOPE_SEPARATOR + key; + this.parent = scope; + this.oldNames = Arrays.stream(oldNames).collect(Collectors.toUnmodifiableList()); + this.placeholders = scope.placeholders; + } + + private static final List aliased = new ArrayList<>(); + static { + for (JvmSettings setting : JvmSettings.values()) { + if (!setting.oldNames.isEmpty()) { + aliased.add(setting); + } + } + } + + /** + * Get all settings having old names to include them in {@link edu.harvard.iq.dataverse.settings.source.AliasConfigSource} + * @return List of settings with old alias names. Can be empty, but will not be null. + */ + public static List getAliasedSettings() { + return Collections.unmodifiableList(aliased); + } + + /** + * Return a list of old names to be used as aliases for backward compatibility. + * Will return empty list if no old names present.
+ * + * This method should only be used by {@link edu.harvard.iq.dataverse.settings.source.AliasConfigSource}. + * In case of a setting containing placeholder(s), it will check any old names given in the definition + * for presence of at least one placeholder plus it doesn't use more placeholders than available. + * (Old names containing placeholders for settings without any are checked, too.) + * + * Violations will result in a {@link IllegalArgumentException} and will be noticed during any test execution. + * A developer must fix the old name definition before shipping the code. + * + * @return List of old names, may be empty, but never null. + * @throws IllegalArgumentException When an old name has no or too many placeholders for this setting. + */ + public List getOldNames() { + if (needsVarArgs()) { + for (String name : oldNames) { + long matches = OLD_NAME_PLACEHOLDER_PATTERN.matcher(name).results().count(); + + if (matches == 0) { + throw new IllegalArgumentException("JvmSettings." + this.name() + "'s old name '" + + name + "' needs at least one placeholder"); + } else if (matches > this.placeholders) { + throw new IllegalArgumentException("JvmSettings." + this.name() + "'s old name '" + + name + "' has more placeholders than the current name"); + } + } + } else if (! this.oldNames.stream().noneMatch(OLD_NAME_PLACEHOLDER_PATTERN.asPredicate())) { + throw new IllegalArgumentException("JvmSettings." + this.name() + " has no placeholder but old name requires it"); + } + + return oldNames; + } + + /** + * Retrieve the scoped key for this setting. Scopes are separated by dots. + * If the setting contains placeholders, these will be represented as {@link #PLACEHOLDER_KEY}. + * + * @return The scoped key (or the key if no scope). 
Example: dataverse.subscope.subsubscope.key + */ + public String getScopedKey() { + return this.scopedKey; + } + + public Pattern getPatternizedKey() { + return Pattern.compile( + getScopedKey() + .replace(SCOPE_SEPARATOR, "\\.") + .replace(PLACEHOLDER_KEY, "(.+?)")); + } + + + /** + * Does this setting carry any placeholders for variable arguments? + * @return True if so, False otherwise. + */ + public boolean needsVarArgs() { + return this.placeholders > 0; + } + + /** + * Return the number of placeholders / variable arguments that are necessary to look up this setting. + * An exact match in the number of arguments will be necessary for a successful lookup. + * @return Number of placeholders for this scoped setting. + */ + public int numberOfVarArgs() { + return placeholders; + } + + /** + * Lookup this setting via MicroProfile Config as a required option (it will fail if not present). + * @throws java.util.NoSuchElementException - if the property is not defined or is defined as an empty string + * @return The setting as a String + */ + public String lookup() { + return lookup(String.class); + } + + /** + * Lookup this setting via MicroProfile Config as an optional setting. + * @return The setting as String wrapped in a (potentially empty) Optional + */ + public Optional lookupOptional() { + return lookupOptional(String.class); + } + + /** + * Lookup this setting via MicroProfile Config as a required option (it will fail if not present). + * + * @param klass The target type class to convert the setting to if found and not null + * @return The setting as an instance of {@link T} + * @param Target type to convert the setting to (you can create custom converters) + * + * @throws java.util.NoSuchElementException When the property is not defined or is defined as an empty string. + * @throws IllegalArgumentException When the settings value could not be converted to target type.
+ */ + public T lookup(Class klass) { + if (needsVarArgs()) { + throw new IllegalArgumentException("Cannot lookup a setting containing placeholders with this method."); + } + + // This must be done with the full-fledged lookup, as we cannot store the config in an instance or static + // variable, as the alias config source depends on this enum (circular dependency). This is easiest + // avoided by looking up the static cached config at the cost of a method invocation. + return ConfigProvider.getConfig().getValue(this.getScopedKey(), klass); + } + + /** + * Lookup this setting via MicroProfile Config as an optional setting. + * + * @param klass The target type class to convert the setting to if found and not null + * @param Target type to convert the setting to (you can create custom converters) + * @return The setting as an instance of {@link Optional} or an empty Optional + * + * @throws IllegalArgumentException When the settings value could not be converted to target type. + */ + public Optional lookupOptional(Class klass) { + if (needsVarArgs()) { + throw new IllegalArgumentException("Cannot lookup a setting containing variable arguments with this method."); + } + + // This must be done with the full-fledged lookup, as we cannot store the config in an instance or static + // variable, as the alias config source depends on this enum (circular dependency). This is easiest + // avoided by looking up the static cached config at the cost of a method invocation. + return ConfigProvider.getConfig().getOptionalValue(this.getScopedKey(), klass); + } + + /** + * Lookup a required setting containing placeholders for arguments like a name and return as plain String. + * To use type conversion, use {@link #lookup(Class, String...)}. + * + * @param arguments The var args to replace the placeholders of this setting. + * @return The value of the setting. 
+ * + * @throws java.util.NoSuchElementException When the setting has not been set in any MPCONFIG source or is an empty string. + * @throws IllegalArgumentException When using it on a setting without placeholders. + * @throws IllegalArgumentException When not providing as many arguments as there are placeholders. + */ + public String lookup(String... arguments) { + return lookup(String.class, arguments); + } + + /** + * Lookup an optional setting containing placeholders for arguments like a name and return as plain String. + * To use type conversion, use {@link #lookupOptional(Class, String...)}. + * + * @param arguments The var args to replace the placeholders of this setting. + * @return The value as an instance of {@link Optional} or an empty Optional + * + * @throws IllegalArgumentException When using it on a setting without placeholders. + * @throws IllegalArgumentException When not providing as many arguments as there are placeholders. + */ + public Optional lookupOptional(String... arguments) { + return lookupOptional(String.class, arguments); + } + + /** + * Lookup a required setting containing placeholders for arguments like a name and return as converted type. + * To avoid type conversion, use {@link #lookup(String...)}. + * + * @param klass The target type class. + * @param arguments The var args to replace the placeholders of this setting. + * @param Target type to convert the setting to (you can create custom converters) + * @return The value of the setting, converted to the given type. + * + * @throws java.util.NoSuchElementException When the setting has not been set in any MPCONFIG source or is an empty string. + * @throws IllegalArgumentException When using it on a setting without placeholders. + * @throws IllegalArgumentException When not providing as many arguments as there are placeholders. + * @throws IllegalArgumentException When the settings value could not be converted to the target type. + */ + public T lookup(Class klass, String... 
arguments) { + if (needsVarArgs()) { + if (arguments == null || arguments.length != placeholders) { + throw new IllegalArgumentException("You must specify " + placeholders + " placeholder lookup arguments."); + } + return ConfigProvider.getConfig().getValue(this.insert(arguments), klass); + } + throw new IllegalArgumentException("Cannot lookup a setting without variable arguments with this method."); + } + + /** + * Lookup an optional setting containing placeholders for arguments like a name and return as converted type. + * To avoid type conversion, use {@link #lookupOptional(String...)}. + * + * @param klass The target type class. + * @param arguments The var args to replace the placeholders of this setting. + * @param Target type to convert the setting to (you can create custom converters) + * @return The value as an instance of {@link Optional} or an empty Optional + * + * @throws IllegalArgumentException When using it on a setting without placeholders. + * @throws IllegalArgumentException When not providing as many arguments as there are placeholders. + * @throws IllegalArgumentException When the settings value could not be converted to the target type. + */ + public Optional lookupOptional(Class klass, String... arguments) { + if (needsVarArgs()) { + if (arguments == null || arguments.length != placeholders) { + throw new IllegalArgumentException("You must specify " + placeholders + " placeholder lookup arguments."); + } + return ConfigProvider.getConfig().getOptionalValue(this.insert(arguments), klass); + } + throw new IllegalArgumentException("Cannot lookup a setting without variable arguments with this method."); + } + + /** + * Inject arguments into the placeholders of this setting. Will not do anything when no placeholders present. + * + * @param arguments The variable arguments to be inserted for the placeholders. + * @return The formatted setting name. + */ + public String insert(String... 
arguments) { + return String.format(this.getScopedKey(), (Object[]) arguments); + } + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index 12ae777f3f8..50e29d2a333 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -425,6 +425,14 @@ Whether Harvesting (OAI) service is enabled * Convert shibboleth AJP attributes from ISO-8859-1 to UTF-8 */ ShibAttributeCharacterSetConversionEnabled, + /** + *Return the last or first value of an array of affiliation names + */ + ShibAffiliationOrder, + /** + *Split the affiliation array on given string, default ";" + */ + ShibAffiliationSeparator, /** * Validate physical files for all the datafiles in the dataset when publishing */ @@ -449,6 +457,31 @@ Whether Harvesting (OAI) service is enabled * when the Distributor field (citation metadatablock) is set (true) */ ExportInstallationAsDistributorOnlyWhenNotSet, + + /** + * Basic Globus Token for Globus Application + */ + GlobusBasicToken, + /** + * GlobusEndpoint is Globus endpoint for Globus application + */ + GlobusEndpoint, + /** + * Comma separated list of Globus enabled stores + */ + GlobusStores, + /** Globus App URL + * + */ + GlobusAppUrl, + /** Globus Polling Interval how long in seconds Dataverse waits between checks on Globus upload status checks + * + */ + GlobusPollingInterval, + /**Enable single-file download/transfers for Globus + * + */ + GlobusSingleFileTransfer, /** * Optional external executables to run on the metadata for dataverses * and datasets being published; as an extra validation step, to @@ -522,7 +555,15 @@ Whether Harvesting (OAI) service is enabled * would also work) of never muted notifications that cannot be turned off by the users. AlwaysMuted setting overrides * Nevermuted setting warning is logged. 
*/ - NeverMuted + NeverMuted, + /** + * LDN Inbox Allowed Hosts - a comma separated list of IP addresses allowed to submit messages to the inbox + */ + LDNMessageHosts, + /* + * Allow a custom JavaScript to control values of specific fields. + */ + ControlledVocabularyCustomJavaScript ; @Override diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/source/AliasConfigSource.java b/src/main/java/edu/harvard/iq/dataverse/settings/source/AliasConfigSource.java index fbdbd982085..407f39ce0f9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/source/AliasConfigSource.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/source/AliasConfigSource.java @@ -1,31 +1,37 @@ package edu.harvard.iq.dataverse.settings.source; +import edu.harvard.iq.dataverse.settings.JvmSettings; import org.eclipse.microprofile.config.ConfigProvider; import org.eclipse.microprofile.config.spi.ConfigSource; import java.io.IOException; +import java.io.InputStream; import java.net.URL; +import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; +import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.Properties; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; +import java.util.logging.Level; import java.util.logging.Logger; +import java.util.regex.Matcher; +import java.util.regex.Pattern; /** * Enable using an old name for a new config name. * Usages will be logged and this source will ALWAYS stand back if the new name is used anywhere. - * - * By using a DbSettingConfigSource value (dataverse.settings.fromdb.XXX) as old name, we can - * alias a new name to an old db setting, enabling backward compatibility. 
*/ public final class AliasConfigSource implements ConfigSource { private static final Logger logger = Logger.getLogger(AliasConfigSource.class.getName()); + private static final String ALIASES_PROP_FILE = "META-INF/microprofile-aliases.properties"; - private final ConcurrentHashMap aliases = new ConcurrentHashMap<>(); - private final String ALIASES_PROP_FILE = "META-INF/microprofile-aliases.properties"; + private final ConcurrentHashMap> aliases = new ConcurrentHashMap<>(); + private final ConcurrentHashMap> varArgAliases = new ConcurrentHashMap<>(); public AliasConfigSource() { try { @@ -34,33 +40,63 @@ public AliasConfigSource() { // store in our aliases map importAliases(aliasProps); } catch (IOException e) { - logger.info("Could not read from "+ALIASES_PROP_FILE+". Skipping MPCONFIG alias setup."); + // Usually it's an anti-pattern to catch the exception here, but skipping the file + // should be fine here, as it's optional. + logger.log(Level.INFO, "Could not read from "+ALIASES_PROP_FILE+". Skipping MPCONFIG alias setup.", e); } + + // Store all old names from JvmSettings + importJvmSettings(JvmSettings.getAliasedSettings()); + } + + private void importJvmSettings(List aliasedSettings) { + // First add all simple aliases not containing placeholders + aliasedSettings.stream() + .filter(s -> ! 
s.needsVarArgs()) + .forEach(setting -> aliases.put(setting.getScopedKey(), setting.getOldNames())); + + // Aliases with placeholders need to be compiled into a regex + aliasedSettings.stream() + .filter(JvmSettings::needsVarArgs) + .forEach(setting -> varArgAliases.put(setting.getPatternizedKey(), setting.getOldNames())); } - Properties readAliases(String filePath) throws IOException { + + private Properties readAliases(String filePath) throws IOException { // get resource from classpath ClassLoader classLoader = this.getClass().getClassLoader(); URL aliasesResource = classLoader.getResource(filePath); + + // Prevent errors if file not found or could not be loaded + if (aliasesResource == null) { + throw new IOException("Could not find or load, class loader returned null"); + } // load properties from file resource (parsing included) Properties aliasProps = new Properties(); - try { - aliasProps.load(aliasesResource.openStream()); - } catch (NullPointerException e) { - throw new IOException(e.getMessage()); + try (InputStream propStream = aliasesResource.openStream()) { + aliasProps.load(propStream); } return aliasProps; } - void importAliases(Properties aliasProps) { - aliasProps.forEach((key, value) -> aliases.put(key.toString(), value.toString())); + private void importAliases(Properties aliasProps) { + aliasProps.forEach((key, value) -> aliases.put(key.toString(), List.of(value.toString()))); + } + + // Has visibility "package" to be usable from test class! + void addAlias(String newName, List oldNames) { + this.aliases.put(newName, oldNames); + } + + // Has visibility "package" to be usable from test class! + void addAlias(Pattern newNamePattern, List oldNamePatterns) { + this.varArgAliases.put(newNamePattern, oldNamePatterns); } @Override public Map getProperties() { - // No, just no. We're not going to drop a list of stuff. We're only - // dealiasing on getValue(); + // No, just no. We're not going to drop a list of stuff. 
We're only de-aliasing on calls to getValue() return new HashMap<>(); } @@ -79,16 +115,63 @@ public int getOrdinal() { @Override public String getValue(String key) { - String value = null; + + // If the key is null or not starting with the prefix ("dataverse"), we are not going to jump through loops, + // avoiding computation overhead + if (key == null || ! key.startsWith(JvmSettings.PREFIX.getScopedKey())) { + return null; + } + + List oldNames = new ArrayList<>(); + + // Retrieve simple cases without placeholders if (this.aliases.containsKey(key)) { - String oldKey = this.aliases.get(key); - value = ConfigProvider.getConfig().getOptionalValue(oldKey, String.class).orElse(null); + oldNames.addAll(this.aliases.get(key)); + // Or try with regex patterns + } else { + // Seek for the given key within all the patterns for placeholder containing settings, + // returning a Matcher to extract the variable arguments as regex match groups. + Optional foundMatcher = varArgAliases.keySet().stream() + .map(pattern -> pattern.matcher(key)) + .filter(Matcher::matches) + .findFirst(); - if (value != null) { - logger.warning("Detected deprecated config option '"+oldKey+"' in use. Please update your config to use '"+key+"'."); + // Extract the matched groups and construct all old setting names with them + if (foundMatcher.isPresent()) { + Matcher matcher = foundMatcher.get(); + + List varArgs = new ArrayList<>(); + for (int i = 1; i <= matcher.groupCount(); i++) { + varArgs.add(matcher.group(i)); + } + Object[] args = varArgs.toArray(); + + this.varArgAliases + .get(matcher.pattern()) + .forEach(oldNamePattern -> oldNames.add(String.format(oldNamePattern, args))); + } + } + + // Return the first non-empty result + // NOTE: When there are multiple old names in use, they would conflict anyway. Upon deletion of one of the + // old settings the other becomes visible and triggers the warning again. 
There might even be different + // old settings in different sources, which might conflict, too (see ordinal value). + // NOTE: As the default is an empty oldNames array, loop will only be executed if anything was found before. + for (String oldName : oldNames) { + Optional value = ConfigProvider.getConfig().getOptionalValue(oldName, String.class); + + if (value.isPresent()) { + logger.log( + Level.WARNING, + "Detected deprecated config option {0} in use. Please update your config to use {1}.", + new String[]{oldName, key} + ); + return value.get(); } } - return value; + + // Sane default: nothing found. + return null; } @Override diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/source/DbSettingConfigHelper.java b/src/main/java/edu/harvard/iq/dataverse/settings/source/DbSettingConfigHelper.java deleted file mode 100644 index 7b9783dee06..00000000000 --- a/src/main/java/edu/harvard/iq/dataverse/settings/source/DbSettingConfigHelper.java +++ /dev/null @@ -1,27 +0,0 @@ -package edu.harvard.iq.dataverse.settings.source; - -import edu.harvard.iq.dataverse.settings.SettingsServiceBean; - -import javax.annotation.PostConstruct; -import javax.ejb.EJB; -import javax.ejb.Singleton; -import javax.ejb.Startup; - -/** - * This is a small helper bean for the MPCONFIG DbSettingConfigSource. - * As it is a singleton and built at application start (=deployment), it will inject the (stateless) - * settings service into the MPCONFIG POJO once it's ready. - * - * MPCONFIG requires it's sources to be POJOs. No direct dependency injection possible. 
- */ -@Singleton -@Startup -public class DbSettingConfigHelper { - @EJB - SettingsServiceBean settingsSvc; - - @PostConstruct - public void injectService() { - DbSettingConfigSource.injectSettingsService(settingsSvc); - } -} diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/source/DbSettingConfigSource.java b/src/main/java/edu/harvard/iq/dataverse/settings/source/DbSettingConfigSource.java deleted file mode 100644 index 838cd415819..00000000000 --- a/src/main/java/edu/harvard/iq/dataverse/settings/source/DbSettingConfigSource.java +++ /dev/null @@ -1,83 +0,0 @@ -package edu.harvard.iq.dataverse.settings.source; - -import edu.harvard.iq.dataverse.settings.Setting; -import edu.harvard.iq.dataverse.settings.SettingsServiceBean; -import org.eclipse.microprofile.config.spi.ConfigSource; - -import java.time.Duration; -import java.time.Instant; -import java.util.Map; -import java.util.Set; -import java.util.concurrent.ConcurrentHashMap; -import java.util.logging.Logger; - -/** - * A caching wrapper around SettingServiceBean to provide database settings to MicroProfile Config API. - * Please be aware that this class relies on dependency injection during the application startup. - * Values will not be available before and a severe message will be logged to allow monitoring (potential race conditions) - * The settings will be cached for at least one minute, avoiding unnecessary database calls. 
- */ -public class DbSettingConfigSource implements ConfigSource { - - private static final Logger logger = Logger.getLogger(DbSettingConfigSource.class.getCanonicalName()); - private static final ConcurrentHashMap properties = new ConcurrentHashMap<>(); - private static Instant lastUpdate; - private static SettingsServiceBean settingsSvc; - public static final String PREFIX = "dataverse.settings.fromdb"; - - /** - * Let the SettingsServiceBean be injected by DbSettingConfigHelper with PostConstruct - * @param injected - */ - public static void injectSettingsService(SettingsServiceBean injected) { - settingsSvc = injected; - updateProperties(); - } - - /** - * Retrieve settings from the database via service and update cache. - */ - public static void updateProperties() { - // skip if the service has not been injected yet - if (settingsSvc == null) { - return; - } - properties.clear(); - Set dbSettings = settingsSvc.listAll(); - dbSettings.forEach(s -> properties.put(PREFIX+"."+s.getName().substring(1) + (s.getLang() == null ? "" : "."+s.getLang()), s.getContent())); - lastUpdate = Instant.now(); - } - - @Override - public Map getProperties() { - // if the cache is at least XX number of seconds old, update before serving data. - if (lastUpdate == null || Instant.now().minus(Duration.ofSeconds(60)).isAfter(lastUpdate)) { - updateProperties(); - } - return properties; - } - - @Override - public Set getPropertyNames() { - return getProperties().keySet(); - } - - @Override - public int getOrdinal() { - return 50; - } - - @Override - public String getValue(String key) { - // log usages for which this has been designed, but not yet ready to serve... 
- if (settingsSvc == null && key.startsWith(PREFIX)) { - logger.severe("MPCONFIG DbSettingConfigSource not ready yet, but requested for '"+key+"'."); - } - return getProperties().getOrDefault(key, null); - } - - @Override - public String getName() { - return "DataverseDB"; - } -} diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/spi/DbSettingConfigSourceProvider.java b/src/main/java/edu/harvard/iq/dataverse/settings/spi/DbSettingConfigSourceProvider.java deleted file mode 100644 index 856a2c64a01..00000000000 --- a/src/main/java/edu/harvard/iq/dataverse/settings/spi/DbSettingConfigSourceProvider.java +++ /dev/null @@ -1,14 +0,0 @@ -package edu.harvard.iq.dataverse.settings.spi; - -import edu.harvard.iq.dataverse.settings.source.DbSettingConfigSource; -import org.eclipse.microprofile.config.spi.ConfigSource; -import org.eclipse.microprofile.config.spi.ConfigSourceProvider; - -import java.util.Arrays; - -public class DbSettingConfigSourceProvider implements ConfigSourceProvider { - @Override - public Iterable getConfigSources(ClassLoader forClassLoader) { - return Arrays.asList(new DbSettingConfigSource()); - } -} \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/util/ArchiverUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/ArchiverUtil.java index fc97f972f5c..18ec6243d5a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/ArchiverUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/ArchiverUtil.java @@ -1,11 +1,15 @@ package edu.harvard.iq.dataverse.util; import java.lang.reflect.Constructor; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; import java.util.logging.Logger; +import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.impl.AbstractSubmitToArchiveCommand; +import edu.harvard.iq.dataverse.settings.SettingsServiceBean; 
/** * Simple class to reflectively get an instance of the desired class for @@ -35,4 +39,37 @@ public static AbstractSubmitToArchiveCommand createSubmitToArchiveCommand(String } return null; } -} + + public static boolean onlySingleVersionArchiving(Class clazz, SettingsServiceBean settingsService) { + Method m; + try { + m = clazz.getMethod("isSingleVersion", SettingsServiceBean.class); + Object[] params = { settingsService }; + return (Boolean) m.invoke(null, params); + } catch (NoSuchMethodException e) { + e.printStackTrace(); + } catch (SecurityException e) { + e.printStackTrace(); + } catch (IllegalAccessException e) { + e.printStackTrace(); + } catch (IllegalArgumentException e) { + e.printStackTrace(); + } catch (InvocationTargetException e) { + e.printStackTrace(); + } + return (AbstractSubmitToArchiveCommand.isSingleVersion(settingsService)); + } + + public static boolean isSomeVersionArchived(Dataset dataset) { + boolean someVersionArchived = false; + for (DatasetVersion dv : dataset.getVersions()) { + if (dv.getArchivalCopyLocation() != null) { + someVersionArchived = true; + break; + } + } + + return someVersionArchived; + } + +} \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/util/FileTypeDetection.java b/src/main/java/edu/harvard/iq/dataverse/util/FileTypeDetection.java deleted file mode 100644 index 52515c00524..00000000000 --- a/src/main/java/edu/harvard/iq/dataverse/util/FileTypeDetection.java +++ /dev/null @@ -1,12 +0,0 @@ -package edu.harvard.iq.dataverse.util; - -import java.io.File; -import java.io.IOException; - -public class FileTypeDetection { - - public static String determineFileType(File file) throws IOException { - return FileUtil.determineFileType(file, file.getName()); - } - -} diff --git a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java index 64dadc54a4a..339de904f9e 100644 --- 
a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java @@ -28,11 +28,11 @@ import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.Embargo; import edu.harvard.iq.dataverse.FileMetadata; -import edu.harvard.iq.dataverse.TermsOfUseAndAccess; import edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.dataaccess.ImageThumbConverter; import edu.harvard.iq.dataverse.dataaccess.S3AccessIO; import edu.harvard.iq.dataverse.dataset.DatasetThumbnail; +import edu.harvard.iq.dataverse.dataset.DatasetUtil; import edu.harvard.iq.dataverse.datasetutility.FileExceedsMaxSizeException; import static edu.harvard.iq.dataverse.datasetutility.FileSizeChecker.bytesToHumanReadable; import edu.harvard.iq.dataverse.ingest.IngestReport; @@ -53,7 +53,7 @@ import static edu.harvard.iq.dataverse.util.xml.html.HtmlFormatUtil.formatLink; import static edu.harvard.iq.dataverse.util.xml.html.HtmlFormatUtil.formatTableCellAlignRight; import static edu.harvard.iq.dataverse.util.xml.html.HtmlFormatUtil.formatTableRow; -import java.awt.image.BufferedImage; + import java.io.BufferedInputStream; import java.io.File; import java.io.FileInputStream; @@ -76,7 +76,6 @@ import java.text.MessageFormat; import java.text.SimpleDateFormat; import java.time.LocalDate; -import java.time.format.DateTimeFormatter; import java.util.Map; import java.util.MissingResourceException; import java.util.ArrayList; @@ -90,11 +89,6 @@ import javax.activation.MimetypesFileTypeMap; import javax.ejb.EJBException; import javax.enterprise.inject.spi.CDI; -import javax.faces.application.FacesMessage; -import javax.faces.component.UIComponent; -import javax.faces.component.UIInput; -import javax.faces.context.FacesContext; -import javax.faces.validator.ValidatorException; import javax.json.JsonArray; import javax.json.JsonObject; import javax.xml.stream.XMLStreamConstants; @@ -108,7 +102,6 @@ import 
java.util.zip.ZipInputStream; import org.apache.commons.io.FilenameUtils; -import com.amazonaws.AmazonServiceException; import edu.harvard.iq.dataverse.dataaccess.DataAccessOption; import edu.harvard.iq.dataverse.dataaccess.StorageIO; import edu.harvard.iq.dataverse.datasetutility.FileSizeChecker; @@ -487,8 +480,8 @@ public static String determineFileType(File f, String fileName) throws IOExcepti // step 4: // Additional processing; if we haven't gotten much useful information // back from Jhove, we'll try and make an educated guess based on - // the file extension: - + // the file name and extension: + if ( fileExtension != null) { logger.fine("fileExtension="+fileExtension); @@ -496,13 +489,18 @@ public static String determineFileType(File f, String fileName) throws IOExcepti if (fileType != null && fileType.startsWith("text/plain") && STATISTICAL_FILE_EXTENSION.containsKey(fileExtension)) { fileType = STATISTICAL_FILE_EXTENSION.get(fileExtension); } else { - fileType = determineFileTypeByExtension(fileName); + fileType = determineFileTypeByNameAndExtension(fileName); } - + logger.fine("mime type recognized by extension: "+fileType); } } else { logger.fine("fileExtension is null"); + String fileTypeByName = lookupFileTypeFromPropertiesFile(fileName); + if(!StringUtil.isEmpty(fileTypeByName)) { + logger.fine(String.format("mime type: %s recognized by filename: %s", fileTypeByName, fileName)); + fileType = fileTypeByName; + } } // step 5: @@ -548,11 +546,14 @@ public static String determineFileType(File f, String fileName) throws IOExcepti } } + if(fileType==null) { + fileType = MIME_TYPE_UNDETERMINED_DEFAULT; + } logger.fine("returning fileType "+fileType); return fileType; } - public static String determineFileTypeByExtension(String fileName) { + public static String determineFileTypeByNameAndExtension(String fileName) { String mimetypesFileTypeMapResult = MIME_TYPE_MAP.getContentType(fileName); logger.fine("MimetypesFileTypeMap type by extension, for " + 
fileName + ": " + mimetypesFileTypeMapResult); if (mimetypesFileTypeMapResult != null) { @@ -567,14 +568,19 @@ public static String determineFileTypeByExtension(String fileName) { } public static String lookupFileTypeFromPropertiesFile(String fileName) { - String fileExtension = FilenameUtils.getExtension(fileName); + String fileKey = FilenameUtils.getExtension(fileName); String propertyFileName = "MimeTypeDetectionByFileExtension"; + if(fileKey == null || fileKey.isEmpty()) { + fileKey = fileName; + propertyFileName = "MimeTypeDetectionByFileName"; + + } String propertyFileNameOnDisk = propertyFileName + ".properties"; try { - logger.fine("checking " + propertyFileNameOnDisk + " for file extension " + fileExtension); - return BundleUtil.getStringFromPropertyFile(fileExtension, propertyFileName); + logger.fine("checking " + propertyFileNameOnDisk + " for file key " + fileKey); + return BundleUtil.getStringFromPropertyFile(fileKey, propertyFileName); } catch (MissingResourceException ex) { - logger.info(fileExtension + " is a file extension Dataverse doesn't know about. Consider adding it to the " + propertyFileNameOnDisk + " file."); + logger.info(fileKey + " is a filename/extension Dataverse doesn't know about. 
Consider adding it to the " + propertyFileNameOnDisk + " file."); return null; } } @@ -876,7 +882,7 @@ public static CreateDataFileResult createDataFiles(DatasetVersion version, Input } datafiles.add(datafile); - return CreateDataFileResult.success(finalType, datafiles); + return CreateDataFileResult.success(fileName, finalType, datafiles); } // If it's a ZIP file, we are going to unpack it and create multiple @@ -1052,7 +1058,7 @@ public static CreateDataFileResult createDataFiles(DatasetVersion version, Input logger.warning("Could not remove temp file " + tempFile.getFileName().toString()); } // and return: - return CreateDataFileResult.success(finalType, datafiles); + return CreateDataFileResult.success(fileName, finalType, datafiles); } } else if (finalType.equalsIgnoreCase(ShapefileHandler.SHAPEFILE_FILE_TYPE)) { @@ -1068,7 +1074,7 @@ public static CreateDataFileResult createDataFiles(DatasetVersion version, Input boolean didProcessWork = shpIngestHelper.processFile(); if (!(didProcessWork)) { logger.severe("Processing of zipped shapefile failed."); - return CreateDataFileResult.error(finalType); + return CreateDataFileResult.error(fileName, finalType); } try { @@ -1129,11 +1135,11 @@ public static CreateDataFileResult createDataFiles(DatasetVersion version, Input logger.warning("Unable to delete: " + tempFile.toString() + "due to Security Exception: " + se.getMessage()); } - return CreateDataFileResult.success(finalType, datafiles); + return CreateDataFileResult.success(fileName, finalType, datafiles); } else { logger.severe("No files added from directory of rezipped shapefiles"); } - return CreateDataFileResult.error(finalType); + return CreateDataFileResult.error(fileName, finalType); } else if (finalType.equalsIgnoreCase(BagItFileHandler.FILE_TYPE)) { Optional bagItFileHandler = CDI.current().select(BagItFileHandlerFactory.class).get().getBagItFileHandler(); @@ -1145,7 +1151,7 @@ public static CreateDataFileResult createDataFiles(DatasetVersion version, 
Input } else { // Default to suppliedContentType if set or the overall undetermined default if a contenttype isn't supplied finalType = StringUtils.isBlank(suppliedContentType) ? FileUtil.MIME_TYPE_UNDETERMINED_DEFAULT : suppliedContentType; - String type = determineFileTypeByExtension(fileName); + String type = determineFileTypeByNameAndExtension(fileName); if (!StringUtils.isBlank(type)) { //Use rules for deciding when to trust browser supplied type if (useRecognizedType(finalType, type)) { @@ -1176,14 +1182,14 @@ public static CreateDataFileResult createDataFiles(DatasetVersion version, Input } datafiles.add(datafile); - return CreateDataFileResult.success(finalType, datafiles); + return CreateDataFileResult.success(fileName, finalType, datafiles); } - return CreateDataFileResult.error(finalType); + return CreateDataFileResult.error(fileName, finalType); } // end createDataFiles - private static boolean useRecognizedType(String suppliedContentType, String recognizedType) { + public static boolean useRecognizedType(String suppliedContentType, String recognizedType) { // is it any better than the type that was supplied to us, // if any? // This is not as trivial a task as one might expect... @@ -1413,7 +1419,7 @@ public static void generateS3PackageStorageIdentifier(DataFile dataFile) { String driverId = dataFile.getOwner().getEffectiveStorageDriverId(); String bucketName = System.getProperty("dataverse.files." + driverId + ".bucket-name"); - String storageId = driverId + "://" + bucketName + ":" + dataFile.getFileMetadata().getLabel(); + String storageId = driverId + DataAccess.SEPARATOR + bucketName + ":" + dataFile.getFileMetadata().getLabel(); dataFile.setStorageIdentifier(storageId); } @@ -1532,7 +1538,7 @@ private static Boolean popupDueToStateOrTerms(DatasetVersion datasetVersion) { } // 1. 
License and Terms of Use: if (datasetVersion.getTermsOfUseAndAccess() != null) { - License license = datasetVersion.getTermsOfUseAndAccess().getLicense(); + License license = DatasetUtil.getLicense(datasetVersion); if ((license == null && StringUtils.isNotBlank(datasetVersion.getTermsOfUseAndAccess().getTermsOfUse())) || (license != null && !license.isDefault())) { logger.fine("Popup required because of license or terms of use."); @@ -1615,32 +1621,33 @@ public static String getPublicDownloadUrl(String dataverseSiteUrl, String persis */ public static String getFileDownloadUrlPath(String downloadType, Long fileId, boolean gbRecordsWritten, Long fileMetadataId) { String fileDownloadUrl = "/api/access/datafile/" + fileId; - if (downloadType != null && downloadType.equals("bundle")) { - if (fileMetadataId == null) { - fileDownloadUrl = "/api/access/datafile/bundle/" + fileId; - } else { - fileDownloadUrl = "/api/access/datafile/bundle/" + fileId + "?fileMetadataId=" + fileMetadataId; - } - } - if (downloadType != null && downloadType.equals("original")) { - fileDownloadUrl = "/api/access/datafile/" + fileId + "?format=original"; - } - if (downloadType != null && downloadType.equals("RData")) { - fileDownloadUrl = "/api/access/datafile/" + fileId + "?format=RData"; - } - if (downloadType != null && downloadType.equals("var")) { - if (fileMetadataId == null) { - fileDownloadUrl = "/api/access/datafile/" + fileId + "/metadata"; - } else { - fileDownloadUrl = "/api/access/datafile/" + fileId + "/metadata?fileMetadataId=" + fileMetadataId; + if (downloadType != null) { + switch(downloadType) { + case "original": + case"RData": + case "tab": + case "GlobusTransfer": + fileDownloadUrl = "/api/access/datafile/" + fileId + "?format=" + downloadType; + break; + case "bundle": + if (fileMetadataId == null) { + fileDownloadUrl = "/api/access/datafile/bundle/" + fileId; + } else { + fileDownloadUrl = "/api/access/datafile/bundle/" + fileId + "?fileMetadataId=" + fileMetadataId; + } 
+ break; + case "var": + if (fileMetadataId == null) { + fileDownloadUrl = "/api/access/datafile/" + fileId + "/metadata"; + } else { + fileDownloadUrl = "/api/access/datafile/" + fileId + "/metadata?fileMetadataId=" + fileMetadataId; + } + break; + } + } - } - if (downloadType != null && downloadType.equals("tab")) { - fileDownloadUrl = "/api/access/datafile/" + fileId + "?format=tab"; - } if (gbRecordsWritten) { - if (downloadType != null && ((downloadType.equals("original") || downloadType.equals("RData") || downloadType.equals("tab")) || - ((downloadType.equals("var") || downloadType.equals("bundle") ) && fileMetadataId != null))) { + if (fileDownloadUrl.contains("?")) { fileDownloadUrl += "&gbrecs=true"; } else { fileDownloadUrl += "?gbrecs=true"; @@ -1778,10 +1785,10 @@ public static void validateDataFileChecksum(DataFile dataFile) throws IOExceptio StorageIO storage = dataFile.getStorageIO(); InputStream in = null; - + try { storage.open(DataAccessOption.READ_ACCESS); - + if (!dataFile.isTabularData()) { in = storage.getInputStream(); } else { @@ -1836,7 +1843,7 @@ public static void validateDataFileChecksum(DataFile dataFile) throws IOExceptio } finally { IOUtils.closeQuietly(in); } - // try again: + // try again: if (recalculatedChecksum.equals(dataFile.getChecksumValue())) { fixed = true; try { @@ -1847,10 +1854,11 @@ public static void validateDataFileChecksum(DataFile dataFile) throws IOExceptio } } } - + if (!fixed) { String info = BundleUtil.getStringFromBundle("dataset.publish.file.validation.error.wrongChecksumValue", Arrays.asList(dataFile.getId().toString())); logger.log(Level.INFO, info); + logger.fine("Expected: " + dataFile.getChecksumValue() +", calculated: " + recalculatedChecksum); throw new IOException(info); } } @@ -1859,7 +1867,7 @@ public static void validateDataFileChecksum(DataFile dataFile) throws IOExceptio } public static String getStorageIdentifierFromLocation(String location) { - int driverEnd = location.indexOf("://") + 3; + int 
driverEnd = location.indexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length(); int bucketEnd = driverEnd + location.substring(driverEnd).indexOf("/"); return location.substring(0,bucketEnd) + ":" + location.substring(location.lastIndexOf("/") + 1); } @@ -1895,7 +1903,7 @@ public static void deleteTempFile(DataFile dataFile, Dataset dataset, IngestServ } } String si = dataFile.getStorageIdentifier(); - if (si.contains("://")) { + if (si.contains(DataAccess.SEPARATOR)) { //Direct upload files will already have a store id in their storageidentifier //but they need to be associated with a dataset for the overall storagelocation to be calculated //so we temporarily set the owner @@ -1914,7 +1922,7 @@ public static void deleteTempFile(DataFile dataFile, Dataset dataset, IngestServ } catch (IOException ioEx) { // safe to ignore - it's just a temp file. logger.warning(ioEx.getMessage()); - if(dataFile.getStorageIdentifier().contains("://")) { + if(dataFile.getStorageIdentifier().contains(DataAccess.SEPARATOR)) { logger.warning("Failed to delete temporary file " + dataFile.getStorageIdentifier()); } else { logger.warning("Failed to delete temporary file " + FileUtil.getFilesTempDirectory() + "/" diff --git a/src/main/java/edu/harvard/iq/dataverse/util/MailUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/MailUtil.java index 55c6f4d83d6..e76e2c5696b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/MailUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/MailUtil.java @@ -75,6 +75,38 @@ public static String getSubjectTextBasedOnNotification(UserNotification userNoti } catch (Exception e) { return BundleUtil.getStringFromBundle("notification.email.import.filesystem.subject", rootDvNameAsList); } + case GLOBUSUPLOADCOMPLETED: + try { + DatasetVersion version = (DatasetVersion)objectOfNotification; + List dsNameAsList = Arrays.asList(version.getDataset().getDisplayName()); + return 
BundleUtil.getStringFromBundle("notification.email.globus.uploadCompleted.subject", dsNameAsList); + } catch (Exception e) { + return BundleUtil.getStringFromBundle("notification.email.globus.uploadCompleted.subject", rootDvNameAsList); + } + case GLOBUSDOWNLOADCOMPLETED: + try { + DatasetVersion version = (DatasetVersion)objectOfNotification; + List dsNameAsList = Arrays.asList(version.getDataset().getDisplayName()); + return BundleUtil.getStringFromBundle("notification.email.globus.downloadCompleted.subject", dsNameAsList); + } catch (Exception e) { + return BundleUtil.getStringFromBundle("notification.email.globus.downloadCompleted.subject", rootDvNameAsList); + } + case GLOBUSUPLOADCOMPLETEDWITHERRORS: + try { + DatasetVersion version = (DatasetVersion)objectOfNotification; + List dsNameAsList = Arrays.asList(version.getDataset().getDisplayName()); + return BundleUtil.getStringFromBundle("notification.email.globus.uploadCompletedWithErrors.subject", dsNameAsList); + } catch (Exception e) { + return BundleUtil.getStringFromBundle("notification.email.globus.uploadCompletedWithErrors.subject", rootDvNameAsList); + } + case GLOBUSDOWNLOADCOMPLETEDWITHERRORS: + try { + DatasetVersion version = (DatasetVersion)objectOfNotification; + List dsNameAsList = Arrays.asList(version.getDataset().getDisplayName()); + return BundleUtil.getStringFromBundle("notification.email.globus.downloadCompletedWithErrors.subject", dsNameAsList); + } catch (Exception e) { + return BundleUtil.getStringFromBundle("notification.email.globus.downloadCompletedWithErrors.subject", rootDvNameAsList); + } case CHECKSUMIMPORT: return BundleUtil.getStringFromBundle("notification.email.import.checksum.subject", rootDvNameAsList); @@ -86,8 +118,10 @@ public static String getSubjectTextBasedOnNotification(UserNotification userNoti return BundleUtil.getStringFromBundle("notification.email.ingestCompleted.subject", rootDvNameAsList); case INGESTCOMPLETEDWITHERRORS: return 
BundleUtil.getStringFromBundle("notification.email.ingestCompletedWithErrors.subject", rootDvNameAsList); + case DATASETMENTIONED: + return BundleUtil.getStringFromBundle("notification.email.datasetWasMentioned.subject", rootDvNameAsList); } return ""; } -} +} \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/util/MarkupChecker.java b/src/main/java/edu/harvard/iq/dataverse/util/MarkupChecker.java index 3131afbf010..ef74819f073 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/MarkupChecker.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/MarkupChecker.java @@ -7,7 +7,7 @@ import org.apache.commons.text.StringEscapeUtils; import org.jsoup.Jsoup; -import org.jsoup.safety.Whitelist; +import org.jsoup.safety.Safelist; import org.jsoup.parser.Parser; /** @@ -20,8 +20,8 @@ public class MarkupChecker { /** - * Wrapper around Jsoup clean method with the basic White list - * http://jsoup.org/cookbook/cleaning-html/whitelist-sanitizer + * Wrapper around Jsoup clean method with the basic Safe list + * http://jsoup.org/cookbook/cleaning-html/safelist-sanitizer * @param unsafe * @return */ @@ -33,18 +33,18 @@ public static String sanitizeBasicHTML(String unsafe) { // basic includes: a, b, blockquote, br, cite, code, dd, dl, dt, em, i, li, ol, p, pre, q, small, span, strike, strong, sub, sup, u, ul //Whitelist wl = Whitelist.basic().addTags("img", "h1", "h2", "h3", "kbd", "hr", "s", "del"); - Whitelist wl = Whitelist.basicWithImages().addTags("h1", "h2", "h3", "kbd", "hr", "s", "del", "map", "area").addAttributes("img", "usemap") + Safelist sl = Safelist.basicWithImages().addTags("h1", "h2", "h3", "kbd", "hr", "s", "del", "map", "area").addAttributes("img", "usemap") .addAttributes("map", "name").addAttributes("area", "shape", "coords", "href", "title", "alt") .addEnforcedAttribute("a", "target", "_blank"); - return Jsoup.clean(unsafe, wl); + return Jsoup.clean(unsafe, sl); } /** * Strip all HTMl tags * - * 
http://jsoup.org/apidocs/org/jsoup/safety/Whitelist.html#none%28%29 + * http://jsoup.org/apidocs/org/jsoup/safety/Safelist.html#none * * @param unsafe * @return @@ -55,7 +55,7 @@ public static String stripAllTags(String unsafe) { return null; } - return Parser.unescapeEntities(Jsoup.clean(unsafe, Whitelist.none()), true); + return Parser.unescapeEntities(Jsoup.clean(unsafe, Safelist.none()), true); } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java index bd27405fae5..7abd0d02065 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java @@ -422,7 +422,7 @@ public static long getLongLimitFromStringOrDefault(String limitSetting, Long def if (limitSetting != null && !limitSetting.equals("")) { try { - limit = new Long(limitSetting); + limit = Long.valueOf(limitSetting); } catch (NumberFormatException nfe) { limit = null; } @@ -431,12 +431,12 @@ public static long getLongLimitFromStringOrDefault(String limitSetting, Long def return limit != null ? limit : defaultValue; } - static int getIntLimitFromStringOrDefault(String limitSetting, Integer defaultValue) { + public static int getIntLimitFromStringOrDefault(String limitSetting, Integer defaultValue) { Integer limit = null; if (limitSetting != null && !limitSetting.equals("")) { try { - limit = new Integer(limitSetting); + limit = Integer.valueOf(limitSetting); } catch (NumberFormatException nfe) { limit = null; } @@ -579,7 +579,7 @@ public Integer getSearchHighlightFragmentSize() { } return null; } - + public long getTabularIngestSizeLimit() { // This method will return the blanket ingestable size limit, if // set on the system. 
I.e., the universal limit that applies to all @@ -856,7 +856,14 @@ public enum FileUploadMethods { * Traditional Dataverse file handling, which tends to involve users * uploading and downloading files using a browser or APIs. */ - NATIVE("native/http"); + NATIVE("native/http"), + + /** + * Upload through Globus of large files + */ + + GLOBUS("globus") + ; private final String text; @@ -896,7 +903,9 @@ public enum FileDownloadMethods { * go through Glassfish. */ RSYNC("rsal/rsync"), - NATIVE("native/http"); + NATIVE("native/http"), + GLOBUS("globus") + ; private final String text; private FileDownloadMethods(final String text) { @@ -984,15 +993,19 @@ public boolean isPublicInstall(){ } public boolean isRsyncUpload(){ - return getUploadMethodAvailable(SystemConfig.FileUploadMethods.RSYNC.toString()); + return getMethodAvailable(SystemConfig.FileUploadMethods.RSYNC.toString(), true); } - + + public boolean isGlobusUpload(){ + return getMethodAvailable(FileUploadMethods.GLOBUS.toString(), true); + } + // Controls if HTTP upload is enabled for both GUI and API. 
public boolean isHTTPUpload(){ - return getUploadMethodAvailable(SystemConfig.FileUploadMethods.NATIVE.toString()); + return getMethodAvailable(SystemConfig.FileUploadMethods.NATIVE.toString(), true); } - public boolean isRsyncOnly(){ + public boolean isRsyncOnly(){ String downloadMethods = settingsService.getValueForKey(SettingsServiceBean.Key.DownloadMethods); if(downloadMethods == null){ return false; @@ -1005,26 +1018,37 @@ public boolean isRsyncOnly(){ return false; } else { return Arrays.asList(uploadMethods.toLowerCase().split("\\s*,\\s*")).size() == 1 && uploadMethods.toLowerCase().equals(SystemConfig.FileUploadMethods.RSYNC.toString()); - } + } } public boolean isRsyncDownload() { - String downloadMethods = settingsService.getValueForKey(SettingsServiceBean.Key.DownloadMethods); - return downloadMethods !=null && downloadMethods.toLowerCase().contains(SystemConfig.FileDownloadMethods.RSYNC.toString()); + return getMethodAvailable(SystemConfig.FileUploadMethods.RSYNC.toString(), false); } public boolean isHTTPDownload() { - String downloadMethods = settingsService.getValueForKey(SettingsServiceBean.Key.DownloadMethods); - logger.warning("Download Methods:" + downloadMethods); - return downloadMethods !=null && downloadMethods.toLowerCase().contains(SystemConfig.FileDownloadMethods.NATIVE.toString()); + return getMethodAvailable(SystemConfig.FileUploadMethods.NATIVE.toString(), false); + } + + public boolean isGlobusDownload() { + return getMethodAvailable(FileUploadMethods.GLOBUS.toString(), false); } - private Boolean getUploadMethodAvailable(String method){ - String uploadMethods = settingsService.getValueForKey(SettingsServiceBean.Key.UploadMethods); - if (uploadMethods==null){ + public boolean isGlobusFileDownload() { + return (isGlobusDownload() && settingsService.isTrueForKey(SettingsServiceBean.Key.GlobusSingleFileTransfer, false)); + } + + public List getGlobusStoresList() { + String globusStores = 
settingsService.getValueForKey(SettingsServiceBean.Key.GlobusStores, ""); + return Arrays.asList(globusStores.split("\\s*,\\s*")); + } + + private Boolean getMethodAvailable(String method, boolean upload) { + String methods = settingsService.getValueForKey( + upload ? SettingsServiceBean.Key.UploadMethods : SettingsServiceBean.Key.DownloadMethods); + if (methods == null) { return false; } else { - return Arrays.asList(uploadMethods.toLowerCase().split("\\s*,\\s*")).contains(method); + return Arrays.asList(methods.toLowerCase().split("\\s*,\\s*")).contains(method); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/URLTokenUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/URLTokenUtil.java new file mode 100644 index 00000000000..b3d5f9d6b74 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/util/URLTokenUtil.java @@ -0,0 +1,250 @@ +package edu.harvard.iq.dataverse.util; + +import java.util.Arrays; +import java.util.logging.Logger; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.FileMetadata; +import edu.harvard.iq.dataverse.GlobalId; +import edu.harvard.iq.dataverse.authorization.users.ApiToken; + +public class URLTokenUtil { + + protected static final Logger logger = Logger.getLogger(URLTokenUtil.class.getCanonicalName()); + protected final DataFile dataFile; + protected final Dataset dataset; + protected final FileMetadata fileMetadata; + protected ApiToken apiToken; + protected String localeCode; + + /** + * File level + * + * @param dataFile Required. + * @param apiToken The apiToken can be null + * @param fileMetadata Required. + * @param localeCode optional. 
+ * + */ + public URLTokenUtil(DataFile dataFile, ApiToken apiToken, FileMetadata fileMetadata, String localeCode) + throws IllegalArgumentException { + if (dataFile == null) { + String error = "A DataFile is required."; + logger.warning("Error in URLTokenUtil constructor: " + error); + throw new IllegalArgumentException(error); + } + if (fileMetadata == null) { + String error = "A FileMetadata is required."; + logger.warning("Error in URLTokenUtil constructor: " + error); + throw new IllegalArgumentException(error); + } + this.dataFile = dataFile; + this.dataset = fileMetadata.getDatasetVersion().getDataset(); + this.fileMetadata = fileMetadata; + this.apiToken = apiToken; + this.localeCode = localeCode; + } + + /** + * Dataset level + * + * @param dataset Required. + * @param apiToken The apiToken can be null + */ + public URLTokenUtil(Dataset dataset, ApiToken apiToken, String localeCode) { + this(dataset, null, apiToken, localeCode); + } + + /** + * Dataset level + * + * @param dataset Required. + * @param datafile Optional. 
+ * @param apiToken Optional The apiToken can be null + * @localeCode Optional + * + */ + public URLTokenUtil(Dataset dataset, DataFile datafile, ApiToken apiToken, String localeCode) { + if (dataset == null) { + String error = "A Dataset is required."; + logger.warning("Error in URLTokenUtil constructor: " + error); + throw new IllegalArgumentException(error); + } + this.dataset = dataset; + this.dataFile = datafile; + this.fileMetadata = null; + this.apiToken = apiToken; + this.localeCode = localeCode; + } + + public DataFile getDataFile() { + return dataFile; + } + + public FileMetadata getFileMetadata() { + return fileMetadata; + } + + public ApiToken getApiToken() { + return apiToken; + } + + public String getLocaleCode() { + return localeCode; + } + + public String getQueryParam(String key, String value) { + String tokenValue = null; + tokenValue = getTokenValue(value); + if (tokenValue != null) { + return key + '=' + tokenValue; + } else { + return null; + } + } + + /** + * Tries to replace all occurrences of {} with the value for the + * corresponding ReservedWord + * + * @param url - the input string in which to replace tokens, normally a url + * @throws IllegalArgumentException if there is no matching ReservedWord or if + * the configuation of this instance doesn't + * have values for this ReservedWord (e.g. + * asking for FILE_PID when using the dataset + * constructor, etc.) 
+ */ + public String replaceTokensWithValues(String url) { + String newUrl = url; + Pattern pattern = Pattern.compile("(\\{.*?\\})"); + Matcher matcher = pattern.matcher(url); + while(matcher.find()) { + String token = matcher.group(1); + ReservedWord reservedWord = ReservedWord.fromString(token); + String tValue = getTokenValue(token); + logger.fine("Replacing " + reservedWord.toString() + " with " + tValue + " in " + newUrl); + newUrl = newUrl.replace(reservedWord.toString(), tValue); + } + return newUrl; + } + + private String getTokenValue(String value) { + ReservedWord reservedWord = ReservedWord.fromString(value); + switch (reservedWord) { + case FILE_ID: + // getDataFile is never null for file tools because of the constructor + return getDataFile().getId().toString(); + case FILE_PID: + GlobalId filePid = getDataFile().getGlobalId(); + if (filePid != null) { + return getDataFile().getGlobalId().asString(); + } + break; + case SITE_URL: + return SystemConfig.getDataverseSiteUrlStatic(); + case API_TOKEN: + String apiTokenString = null; + ApiToken theApiToken = getApiToken(); + if (theApiToken != null) { + apiTokenString = theApiToken.getTokenString(); + } + return apiTokenString; + case DATASET_ID: + return dataset.getId().toString(); + case DATASET_PID: + return dataset.getGlobalId().asString(); + case DATASET_VERSION: + String versionString = null; + if (fileMetadata != null) { // true for file case + versionString = fileMetadata.getDatasetVersion().getFriendlyVersionNumber(); + } else { // Dataset case - return the latest visible version (unless/until the dataset + // case allows specifying a version) + if (getApiToken() != null) { + versionString = dataset.getLatestVersion().getFriendlyVersionNumber(); + } else { + versionString = dataset.getLatestVersionForCopy().getFriendlyVersionNumber(); + } + } + if (("DRAFT").equals(versionString)) { + versionString = ":draft"; // send the token needed in api calls that can be substituted for a numeric + // version. 
+ } + return versionString; + case FILE_METADATA_ID: + if (fileMetadata != null) { // true for file case + return fileMetadata.getId().toString(); + } + case LOCALE_CODE: + return getLocaleCode(); + default: + break; + } + throw new IllegalArgumentException("Cannot replace reserved word: " + value); + } + + public static String getScriptForUrl(String url) { + String msg = BundleUtil.getStringFromBundle("externaltools.enable.browser.popups"); + String script = "const newWin = window.open('" + url + "', target='_blank'); if (!newWin || newWin.closed || typeof newWin.closed == \"undefined\") {alert(\"" + msg + "\");}"; + return script; + } + + public enum ReservedWord { + + // TODO: Research if a format like "{reservedWord}" is easily parse-able or if + // another format would be + // better. The choice of curly braces is somewhat arbitrary, but has been + // observed in documentation for + // various REST APIs. For example, "Variable substitutions will be made when a + // variable is named in {brackets}." + // from https://swagger.io/specification/#fixed-fields-29 but that's for URLs. + FILE_ID("fileId"), FILE_PID("filePid"), SITE_URL("siteUrl"), API_TOKEN("apiToken"), + // datasetId is the database id + DATASET_ID("datasetId"), + // datasetPid is the DOI or Handle + DATASET_PID("datasetPid"), DATASET_VERSION("datasetVersion"), FILE_METADATA_ID("fileMetadataId"), + LOCALE_CODE("localeCode"); + + private final String text; + private final String START = "{"; + private final String END = "}"; + + private ReservedWord(final String text) { + this.text = START + text + END; + } + + /** + * This is a centralized method that enforces that only reserved words are + * allowed to be used by external tools. External tool authors cannot pass their + * own query parameters through Dataverse such as "mode=mode1". 
+ * + * @throws IllegalArgumentException + */ + public static ReservedWord fromString(String text) throws IllegalArgumentException { + if (text != null) { + for (ReservedWord reservedWord : ReservedWord.values()) { + if (text.equals(reservedWord.text)) { + return reservedWord; + } + } + } + // TODO: Consider switching to a more informative message that enumerates the + // valid reserved words. + boolean moreInformativeMessage = false; + if (moreInformativeMessage) { + throw new IllegalArgumentException( + "Unknown reserved word: " + text + ". A reserved word must be one of these values: " + + Arrays.asList(ReservedWord.values()) + "."); + } else { + throw new IllegalArgumentException("Unknown reserved word: " + text); + } + } + + @Override + public String toString() { + return text; + } + } +} \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/util/UrlSignerUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/UrlSignerUtil.java new file mode 100644 index 00000000000..b11334520e6 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/util/UrlSignerUtil.java @@ -0,0 +1,151 @@ +package edu.harvard.iq.dataverse.util; + +import java.net.URL; +import java.nio.charset.Charset; +import java.util.List; +import java.util.logging.Level; +import java.util.logging.Logger; + +import org.apache.commons.codec.digest.DigestUtils; +import org.apache.http.NameValuePair; +import org.apache.http.client.utils.URLEncodedUtils; +import org.joda.time.LocalDateTime; + +/** + * Simple class to sign/validate URLs. 
+ * + */ +public class UrlSignerUtil { + + private static final Logger logger = Logger.getLogger(UrlSignerUtil.class.getName()); + + /** + * + * @param baseUrl - the URL to sign - cannot contain query params + * "until","user", "method", or "token" + * @param timeout - how many minutes to make the URL valid for (note - time skew + * between the creator and receiver could affect the validation + * @param user - a string representing the user - should be understood by the + * creator/receiver + * @param method - one of the HTTP methods + * @param key - a secret key shared by the creator/receiver. In Dataverse + * this could be an APIKey (when sending URL to a tool that will + * use it to retrieve info from Dataverse) + * @return - the signed URL + */ + public static String signUrl(String baseUrl, Integer timeout, String user, String method, String key) { + StringBuilder signedUrl = new StringBuilder(baseUrl); + + boolean firstParam = true; + if (baseUrl.contains("?")) { + firstParam = false; + } + if (timeout != null) { + LocalDateTime validTime = LocalDateTime.now(); + validTime = validTime.plusMinutes(timeout); + validTime.toString(); + signedUrl.append(firstParam ? "?" : "&").append("until=").append(validTime); + firstParam = false; + } + if (user != null) { + signedUrl.append(firstParam ? "?" : "&").append("user=").append(user); + firstParam = false; + } + if (method != null) { + signedUrl.append(firstParam ? "?" : "&").append("method=").append(method); + firstParam=false; + } + signedUrl.append(firstParam ? "?" : "&").append("token="); + logger.fine("String to sign: " + signedUrl.toString() + ""); + signedUrl.append(DigestUtils.sha512Hex(signedUrl.toString() + key)); + logger.fine("Generated Signed URL: " + signedUrl.toString()); + if (logger.isLoggable(Level.FINE)) { + logger.fine( + "URL signature is " + (isValidUrl(signedUrl.toString(), user, method, key) ? 
"valid" : "invalid")); + } + return signedUrl.toString(); + } + + /** + * This method will only return true if the URL and parameters except the + * "token" are unchanged from the original/match the values sent to this method, + * and the "token" parameter matches what this method recalculates using the + * shared key THe method also assures that the "until" timestamp is after the + * current time. + * + * @param signedUrl - the signed URL as received from Dataverse + * @param method - an HTTP method. If provided, the method in the URL must + * match + * @param user - a string representing the user, if provided the value must + * match the one in the url + * @param key - the shared secret key to be used in validation + * @return - true if valid, false if not: e.g. the key is not the same as the + * one used to generate the "token" any part of the URL preceding the + * "token" has been altered the method doesn't match (e.g. the server + * has received a POST request and the URL only allows GET) the user + * string doesn't match (e.g. 
the server knows user A is logged in, but + * the URL is only for user B) the url has expired (was used after the + * until timestamp) + */ + public static boolean isValidUrl(String signedUrl, String user, String method, String key) { + boolean valid = true; + try { + URL url = new URL(signedUrl); + List params = URLEncodedUtils.parse(url.getQuery(), Charset.forName("UTF-8")); + String hash = null; + String dateString = null; + String allowedMethod = null; + String allowedUser = null; + for (NameValuePair nvp : params) { + if (nvp.getName().equals("token")) { + hash = nvp.getValue(); + logger.fine("Hash: " + hash); + } + if (nvp.getName().equals("until")) { + dateString = nvp.getValue(); + logger.fine("Until: " + dateString); + } + if (nvp.getName().equals("method")) { + allowedMethod = nvp.getValue(); + logger.fine("Method: " + allowedMethod); + } + if (nvp.getName().equals("user")) { + allowedUser = nvp.getValue(); + logger.fine("User: " + allowedUser); + } + } + + int index = signedUrl.indexOf(((dateString==null && allowedMethod==null && allowedUser==null) ? 
"?":"&") + "token="); + // Assuming the token is last - doesn't have to be, but no reason for the URL + // params to be rearranged either, and this should only cause false negatives if + // it does happen + String urlToHash = signedUrl.substring(0, index + 7); + logger.fine("String to hash: " + urlToHash + ""); + String newHash = DigestUtils.sha512Hex(urlToHash + key); + logger.fine("Calculated Hash: " + newHash); + if (!hash.equals(newHash)) { + logger.fine("Hash doesn't match"); + valid = false; + } + if (dateString != null && LocalDateTime.parse(dateString).isBefore(LocalDateTime.now())) { + logger.fine("Url is expired"); + valid = false; + } + if (method != null && !method.equals(allowedMethod)) { + logger.fine("Method doesn't match"); + valid = false; + } + if (user != null && !user.equals(allowedUser)) { + logger.fine("User doesn't match"); + valid = false; + } + } catch (Throwable t) { + // Want to catch anything like null pointers, etc. to force valid=false upon any + // error + logger.warning("Bad URL: " + signedUrl + " : " + t.getMessage()); + valid = false; + } + return valid; + } + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java index 56676e3d00a..920f80901be 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java @@ -47,6 +47,7 @@ import org.apache.commons.compress.utils.IOUtils; import org.apache.commons.text.WordUtils; import org.apache.http.client.ClientProtocolException; +import org.apache.http.client.config.CookieSpecs; import org.apache.http.client.config.RequestConfig; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; @@ -58,7 +59,7 @@ import org.apache.http.conn.ssl.SSLConnectionSocketFactory; import org.apache.http.conn.ssl.TrustSelfSignedStrategy; import 
org.apache.http.ssl.SSLContextBuilder; - +import org.apache.http.util.EntityUtils; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; import org.apache.http.impl.conn.PoolingHttpClientConnectionManager; @@ -72,6 +73,7 @@ import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.DataFile.ChecksumType; +import edu.harvard.iq.dataverse.GlobalId; import edu.harvard.iq.dataverse.util.json.JsonLDTerm; public class BagGenerator { @@ -89,7 +91,8 @@ public class BagGenerator { private int timeout = 60; private RequestConfig config = RequestConfig.custom().setConnectTimeout(timeout * 1000) - .setConnectionRequestTimeout(timeout * 1000).setSocketTimeout(timeout * 1000).build(); + .setConnectionRequestTimeout(timeout * 1000).setSocketTimeout(timeout * 1000) + .setCookieSpec(CookieSpecs.STANDARD).build(); protected CloseableHttpClient client; private PoolingHttpClientConnectionManager cm = null; @@ -204,7 +207,9 @@ public boolean generateBag(OutputStream outputStream) throws Exception { // The oremapObject is javax.json.JsonObject and we need com.google.gson.JsonObject for the aggregation object aggregation = (JsonObject) new JsonParser().parse(oremapObject.getJsonObject(JsonLDTerm.ore("describes").getLabel()).toString()); - bagID = aggregation.get("@id").getAsString() + "v." + String pidUrlString = aggregation.get("@id").getAsString(); + String pidString=GlobalId.getInternalFormOfPID(pidUrlString); + bagID = pidString + "v." + aggregation.get(JsonLDTerm.schemaOrg("version").getLabel()).getAsString(); logger.info("Generating Bag: " + bagID); @@ -278,7 +283,8 @@ public boolean generateBag(OutputStream outputStream) throws Exception { } createFileFromString(manifestName, sha1StringBuffer.toString()); } else { - logger.warning("No Hash values sent - Bag File does not meet BagIT specification requirement"); + logger.warning("No Hash values (no files?) 
sending empty manifest to nominally comply with BagIT specification requirement"); + createFileFromString("manifest-md5.txt", ""); } // bagit.txt - Required by spec createFileFromString("bagit.txt", "BagIt-Version: 1.0\r\nTag-File-Character-Encoding: UTF-8"); @@ -360,6 +366,7 @@ public boolean generateBag(String bagName, boolean temp) { // Create an output stream backed by the file bagFileOS = new FileOutputStream(bagFile); if (generateBag(bagFileOS)) { + //The generateBag call sets this.bagName to the correct value validateBagFile(bagFile); if (usetemp) { logger.fine("Moving tmp zip"); @@ -385,7 +392,8 @@ public void validateBag(String bagId) { ZipFile zf = null; InputStream is = null; try { - zf = new ZipFile(getBagFile(bagId)); + File bagFile = getBagFile(bagId); + zf = new ZipFile(bagFile); ZipArchiveEntry entry = zf.getEntry(getValidName(bagId) + "/manifest-sha1.txt"); if (entry != null) { logger.info("SHA1 hashes used"); @@ -425,7 +433,7 @@ public void validateBag(String bagId) { } IOUtils.closeQuietly(is); logger.info("HashMap Map contains: " + checksumMap.size() + " entries"); - checkFiles(checksumMap, zf); + checkFiles(checksumMap, bagFile); } catch (IOException io) { logger.log(Level.SEVERE,"Could not validate Hashes", io); } catch (Exception e) { @@ -454,14 +462,13 @@ public File getBagFile(String bagID) throws Exception { private void validateBagFile(File bagFile) throws IOException { // Run a confirmation test - should verify all files and hashes - ZipFile zf = new ZipFile(bagFile); + // Check files calculates the hashes and file sizes and reports on // whether hashes are correct - checkFiles(checksumMap, zf); + checkFiles(checksumMap, bagFile); logger.info("Data Count: " + dataCount); logger.info("Data Size: " + totalDataSize); - zf.close(); } public static String getValidName(String bagName) { @@ -478,7 +485,7 @@ private void processContainer(JsonObject item, String currentPath) throws IOExce } else if 
(item.has(JsonLDTerm.schemaOrg("name").getLabel())) { title = item.get(JsonLDTerm.schemaOrg("name").getLabel()).getAsString(); } - + logger.fine("Adding " + title + "/ to path " + currentPath); currentPath = currentPath + title + "/"; int containerIndex = -1; try { @@ -554,6 +561,7 @@ private void processContainer(JsonObject item, String currentPath) throws IOExce logger.warning("Duplicate/Collision: " + child.get("@id").getAsString() + " has SHA1 Hash: " + childHash + " in: " + bagID); } + logger.fine("Adding " + childPath + " with hash " + childHash + " to checksumMap"); checksumMap.put(childPath, childHash); } } @@ -697,29 +705,39 @@ private void createFileFromURL(final String relPath, final String uri) addEntry(archiveEntry, supp); } - private void checkFiles(HashMap shaMap, ZipFile zf) { + private void checkFiles(HashMap shaMap, File bagFile) { ExecutorService executor = Executors.newFixedThreadPool(numConnections); - BagValidationJob.setZipFile(zf); - BagValidationJob.setBagGenerator(this); - logger.fine("Validating hashes for zipped data files"); - int i = 0; - for (Entry entry : shaMap.entrySet()) { - BagValidationJob vj = new BagValidationJob(entry.getValue(), entry.getKey()); - executor.execute(vj); - i++; - if (i % 1000 == 0) { - logger.info("Queuing Hash Validations: " + i); - } - } - logger.fine("All Hash Validations Queued: " + i); - - executor.shutdown(); + ZipFile zf = null; try { - while (!executor.awaitTermination(10, TimeUnit.MINUTES)) { - logger.fine("Awaiting completion of hash calculations."); + zf = new ZipFile(bagFile); + + BagValidationJob.setZipFile(zf); + BagValidationJob.setBagGenerator(this); + logger.fine("Validating hashes for zipped data files"); + int i = 0; + for (Entry entry : shaMap.entrySet()) { + BagValidationJob vj = new BagValidationJob(bagName, entry.getValue(), entry.getKey()); + executor.execute(vj); + i++; + if (i % 1000 == 0) { + logger.info("Queuing Hash Validations: " + i); + } } - } catch (InterruptedException e) { - 
logger.log(Level.SEVERE,"Hash Calculations interrupted", e); + logger.fine("All Hash Validations Queued: " + i); + + executor.shutdown(); + try { + while (!executor.awaitTermination(10, TimeUnit.MINUTES)) { + logger.fine("Awaiting completion of hash calculations."); + } + } catch (InterruptedException e) { + logger.log(Level.SEVERE, "Hash Calculations interrupted", e); + } + } catch (IOException e1) { + // TODO Auto-generated catch block + e1.printStackTrace(); + } finally { + IOUtils.closeQuietly(zf); } logger.fine("Hash Validations Completed"); @@ -904,8 +922,8 @@ public void incrementTotalDataSize(long inc) { totalDataSize += inc; } - public String getHashtype() { - return hashtype.toString(); + public ChecksumType getHashtype() { + return hashtype; } // Get's all "Has Part" children, standardized to send an array with 0,1, or @@ -990,46 +1008,70 @@ private HttpGet createNewGetRequest(URI url, String returnType) { return request; } - InputStreamSupplier getInputStreamSupplier(final String uri) { + InputStreamSupplier getInputStreamSupplier(final String uriString) { return new InputStreamSupplier() { public InputStream get() { - int tries = 0; - while (tries < 5) { - try { - logger.fine("Get # " + tries + " for " + uri); - HttpGet getMap = createNewGetRequest(new URI(uri), null); - logger.finest("Retrieving " + tries + ": " + uri); - CloseableHttpResponse response; - //Note - if we ever need to pass an HttpClientContext, we need a new one per thread. 
- response = client.execute(getMap); - if (response.getStatusLine().getStatusCode() == 200) { - logger.finest("Retrieved: " + uri); - return response.getEntity().getContent(); - } - logger.fine("Status: " + response.getStatusLine().getStatusCode()); - tries++; - - } catch (ClientProtocolException e) { - tries += 5; - // TODO Auto-generated catch block - e.printStackTrace(); - } catch (IOException e) { - // Retry if this is a potentially temporary error such - // as a timeout - tries++; - logger.log(Level.WARNING,"Attempt# " + tries + " : Unable to retrieve file: " + uri, e); - if (tries == 5) { - logger.severe("Final attempt failed for " + uri); + try { + URI uri = new URI(uriString); + + int tries = 0; + while (tries < 5) { + + logger.fine("Get # " + tries + " for " + uriString); + HttpGet getFile = createNewGetRequest(uri, null); + logger.finest("Retrieving " + tries + ": " + uriString); + CloseableHttpResponse response = null; + try { + response = client.execute(getFile); + // Note - if we ever need to pass an HttpClientContext, we need a new one per + // thread. 
+ int statusCode = response.getStatusLine().getStatusCode(); + if (statusCode == 200) { + logger.finest("Retrieved: " + uri); + return response.getEntity().getContent(); + } + logger.warning("Attempt: " + tries + " - Unexpected Status when retrieving " + uriString + + " : " + statusCode); + if (statusCode < 500) { + logger.fine("Will not retry for 40x errors"); + tries += 5; + } else { + tries++; + } + // Error handling + if (response != null) { + try { + EntityUtils.consumeQuietly(response.getEntity()); + response.close(); + } catch (IOException io) { + logger.warning( + "Exception closing response after status: " + statusCode + " on " + uri); + } + } + } catch (ClientProtocolException e) { + tries += 5; + // TODO Auto-generated catch block + e.printStackTrace(); + } catch (IOException e) { + // Retry if this is a potentially temporary error such + // as a timeout + tries++; + logger.log(Level.WARNING, "Attempt# " + tries + " : Unable to retrieve file: " + uriString, + e); + if (tries == 5) { + logger.severe("Final attempt failed for " + uriString); + } + e.printStackTrace(); } - e.printStackTrace(); - } catch (URISyntaxException e) { - tries += 5; - // TODO Auto-generated catch block - e.printStackTrace(); + } + + } catch (URISyntaxException e) { + // TODO Auto-generated catch block + e.printStackTrace(); } - logger.severe("Could not read: " + uri); + logger.severe("Could not read: " + uriString); return null; } }; diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagValidation.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagValidation.java index 7535c6d98c0..172e384432f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagValidation.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagValidation.java @@ -3,8 +3,11 @@ import java.nio.file.Path; import java.util.Collections; import java.util.LinkedHashMap; +import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.stream.Collectors; 
+import java.util.stream.Stream; /** * @@ -16,7 +19,7 @@ public class BagValidation { private final Map fileResults; public BagValidation(Optional errorMessage) { - this.errorMessage = errorMessage; + this.errorMessage = errorMessage == null ? Optional.empty() : errorMessage; this.fileResults = new LinkedHashMap<>(); } @@ -34,6 +37,12 @@ public Map getFileResults() { return Collections.unmodifiableMap(fileResults); } + public List getAllErrors() { + Stream mainError = getErrorMessage().stream(); + Stream fileErrors = getFileResults().values().stream().filter(result -> result.isError()).map(result -> result.getMessage()); + return Stream.concat(mainError, fileErrors).collect(Collectors.toList()); + } + public long errors() { return fileResults.values().stream().filter(result -> result.isError()).count(); } @@ -76,8 +85,9 @@ public void setSuccess() { this.status = Status.SUCCESS; } - public void setError() { + public void setError(String message) { this.status = Status.ERROR; + this.message = message; } public boolean isPending() { @@ -92,10 +102,6 @@ public boolean isError() { return status.equals(Status.ERROR); } - public void setMessage(String message) { - this.message = message; - } - public String getMessage() { return message; } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagValidationJob.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagValidationJob.java index 7a32b96f4a0..7ac9fd701b8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagValidationJob.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagValidationJob.java @@ -25,6 +25,7 @@ import org.apache.commons.compress.archivers.zip.ZipFile; import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.DataFile.ChecksumType; import org.apache.commons.compress.utils.IOUtils; @@ -41,13 +42,15 @@ public class BagValidationJob implements Runnable { private String hash; private String name; - private static String hashtype; + private String 
basePath; + private static ChecksumType hashtype; - public BagValidationJob(String value, String key) throws IllegalStateException { + public BagValidationJob(String bagName, String value, String key) throws IllegalStateException { if (zf == null || bagGenerator == null) { throw new IllegalStateException( "Static Zipfile and BagGenerator must be set before creating ValidationJobs"); } + basePath=bagName; hash = value; name = key; @@ -60,11 +63,11 @@ public BagValidationJob(String value, String key) throws IllegalStateException { */ public void run() { - String realHash = generateFileHash(name, zf); + String realHash = generateFileHash(basePath + "/" + name, zf); if (hash.equals(realHash)) { log.fine("Valid hash for " + name); } else { - log.severe("Invalid " + bagGenerator.getHashtype() + " for " + name); + log.severe("Invalid " + bagGenerator.getHashtype().name() + " for " + name); log.fine("As sent: " + hash); log.fine("As calculated: " + realHash); } @@ -72,12 +75,16 @@ public void run() { private String generateFileHash(String name, ZipFile zf) { + String realHash = null; + ZipArchiveEntry archiveEntry1 = zf.getEntry(name); + + if(archiveEntry1 != null) { // Error check - add file sizes to compare against supplied stats - + log.fine("Getting stream for " + name); long start = System.currentTimeMillis(); InputStream inputStream = null; - String realHash = null; + try { inputStream = zf.getInputStream(archiveEntry1); if (hashtype.equals(DataFile.ChecksumType.SHA1)) { @@ -89,7 +96,7 @@ private String generateFileHash(String name, ZipFile zf) { } else if (hashtype.equals(DataFile.ChecksumType.MD5)) { realHash = DigestUtils.md5Hex(inputStream); } else { - log.warning("Unknown hash type: " + hashtype); + log.warning("Unknown hash type: " + hashtype.name()); } } catch (ZipException e) { @@ -104,6 +111,9 @@ private String generateFileHash(String name, ZipFile zf) { log.fine("Retrieve/compute time = " + (System.currentTimeMillis() - start) + " ms"); // Error check - add 
file sizes to compare against supplied stats bagGenerator.incrementTotalDataSize(archiveEntry1.getSize()); + } else { + log.warning("Entry " + name + " not found in zipped bag: not validated"); + } return realHash; } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagValidator.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagValidator.java index 14a813ec618..a9052bf4c80 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagValidator.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagValidator.java @@ -118,8 +118,7 @@ private BagValidation validateChecksums(FileDataProvider fileDataProvider, Manif FileChecksumValidationJob validationJob = new FileChecksumValidationJob(inputStreamProvider.get(), filePath, fileChecksum, manifestChecksums.getType(), fileValidationResult); executor.execute(validationJob); } else { - fileValidationResult.setError(); - fileValidationResult.setMessage(getMessage("bagit.validation.file.not.found", filePath, fileDataProvider.getName())); + fileValidationResult.setError(getMessage("bagit.validation.file.not.found", filePath, fileDataProvider.getName())); } } @@ -148,7 +147,8 @@ ExecutorService getExecutorService() { return Executors.newFixedThreadPool(validatorJobPoolSize); } - private String getMessage(String propertyKey, Object... parameters){ + // Visible for testing + String getMessage(String propertyKey, Object... 
parameters){ List parameterList = Arrays.stream(parameters).map(param -> param.toString()).collect(Collectors.toList()); return BundleUtil.getStringFromBundle(propertyKey, parameterList); } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/FileChecksumValidationJob.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/FileChecksumValidationJob.java index 8bf133248ea..5bea2fe3ced 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/bagit/FileChecksumValidationJob.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/FileChecksumValidationJob.java @@ -43,12 +43,10 @@ public void run() { if (fileChecksum.equals(calculatedChecksum)) { result.setSuccess(); } else { - result.setError(); - result.setMessage(getMessage("bagit.checksum.validation.error", filePath, bagChecksumType, fileChecksum, calculatedChecksum)); + result.setError(getMessage("bagit.checksum.validation.error", filePath, bagChecksumType, fileChecksum, calculatedChecksum)); } } catch (Exception e) { - result.setError(); - result.setMessage(getMessage("bagit.checksum.validation.exception", filePath, bagChecksumType, e.getMessage())); + result.setError(getMessage("bagit.checksum.validation.exception", filePath, bagChecksumType, e.getMessage())); logger.log(Level.WARNING, String.format("action=validate-checksum result=error filePath=%s type=%s", filePath, bagChecksumType), e); } finally { IOUtils.closeQuietly(inputStream); diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java index 19d9c2931ae..a6d85e1addb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java @@ -8,6 +8,7 @@ import edu.harvard.iq.dataverse.DatasetFieldServiceBean; import edu.harvard.iq.dataverse.DatasetFieldType; import edu.harvard.iq.dataverse.DatasetVersion; +import edu.harvard.iq.dataverse.Dataverse; import 
edu.harvard.iq.dataverse.DvObjectContainer; import edu.harvard.iq.dataverse.FileMetadata; import edu.harvard.iq.dataverse.TermsOfUseAndAccess; @@ -86,7 +87,7 @@ public JsonObjectBuilder getOREMapBuilder(boolean aggregationOnly) throws Except localContext.putIfAbsent(JsonLDNamespace.schema.getPrefix(), JsonLDNamespace.schema.getUrl()); Dataset dataset = version.getDataset(); - String id = dataset.getGlobalId().asString(); + String id = dataset.getGlobalId().toURL().toExternalForm(); JsonArrayBuilder fileArray = Json.createArrayBuilder(); // The map describes an aggregation JsonObjectBuilder aggBuilder = Json.createObjectBuilder(); @@ -96,82 +97,11 @@ public JsonObjectBuilder getOREMapBuilder(boolean aggregationOnly) throws Except for (DatasetField field : fields) { if (!field.isEmpty()) { DatasetFieldType dfType = field.getDatasetFieldType(); - if (excludeEmail && DatasetFieldType.FieldType.EMAIL.equals(dfType.getFieldType())) { - continue; - } JsonLDTerm fieldName = dfType.getJsonLDTerm(); - if (fieldName.inNamespace()) { - localContext.putIfAbsent(fieldName.getNamespace().getPrefix(), fieldName.getNamespace().getUrl()); - } else { - localContext.putIfAbsent(fieldName.getLabel(), fieldName.getUrl()); - } - JsonArrayBuilder vals = Json.createArrayBuilder(); - if (!dfType.isCompound()) { - for (String val : field.getValues_nondisplay()) { - if (cvocMap.containsKey(dfType.getId())) { - try { - JsonObject cvocEntry = cvocMap.get(dfType.getId()); - if (cvocEntry.containsKey("retrieval-filtering")) { - JsonObject filtering = cvocEntry.getJsonObject("retrieval-filtering"); - JsonObject context = filtering.getJsonObject("@context"); - for (String prefix : context.keySet()) { - localContext.putIfAbsent(prefix, context.getString(prefix)); - } - vals.add(datasetFieldService.getExternalVocabularyValue(val)); - } else { - vals.add(val); - } - } catch(Exception e) { - logger.warning("Couldn't interpret value for : " + val + " : " + e.getMessage()); - logger.log(Level.FINE, 
ExceptionUtils.getStackTrace(e)); - vals.add(val); - } - } else { - vals.add(val); - } - } - } else { - // ToDo: Needs to be recursive (as in JsonPrinter?) - for (DatasetFieldCompoundValue dscv : field.getDatasetFieldCompoundValues()) { - // compound values are of different types - JsonObjectBuilder child = Json.createObjectBuilder(); - - for (DatasetField dsf : dscv.getChildDatasetFields()) { - DatasetFieldType dsft = dsf.getDatasetFieldType(); - if (excludeEmail && DatasetFieldType.FieldType.EMAIL.equals(dsft.getFieldType())) { - continue; - } - // which may have multiple values - if (!dsf.isEmpty()) { - // Add context entry - // ToDo - also needs to recurse here? - JsonLDTerm subFieldName = dsft.getJsonLDTerm(); - if (subFieldName.inNamespace()) { - localContext.putIfAbsent(subFieldName.getNamespace().getPrefix(), - subFieldName.getNamespace().getUrl()); - } else { - localContext.putIfAbsent(subFieldName.getLabel(), subFieldName.getUrl()); - } - - List values = dsf.getValues_nondisplay(); - if (values.size() > 1) { - JsonArrayBuilder childVals = Json.createArrayBuilder(); - - for (String val : dsf.getValues_nondisplay()) { - childVals.add(val); - } - child.add(subFieldName.getLabel(), childVals); - } else { - child.add(subFieldName.getLabel(), values.get(0)); - } - } - } - vals.add(child); - } + JsonValue jv = getJsonLDForField(field, excludeEmail, cvocMap, localContext); + if(jv!=null) { + aggBuilder.add(fieldName.getLabel(), jv); } - // Add metadata value to aggregation, suppress array when only one value - JsonArray valArray = vals.build(); - aggBuilder.add(fieldName.getLabel(), (valArray.size() != 1) ? 
valArray : valArray.get(0)); } } // Add metadata related to the Dataset/DatasetVersion @@ -214,9 +144,11 @@ public JsonObjectBuilder getOREMapBuilder(boolean aggregationOnly) throws Except } aggBuilder.add(JsonLDTerm.schemaOrg("includedInDataCatalog").getLabel(), - BrandingUtil.getRootDataverseCollectionName()); + BrandingUtil.getInstallationBrandName()); + + aggBuilder.add(JsonLDTerm.schemaOrg("isPartOf").getLabel(), getDataverseDescription(dataset.getOwner())); String mdl = dataset.getMetadataLanguage(); - if(!mdl.equals(DvObjectContainer.UNDEFINED_METADATA_LANGUAGE_CODE)) { + if (DvObjectContainer.isMetadataLanguageSet(mdl)) { aggBuilder.add(JsonLDTerm.schemaOrg("inLanguage").getLabel(), mdl); } @@ -234,7 +166,32 @@ public JsonObjectBuilder getOREMapBuilder(boolean aggregationOnly) throws Except } else { addIfNotNull(aggRes, JsonLDTerm.schemaOrg("description"), df.getDescription()); } - addIfNotNull(aggRes, JsonLDTerm.schemaOrg("name"), fmd.getLabel()); // "label" is the filename + String fileName = fmd.getLabel();// "label" is the filename + long fileSize = df.getFilesize(); + String mimeType = df.getContentType(); + String currentIngestedName = null; + boolean ingested=df.getOriginalFileName()!= null || df.getOriginalFileSize()!=null || df.getOriginalFileFormat()!=null; + if(ingested) { + if(df.getOriginalFileName()!=null) { + currentIngestedName= fileName; + fileName = df.getOriginalFileName(); + } else { + logger.warning("Missing Original file name for id: " + df.getId()); + } + if(df.getOriginalFileSize()!=null) { + fileSize = df.getOriginalFileSize(); + } else { + logger.warning("Missing Original file size for id: " + df.getId()); + } + if(df.getOriginalFileFormat()!=null) { + mimeType = df.getOriginalFileFormat(); + } else { + logger.warning("Missing Original file format for id: " + df.getId()); + } + + + } + addIfNotNull(aggRes, JsonLDTerm.schemaOrg("name"), fileName); addIfNotNull(aggRes, JsonLDTerm.restricted, fmd.isRestricted()); addIfNotNull(aggRes, 
JsonLDTerm.directoryLabel, fmd.getDirectoryLabel()); addIfNotNull(aggRes, JsonLDTerm.schemaOrg("version"), fmd.getVersion()); @@ -257,21 +214,20 @@ public JsonObjectBuilder getOREMapBuilder(boolean aggregationOnly) throws Except if (df.getGlobalId().asString().length() != 0) { fileId = df.getGlobalId().asString(); fileSameAs = SystemConfig.getDataverseSiteUrlStatic() - + "/api/access/datafile/:persistentId?persistentId=" + fileId; + + "/api/access/datafile/:persistentId?persistentId=" + fileId + (ingested ? "&format=original":""); } else { fileId = SystemConfig.getDataverseSiteUrlStatic() + "/file.xhtml?fileId=" + df.getId(); - fileSameAs = SystemConfig.getDataverseSiteUrlStatic() + "/api/access/datafile/" + df.getId(); + fileSameAs = SystemConfig.getDataverseSiteUrlStatic() + "/api/access/datafile/" + df.getId() + (ingested ? "?format=original":""); } aggRes.add("@id", fileId); aggRes.add(JsonLDTerm.schemaOrg("sameAs").getLabel(), fileSameAs); fileArray.add(fileId); aggRes.add("@type", JsonLDTerm.ore("AggregatedResource").getLabel()); - addIfNotNull(aggRes, JsonLDTerm.schemaOrg("fileFormat"), df.getContentType()); - addIfNotNull(aggRes, JsonLDTerm.filesize, df.getFilesize()); + addIfNotNull(aggRes, JsonLDTerm.schemaOrg("fileFormat"), mimeType); + addIfNotNull(aggRes, JsonLDTerm.filesize, fileSize); addIfNotNull(aggRes, JsonLDTerm.storageIdentifier, df.getStorageIdentifier()); - addIfNotNull(aggRes, JsonLDTerm.originalFileFormat, df.getOriginalFileFormat()); - addIfNotNull(aggRes, JsonLDTerm.originalFormatLabel, df.getOriginalFormatLabel()); + addIfNotNull(aggRes, JsonLDTerm.currentIngestedName, currentIngestedName); addIfNotNull(aggRes, JsonLDTerm.UNF, df.getUnf()); addIfNotNull(aggRes, JsonLDTerm.rootDataFileId, df.getRootDataFileId()); addIfNotNull(aggRes, JsonLDTerm.previousDataFileId, df.getPreviousDataFileId()); @@ -320,6 +276,17 @@ public JsonObjectBuilder getOREMapBuilder(boolean aggregationOnly) throws Except } } + private JsonObjectBuilder 
getDataverseDescription(Dataverse dv) { + //Schema.org is already in local context, no updates needed as long as we only use chemaOrg and "@id" here + JsonObjectBuilder dvjob = Json.createObjectBuilder().add(JsonLDTerm.schemaOrg("name").getLabel(), dv.getCurrentName()).add("@id", dv.getLocalURL()); + addIfNotNull(dvjob, JsonLDTerm.schemaOrg("description"), dv.getDescription()); + Dataverse owner = dv.getOwner(); + if(owner!=null) { + dvjob.add(JsonLDTerm.schemaOrg("isPartOf").getLabel(), getDataverseDescription(owner)); + } + return dvjob; + } + /* * Simple methods to only add an entry to JSON if the value of the term is * non-null. Methods created for string, JsonValue, boolean, and long @@ -389,6 +356,89 @@ private JsonLDTerm getTermFor(String fieldTypeName) { } return null; } + + public static JsonValue getJsonLDForField(DatasetField field, Boolean excludeEmail, Map cvocMap, + Map localContext) { + + DatasetFieldType dfType = field.getDatasetFieldType(); + if (excludeEmail && DatasetFieldType.FieldType.EMAIL.equals(dfType.getFieldType())) { + return null; + } + + JsonLDTerm fieldName = dfType.getJsonLDTerm(); + if (fieldName.inNamespace()) { + localContext.putIfAbsent(fieldName.getNamespace().getPrefix(), fieldName.getNamespace().getUrl()); + } else { + localContext.putIfAbsent(fieldName.getLabel(), fieldName.getUrl()); + } + JsonArrayBuilder vals = Json.createArrayBuilder(); + if (!dfType.isCompound()) { + for (String val : field.getValues_nondisplay()) { + if (cvocMap.containsKey(dfType.getId())) { + try { + JsonObject cvocEntry = cvocMap.get(dfType.getId()); + if (cvocEntry.containsKey("retrieval-filtering")) { + JsonObject filtering = cvocEntry.getJsonObject("retrieval-filtering"); + JsonObject context = filtering.getJsonObject("@context"); + for (String prefix : context.keySet()) { + localContext.putIfAbsent(prefix, context.getString(prefix)); + } + vals.add(datasetFieldService.getExternalVocabularyValue(val)); + } else { + vals.add(val); + } + } catch 
(Exception e) { + logger.warning("Couldn't interpret value for : " + val + " : " + e.getMessage()); + logger.log(Level.FINE, ExceptionUtils.getStackTrace(e)); + vals.add(val); + } + } else { + vals.add(val); + } + } + } else { + // ToDo: Needs to be recursive (as in JsonPrinter?) + for (DatasetFieldCompoundValue dscv : field.getDatasetFieldCompoundValues()) { + // compound values are of different types + JsonObjectBuilder child = Json.createObjectBuilder(); + + for (DatasetField dsf : dscv.getChildDatasetFields()) { + DatasetFieldType dsft = dsf.getDatasetFieldType(); + if (excludeEmail && DatasetFieldType.FieldType.EMAIL.equals(dsft.getFieldType())) { + continue; + } + // which may have multiple values + if (!dsf.isEmpty()) { + // Add context entry + // ToDo - also needs to recurse here? + JsonLDTerm subFieldName = dsft.getJsonLDTerm(); + if (subFieldName.inNamespace()) { + localContext.putIfAbsent(subFieldName.getNamespace().getPrefix(), + subFieldName.getNamespace().getUrl()); + } else { + localContext.putIfAbsent(subFieldName.getLabel(), subFieldName.getUrl()); + } + + List values = dsf.getValues_nondisplay(); + if (values.size() > 1) { + JsonArrayBuilder childVals = Json.createArrayBuilder(); + + for (String val : dsf.getValues_nondisplay()) { + childVals.add(val); + } + child.add(subFieldName.getLabel(), childVals); + } else { + child.add(subFieldName.getLabel(), values.get(0)); + } + } + } + vals.add(child); + } + } + // Add metadata value to aggregation, suppress array when only one value + JsonArray valArray = vals.build(); + return (valArray.size() != 1) ? 
valArray : valArray.get(0); + } public static void injectSettingsService(SettingsServiceBean settingsSvc, DatasetFieldServiceBean datasetFieldSvc) { settingsService = settingsSvc; diff --git a/src/main/java/edu/harvard/iq/dataverse/util/file/BagItFileHandler.java b/src/main/java/edu/harvard/iq/dataverse/util/file/BagItFileHandler.java index 701b56d90e3..6162df049f8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/file/BagItFileHandler.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/file/BagItFileHandler.java @@ -21,7 +21,6 @@ import java.util.List; import java.util.Optional; import java.util.logging.Logger; -import java.util.stream.Collectors; /** * @@ -58,25 +57,25 @@ public CreateDataFileResult handleBagItPackage(SystemConfig systemConfig, Datase try { List packageDataFiles = processBagItPackage(systemConfig, datasetVersion, uploadedFilename, bagItPackageFile); if(packageDataFiles.isEmpty()) { - return CreateDataFileResult.error(FILE_TYPE, Collections.emptyList()); + return CreateDataFileResult.error(uploadedFilename, FILE_TYPE, Collections.emptyList()); } BagValidation bagValidation = validateBagItPackage(uploadedFilename, packageDataFiles); if(bagValidation.success()) { List finalItems = postProcessor.process(packageDataFiles); logger.info(String.format("action=handleBagItPackage result=success uploadedFilename=%s file=%s", uploadedFilename, bagItPackageFile.getName())); - return CreateDataFileResult.success(FILE_TYPE, finalItems); + return CreateDataFileResult.success(uploadedFilename, FILE_TYPE, finalItems); } // BagIt package has errors // Capture errors and return to caller - List errors = bagValidation.getFileResults().values().stream().filter(result -> result.isError()).map(result -> result.getMessage()).collect(Collectors.toList()); + List errors = bagValidation.getAllErrors(); logger.info(String.format("action=handleBagItPackage result=errors uploadedFilename=%s file=%s errors=%s", uploadedFilename, bagItPackageFile.getName(), 
errors.size())); - return CreateDataFileResult.error(FILE_TYPE, errors); + return CreateDataFileResult.error(uploadedFilename, FILE_TYPE, errors); } catch (BagItFileHandlerException e) { logger.severe(String.format("action=handleBagItPackage result=error uploadedFilename=%s file=%s message=%s", uploadedFilename, bagItPackageFile.getName(), e.getMessage())); - return CreateDataFileResult.error(FILE_TYPE, Arrays.asList(e.getMessage())); + return CreateDataFileResult.error(uploadedFilename, FILE_TYPE, Arrays.asList(e.getMessage())); } finally { fileUtil.deleteFile(bagItPackageFile.toPath()); } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/file/BagItFileHandlerPostProcessor.java b/src/main/java/edu/harvard/iq/dataverse/util/file/BagItFileHandlerPostProcessor.java index e8dcb3ad2fe..ba233f3f364 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/file/BagItFileHandlerPostProcessor.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/file/BagItFileHandlerPostProcessor.java @@ -15,7 +15,7 @@ public class BagItFileHandlerPostProcessor { private static final Logger logger = Logger.getLogger(BagItFileHandlerPostProcessor.class.getCanonicalName()); - public static final List FILES_TO_IGNORE = Arrays.asList("__", "._", ".DS_Store", "._.DS_Store"); + public static final List FILES_TO_IGNORE = Arrays.asList("__", "._", ".DS_Store"); public List process(List items) { if(items == null) { @@ -26,7 +26,11 @@ public List process(List items) { for(DataFile item: items) { String fileName = item.getCurrentName(); - if(FILES_TO_IGNORE.contains(fileName)) { + if(fileName == null || fileName.isEmpty()) { + continue; + } + + if(FILES_TO_IGNORE.stream().anyMatch(prefix -> fileName.startsWith(prefix))) { logger.fine(String.format("action=BagItFileHandlerPostProcessor result=ignore-entry file=%s", fileName)); continue; } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/file/CreateDataFileResult.java 
b/src/main/java/edu/harvard/iq/dataverse/util/file/CreateDataFileResult.java index 046ebb10c0f..4e33ffbc75d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/file/CreateDataFileResult.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/file/CreateDataFileResult.java @@ -13,26 +13,32 @@ public class CreateDataFileResult { private static final String BUNDLE_KEY_PREFIX = "dataset.file.error"; + private final String filename; private final String type; private final List dataFiles; private final List errors; - public CreateDataFileResult(String type, List dataFiles, List errors) { + public CreateDataFileResult(String filename, String type, List dataFiles, List errors) { + this.filename = filename; this.type = type; this.dataFiles = dataFiles == null ? null : Collections.unmodifiableList(dataFiles); this.errors = errors == null ? Collections.emptyList() : Collections.unmodifiableList(errors); } - public static CreateDataFileResult success(String type, List dataFiles) { - return new CreateDataFileResult(type, dataFiles, null); + public static CreateDataFileResult success(String filename, String type, List dataFiles) { + return new CreateDataFileResult(filename, type, dataFiles, null); } - public static CreateDataFileResult error(String type) { - return new CreateDataFileResult(type, null, Collections.emptyList()); + public static CreateDataFileResult error(String filename, String type) { + return new CreateDataFileResult(filename, type, null, Collections.emptyList()); } - public static CreateDataFileResult error(String type, List errors) { - return new CreateDataFileResult(type, null, errors); + public static CreateDataFileResult error(String filename, String type, List errors) { + return new CreateDataFileResult(filename, type, null, errors); + } + + public String getFilename() { + return filename; } public String getType() { diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JSONLDUtil.java 
b/src/main/java/edu/harvard/iq/dataverse/util/json/JSONLDUtil.java index 465360f84cc..127632bf711 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JSONLDUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JSONLDUtil.java @@ -45,7 +45,7 @@ import org.apache.commons.lang3.StringUtils; import com.apicatalog.jsonld.JsonLd; -import com.apicatalog.jsonld.api.JsonLdError; +import com.apicatalog.jsonld.JsonLdError; import com.apicatalog.jsonld.document.JsonDocument; import edu.harvard.iq.dataverse.DatasetVersion.VersionState; diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonLDTerm.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonLDTerm.java index 20aeceda7de..065097709cf 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonLDTerm.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonLDTerm.java @@ -34,8 +34,11 @@ public class JsonLDTerm { public static JsonLDTerm categories = JsonLDTerm.DVCore("categories"); public static JsonLDTerm filesize = JsonLDTerm.DVCore("filesize"); public static JsonLDTerm storageIdentifier = JsonLDTerm.DVCore("storageIdentifier"); + @Deprecated public static JsonLDTerm originalFileFormat = JsonLDTerm.DVCore("originalFileFormat"); + @Deprecated public static JsonLDTerm originalFormatLabel = JsonLDTerm.DVCore("originalFormatLabel"); + public static JsonLDTerm currentIngestedName= JsonLDTerm.DVCore("currentIngestedName"); public static JsonLDTerm UNF = JsonLDTerm.DVCore("UNF"); public static JsonLDTerm rootDataFileId = JsonLDTerm.DVCore("rootDataFileId"); public static JsonLDTerm previousDataFileId = JsonLDTerm.DVCore("previousDataFileId"); diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java index a2becb20d7d..4ecdc73ae6e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java @@ -29,6 +29,7 
@@ import edu.harvard.iq.dataverse.license.License; import edu.harvard.iq.dataverse.license.LicenseServiceBean; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.workflow.Workflow; import edu.harvard.iq.dataverse.workflow.step.WorkflowStepData; import org.apache.commons.validator.routines.DomainValidator; @@ -37,6 +38,7 @@ import java.sql.Timestamp; import java.text.ParseException; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.Date; import java.util.HashMap; @@ -385,7 +387,11 @@ public DatasetVersion parseDatasetVersion(JsonObject obj, DatasetVersion dsv) th terms.setFileAccessRequest(obj.getBoolean("fileAccessRequest", false)); dsv.setTermsOfUseAndAccess(terms); terms.setDatasetVersion(dsv); - dsv.setDatasetFields(parseMetadataBlocks(obj.getJsonObject("metadataBlocks"))); + JsonObject metadataBlocks = obj.getJsonObject("metadataBlocks"); + if (metadataBlocks == null){ + throw new JsonParseException(BundleUtil.getStringFromBundle("jsonparser.error.metadatablocks.not.found")); + } + dsv.setDatasetFields(parseMetadataBlocks(metadataBlocks)); JsonArray filesJson = obj.getJsonArray("files"); if (filesJson == null) { @@ -395,11 +401,10 @@ public DatasetVersion parseDatasetVersion(JsonObject obj, DatasetVersion dsv) th dsv.setFileMetadatas(parseFiles(filesJson, dsv)); } return dsv; - - } catch (ParseException ex) { - throw new JsonParseException("Error parsing date:" + ex.getMessage(), ex); + } catch (ParseException ex) { + throw new JsonParseException(BundleUtil.getStringFromBundle("jsonparser.error.parsing.date", Arrays.asList(ex.getMessage())) , ex); } catch (NumberFormatException ex) { - throw new JsonParseException("Error parsing number:" + ex.getMessage(), ex); + throw new JsonParseException(BundleUtil.getStringFromBundle("jsonparser.error.parsing.number", Arrays.asList(ex.getMessage())), ex); } } diff --git 
a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java index 91f1ac2cfbc..e088122419d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java @@ -1,6 +1,23 @@ package edu.harvard.iq.dataverse.util.json; import edu.harvard.iq.dataverse.*; +import edu.harvard.iq.dataverse.AuxiliaryFile; +import edu.harvard.iq.dataverse.ControlledVocabularyValue; +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.DataFileTag; +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.DatasetDistributor; +import edu.harvard.iq.dataverse.DatasetFieldType; +import edu.harvard.iq.dataverse.DatasetField; +import edu.harvard.iq.dataverse.DatasetFieldCompoundValue; +import edu.harvard.iq.dataverse.DatasetFieldValue; +import edu.harvard.iq.dataverse.DatasetLock; +import edu.harvard.iq.dataverse.DatasetVersion; +import edu.harvard.iq.dataverse.Dataverse; +import edu.harvard.iq.dataverse.DataverseContact; +import edu.harvard.iq.dataverse.DataverseFacet; +import edu.harvard.iq.dataverse.DataverseTheme; +import edu.harvard.iq.dataverse.api.Datasets; import edu.harvard.iq.dataverse.authorization.DataverseRole; import edu.harvard.iq.dataverse.authorization.groups.impl.maildomain.MailDomainGroup; import edu.harvard.iq.dataverse.authorization.providers.builtin.BuiltinUser; @@ -19,6 +36,7 @@ import edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.dataset.DatasetUtil; import edu.harvard.iq.dataverse.license.License; +import edu.harvard.iq.dataverse.globus.FileDetailsHolder; import edu.harvard.iq.dataverse.privateurl.PrivateUrl; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.DatasetFieldWalker; @@ -60,7 +78,7 @@ public class JsonPrinter { @EJB static DatasetFieldServiceBean datasetFieldService; - + public 
static void injectSettingsService(SettingsServiceBean ssb, DatasetFieldServiceBean dfsb) { settingsService = ssb; datasetFieldService = dfsb; @@ -314,7 +332,7 @@ public static JsonObjectBuilder json(BuiltinUser user) { } public static JsonObjectBuilder json(Dataset ds) { - return jsonObjectBuilder() + JsonObjectBuilder bld = jsonObjectBuilder() .add("id", ds.getId()) .add("identifier", ds.getIdentifier()) .add("persistentUrl", ds.getPersistentURL()) @@ -322,8 +340,19 @@ public static JsonObjectBuilder json(Dataset ds) { .add("authority", ds.getAuthority()) .add("publisher", BrandingUtil.getInstallationBrandName()) .add("publicationDate", ds.getPublicationDateFormattedYYYYMMDD()) - .add("storageIdentifier", ds.getStorageIdentifier()) - .add("metadataLanguage", ds.getMetadataLanguage()); + .add("storageIdentifier", ds.getStorageIdentifier()); + if (DvObjectContainer.isMetadataLanguageSet(ds.getMetadataLanguage())) { + bld.add("metadataLanguage", ds.getMetadataLanguage()); + } + return bld; + } + + public static JsonObjectBuilder json(FileDetailsHolder ds) { + return Json.createObjectBuilder().add(ds.getStorageID() , + Json.createObjectBuilder() + .add("id", ds.getStorageID() ) + .add("hash", ds.getHash()) + .add("mime",ds.getMime())); } public static JsonObjectBuilder json(DatasetVersion dsv) { @@ -338,7 +367,7 @@ public static JsonObjectBuilder json(DatasetVersion dsv) { .add("UNF", dsv.getUNF()).add("archiveTime", format(dsv.getArchiveTime())) .add("lastUpdateTime", format(dsv.getLastUpdateTime())).add("releaseTime", format(dsv.getReleaseTime())) .add("createTime", format(dsv.getCreateTime())); - License license = dsv.getTermsOfUseAndAccess().getLicense(); + License license = DatasetUtil.getLicense(dsv);; if (license != null) { // Standard license bld.add("license", jsonObjectBuilder() @@ -468,7 +497,7 @@ public static JsonObjectBuilder json(MetadataBlock block, List fie blockBld.add("name", block.getName()); final JsonArrayBuilder fieldsArray = 
Json.createArrayBuilder(); - Map cvocMap = (datasetFieldService==null) ? new HashMap() :datasetFieldService.getCVocConf(false); + Map cvocMap = (datasetFieldService==null) ? new HashMap() :datasetFieldService.getCVocConf(false); DatasetFieldWalker.walk(fields, settingsService, cvocMap, new DatasetFieldsToJson(fieldsArray)); blockBld.add("fields", fieldsArray); @@ -684,7 +713,7 @@ public void startField(DatasetField f) { objectStack.peek().add("multiple", typ.isAllowMultiples()); objectStack.peek().add("typeClass", typeClassString(typ)); } - + @Override public void addExpandedValuesArray(DatasetField f) { // Invariant: all values are multiple. Diffrentiation between multiple and single is done at endField. @@ -705,7 +734,7 @@ public void endField(DatasetField f) { f.getDatasetFieldType().isAllowMultiples() ? expandedValues : expandedValues.get(0)); } - + valueArrStack.peek().add(jsonField); } } @@ -721,7 +750,7 @@ public void externalVocabularyValue(DatasetFieldValue dsfv, JsonObject cvocEntry } } } - + @Override public void primitiveValue(DatasetFieldValue dsfv) { if (dsfv.getValue() != null) { diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonUtil.java index ae6935945e8..f4a3c635f8b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonUtil.java @@ -3,6 +3,8 @@ import com.google.gson.Gson; import com.google.gson.GsonBuilder; import com.google.gson.JsonObject; + +import java.io.StringReader; import java.io.StringWriter; import java.util.HashMap; import java.util.Map; @@ -56,4 +58,9 @@ public static String prettyPrint(javax.json.JsonObject jsonObject) { return stringWriter.toString(); } + public static javax.json.JsonObject getJsonObject(String serializedJson) { + try (StringReader rdr = new StringReader(serializedJson)) { + return Json.createReader(rdr).readObject(); + } + } } diff --git 
a/src/main/java/edu/harvard/iq/dataverse/workflow/internalspi/InternalWorkflowStepSP.java b/src/main/java/edu/harvard/iq/dataverse/workflow/internalspi/InternalWorkflowStepSP.java index ef11d306cd3..d99e0901d3c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/workflow/internalspi/InternalWorkflowStepSP.java +++ b/src/main/java/edu/harvard/iq/dataverse/workflow/internalspi/InternalWorkflowStepSP.java @@ -25,6 +25,8 @@ public WorkflowStep getStep(String stepType, Map stepParameters) return new AuthorizedExternalStep(stepParameters); case "archiver": return new ArchivalSubmissionWorkflowStep(stepParameters); + case "ldnannounce": + return new LDNAnnounceDatasetVersionStep(stepParameters); default: throw new IllegalArgumentException("Unsupported step type: '" + stepType + "'."); } diff --git a/src/main/java/edu/harvard/iq/dataverse/workflow/internalspi/LDNAnnounceDatasetVersionStep.java b/src/main/java/edu/harvard/iq/dataverse/workflow/internalspi/LDNAnnounceDatasetVersionStep.java new file mode 100644 index 00000000000..3478d9398f0 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/workflow/internalspi/LDNAnnounceDatasetVersionStep.java @@ -0,0 +1,279 @@ +package edu.harvard.iq.dataverse.workflow.internalspi; + +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.DatasetField; +import edu.harvard.iq.dataverse.DatasetFieldType; +import edu.harvard.iq.dataverse.DatasetVersion; +import edu.harvard.iq.dataverse.branding.BrandingUtil; +import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.bagit.OREMap; +import edu.harvard.iq.dataverse.util.json.JsonLDTerm; +import edu.harvard.iq.dataverse.util.json.JsonUtil; +import edu.harvard.iq.dataverse.workflow.WorkflowContext; +import edu.harvard.iq.dataverse.workflow.step.Failure; +import edu.harvard.iq.dataverse.workflow.step.WorkflowStep; +import edu.harvard.iq.dataverse.workflow.step.WorkflowStepResult; +import static 
edu.harvard.iq.dataverse.workflow.step.WorkflowStepResult.OK; + +import java.net.URI; +import java.net.URISyntaxException; +import java.nio.charset.StandardCharsets; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; +import java.util.UUID; +import java.util.logging.Level; +import java.util.logging.Logger; +import javax.json.Json; +import javax.json.JsonArray; +import javax.json.JsonArrayBuilder; +import javax.json.JsonObject; +import javax.json.JsonObjectBuilder; +import javax.json.JsonValue; + +import org.apache.http.client.methods.CloseableHttpResponse; +import org.apache.http.client.methods.HttpPost; +import org.apache.http.entity.StringEntity; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.HttpClients; + +/** + * A workflow step that generates and sends an LDN Announcement message to the + * inbox of a configured target. The initial use case is for Dataverse to + * announce new dataset versions to the Harvard DASH preprint repository so that + * a DASH admin can create a backlink for any dataset versions that reference a + * DASH deposit or a paper with a DOI where DASH has a preprint copy.
+ * + * @author qqmyers + */ + +public class LDNAnnounceDatasetVersionStep implements WorkflowStep { + private static final Logger logger = Logger.getLogger(LDNAnnounceDatasetVersionStep.class.getName()); + private static final String REQUIRED_FIELDS = ":LDNAnnounceRequiredFields"; + private static final String LDN_TARGET = ":LDNTarget"; + private static final String RELATED_PUBLICATION = "publication"; + + JsonLDTerm publicationIDType = null; + JsonLDTerm publicationIDNumber = null; + JsonLDTerm publicationURL = null; + + public LDNAnnounceDatasetVersionStep(Map paramSet) { + new HashMap<>(paramSet); + } + + @Override + public WorkflowStepResult run(WorkflowContext context) { + + JsonObject target = JsonUtil.getJsonObject((String) context.getSettings().get(LDN_TARGET)); + if (target != null) { + String inboxUrl = target.getString("inbox"); + + CloseableHttpClient client = HttpClients.createDefault(); + + // build method + + HttpPost announcement; + try { + announcement = buildAnnouncement(false, context, target); + } catch (URISyntaxException e) { + return new Failure("LDNAnnounceDatasetVersion workflow step failed: unable to parse inbox in :LDNTarget setting."); + } + if(announcement==null) { + logger.info(context.getDataset().getGlobalId().asString() + "does not have metadata required to send LDN message. Nothing sent."); + return OK; + } + // execute + try (CloseableHttpResponse response = client.execute(announcement)) { + int code = response.getStatusLine().getStatusCode(); + if (code >= 200 && code < 300) { + // HTTP OK range + return OK; + } else { + String responseBody = new String(response.getEntity().getContent().readAllBytes(), + StandardCharsets.UTF_8); + ; + return new Failure("Error communicating with " + inboxUrl + ". 
Server response: " + responseBody + + " (" + response + ")."); + } + + } catch (Exception ex) { + logger.log(Level.SEVERE, "Error communicating with remote server: " + ex.getMessage(), ex); + return new Failure("Error executing request: " + ex.getLocalizedMessage(), + "Cannot communicate with remote server."); + } + } + return new Failure("LDNAnnounceDatasetVersion workflow step failed: :LDNTarget setting missing or invalid."); + } + + @Override + public WorkflowStepResult resume(WorkflowContext context, Map internalData, String externalData) { + throw new UnsupportedOperationException("Not supported yet."); // This class does not need to resume. + } + + @Override + public void rollback(WorkflowContext context, Failure reason) { + throw new UnsupportedOperationException("Not supported yet."); // This class does not support rollback. + } + + HttpPost buildAnnouncement(boolean qb, WorkflowContext ctxt, JsonObject target) throws URISyntaxException { + + // First check that we have what is required + DatasetVersion dv = ctxt.getDataset().getReleasedVersion(); + List dvf = dv.getDatasetFields(); + Map fields = new HashMap(); + String[] requiredFields = ((String) ctxt.getSettings().getOrDefault(REQUIRED_FIELDS, "")).split(",\\s*"); + for (String field : requiredFields) { + fields.put(field, null); + } + Set reqFields = fields.keySet(); + for (DatasetField df : dvf) { + if(!df.isEmpty() && reqFields.contains(df.getDatasetFieldType().getName())) { + fields.put(df.getDatasetFieldType().getName(), df); + } + } + if (fields.containsValue(null)) { + logger.fine("DatasetVersion doesn't contain metadata required to trigger announcement"); + return null; + } + // We do, so construct the json-ld body and method + + Map localContext = new HashMap(); + JsonObjectBuilder coarContext = Json.createObjectBuilder(); + Map emptyCvocMap = new HashMap(); + boolean includeLocalContext = false; + for (Entry entry : fields.entrySet()) { + DatasetField field = entry.getValue(); + DatasetFieldType
dft = field.getDatasetFieldType(); + String dfTypeName = entry.getKey(); + JsonValue jv = OREMap.getJsonLDForField(field, false, emptyCvocMap, localContext); + switch (dfTypeName) { + case RELATED_PUBLICATION: + JsonArrayBuilder relArrayBuilder = Json.createArrayBuilder(); + publicationIDType = null; + publicationIDNumber = null; + publicationURL = null; + Collection childTypes = dft.getChildDatasetFieldTypes(); + for (DatasetFieldType cdft : childTypes) { + switch (cdft.getName()) { + case "publicationURL": + publicationURL = cdft.getJsonLDTerm(); + break; + case "publicationIDType": + publicationIDType = cdft.getJsonLDTerm(); + break; + case "publicationIDNumber": + publicationIDNumber = cdft.getJsonLDTerm(); + break; + } + + } + + if (jv != null) { + if (jv instanceof JsonArray) { + JsonArray rels = (JsonArray) jv; + for (JsonObject jo : rels.getValuesAs(JsonObject.class)) { + String id = getBestPubId(jo); + relArrayBuilder.add(Json.createObjectBuilder().add("id", id).add("ietf:cite-as", id) + .add("type", "sorg:ScholaryArticle").build()); + } + } + + else { // JsonObject + String id = getBestPubId((JsonObject) jv); + relArrayBuilder.add(Json.createObjectBuilder().add("id", id).add("ietf:cite-as", id) + .add("type", "sorg:ScholaryArticle").build()); + } + } + coarContext.add("IsSupplementTo", relArrayBuilder); + break; + default: + if (jv != null) { + includeLocalContext = true; + coarContext.add(dft.getJsonLDTerm().getLabel(), jv); + } + + } + } + dvf.get(0).getDatasetFieldType().getName(); + JsonObjectBuilder job = Json.createObjectBuilder(); + JsonArrayBuilder context = Json.createArrayBuilder().add("https://purl.org/coar/notify") + .add("https://www.w3.org/ns/activitystreams"); + if (includeLocalContext && !localContext.isEmpty()) { + JsonObjectBuilder contextBuilder = Json.createObjectBuilder(); + for (Entry e : localContext.entrySet()) { + contextBuilder.add(e.getKey(), e.getValue()); + } + context.add(contextBuilder); + } + job.add("@context", context); + 
job.add("id", "urn:uuid:" + UUID.randomUUID().toString()); + job.add("actor", Json.createObjectBuilder().add("id", SystemConfig.getDataverseSiteUrlStatic()) + .add("name", BrandingUtil.getInstallationBrandName()).add("type", "Service")); + job.add("context", coarContext); + Dataset d = ctxt.getDataset(); + job.add("object", + Json.createObjectBuilder().add("id", d.getLocalURL()) + .add("ietf:cite-as", d.getGlobalId().toURL().toExternalForm()) + .add("sorg:name", d.getDisplayName()).add("type", "sorg:Dataset")); + job.add("origin", Json.createObjectBuilder().add("id", SystemConfig.getDataverseSiteUrlStatic()) + .add("inbox", SystemConfig.getDataverseSiteUrlStatic() + "/api/inbox").add("type", "Service")); + job.add("target", target); + job.add("type", Json.createArrayBuilder().add("Announce").add("coar-notify:ReleaseAction")); + + HttpPost annPost = new HttpPost(); + annPost.setURI(new URI(target.getString("inbox"))); + String body = JsonUtil.prettyPrint(job.build()); + logger.fine("Body: " + body); + annPost.setEntity(new StringEntity(JsonUtil.prettyPrint(body), "utf-8")); + annPost.setHeader("Content-Type", "application/ld+json"); + return annPost; + } + + private String getBestPubId(JsonObject jo) { + String id = null; + if (jo.containsKey(publicationURL.getLabel())) { + id = jo.getString(publicationURL.getLabel()); + } else if (jo.containsKey(publicationIDType.getLabel())) { + if ((jo.containsKey(publicationIDNumber.getLabel()))) { + String number = jo.getString(publicationIDNumber.getLabel()); + + switch (jo.getString(publicationIDType.getLabel())) { + case "doi": + if (number.startsWith("https://doi.org/")) { + id = number; + } else if (number.startsWith("doi:")) { + id = "https://doi.org/" + number.substring(4); + } + + break; + case "DASH-URN": + if (number.startsWith("http")) { + id = number; + } + break; + } + } + } + return id; + } + + String process(String template, Map values) { + String curValue = template; + for (Map.Entry ent : values.entrySet()) { + 
String val = ent.getValue(); + if (val == null) { + val = ""; + } + String varRef = "${" + ent.getKey() + "}"; + while (curValue.contains(varRef)) { + curValue = curValue.replace(varRef, val); + } + } + + return curValue; + } + +} diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index a7c40be7ec8..8a4fdeb9e28 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -216,6 +216,7 @@ notification.publishFailedPidReg={0} in {1} could not be published due to a fail notification.workflowFailed=An external workflow run on {0} in {1} has failed. Check your email and/or view the Dataset page which may have additional details. Contact support if this continues to happen. notification.workflowSucceeded=An external workflow run on {0} in {1} has succeeded. Check your email and/or view the Dataset page which may have additional details. notification.statusUpdated=The status of dataset {0} has been updated to {1}. +notification.datasetMentioned=Announcement Received: Newly released {0} {2} {3} Dataset {4}. notification.ingestCompleted=Dataset {1} has one or more tabular files that completed the tabular ingest process and are available in archival formats. notification.ingestCompletedWithErrors=Dataset {1} has one or more tabular files that are available but are not supported for tabular ingest. @@ -231,8 +232,16 @@ notification.access.revoked.datafile=You have been removed from a role in {0}. notification.checksumfail=One or more files in your upload failed checksum validation for dataset {1}. Please re-run the upload script. If the problem persists, please contact support. notification.ingest.completed=Your Dataset {2} has one or more tabular files that completed the tabular ingest process. These files will be available for download in their original formats and other formats for enhanced archival purposes after you publish the dataset. 
The archival .tab files are displayed in the file table. Please see the guides for more information about ingest and support for tabular files. notification.ingest.completedwitherrors=Your Dataset {2} has one or more tabular files that have been uploaded successfully but are not supported for tabular ingest. After you publish the dataset, these files will not have additional archival features. Please see the guides for more information about ingest and support for tabular files.

        Files with incomplete ingest:{5} -notification.mail.import.filesystem=Dataset {2} ({0}/dataset.xhtml?persistentId={1}) has been successfully uploaded and verified. -notification.import.filesystem=Dataset {1} has been successfully uploaded and verified. +notification.mail.import.filesystem=Globus transfer to Dataset {2} ({0}/dataset.xhtml?persistentId={1}) was successful. File(s) have been uploaded and verified. +notification.mail.globus.upload.completed=Globus transfer to Dataset {2} was successful. File(s) have been uploaded and verified.

        {3}
        +notification.mail.globus.download.completed=Globus transfer of file(s) from the dataset {2} was successful.

        {3}
        +notification.mail.globus.upload.completedWithErrors=Globus transfer to Dataset {2} is complete with errors.

        {3}
        +notification.mail.globus.download.completedWithErrors=Globus transfer from the dataset {2} is complete with errors.

        {3}
        +notification.import.filesystem=Globus transfer to Dataset {1} was successful. File(s) have been uploaded and verified. +notification.globus.upload.completed=Globus transfer to Dataset {1} was successful. File(s) have been uploaded and verified. +notification.globus.download.completed=Globus transfer from the dataset {1} was successful. +notification.globus.upload.completedWithErrors=Globus transfer to Dataset {1} is complete with errors. +notification.globus.download.completedWithErrors=Globus transfer from the dataset {1} is complete with errors. notification.import.checksum={1}, dataset had file checksums added via a batch job. removeNotification=Remove Notification @@ -265,6 +274,7 @@ notification.typeDescription.WORKFLOW_SUCCESS=External workflow run has succeede notification.typeDescription.WORKFLOW_FAILURE=External workflow run has failed notification.typeDescription.STATUSUPDATED=Status of dataset has been updated notification.typeDescription.DATASETCREATED=Dataset was created by user +notification.typeDescription.DATASETMENTIONED=Dataset was referenced in remote system groupAndRoles.manageTips=Here is where you can access and manage all the groups you belong to, and the roles you have been assigned. user.message.signup.label=Create Account @@ -780,6 +790,12 @@ contact.delegation.default_personal=Dataverse Installation Admin notification.email.info.unavailable=Unavailable notification.email.apiTokenGenerated=Hello {0} {1},\n\nAPI Token has been generated. Please keep it secure as you would do with a password. notification.email.apiTokenGenerated.subject=API Token was generated +notification.email.datasetWasMentioned=Hello {0},

        The {1} has just been notified that the {2}, {4}, {5} "{8}" in this repository. +notification.email.datasetWasMentioned.subject={0}: A Dataset Relationship has been reported! +notification.email.globus.uploadCompleted.subject={0}: Files uploaded successfully via Globus and verified +notification.email.globus.downloadCompleted.subject={0}: Files downloaded successfully via Globus +notification.email.globus.uploadCompletedWithErrors.subject={0}: Uploaded files via Globus with errors +notification.email.globus.downloadCompletedWithErrors.subject={0}: Downloaded files via Globus with errors # dataverse.xhtml dataverse.name=Dataverse Name @@ -914,9 +930,9 @@ advanced.search.files.variableName=Variable Name advanced.search.files.variableName.tip=The name of the variable's column in the data frame. advanced.search.files.variableLabel=Variable Label advanced.search.files.variableLabel.tip=A short description of the variable. -advanced.search.datasets.persistentId.tip=The persistent identifier for the dataset. -advanced.search.datasets.persistentId=Dataset Persistent ID -advanced.search.datasets.persistentId.tip=The unique persistent identifier for a dataset, which can be a Handle or DOI in Dataverse. +advanced.search.datasets.persistentId.tip=The persistent identifier for the Dataset. +advanced.search.datasets.persistentId=Persistent Identifier +advanced.search.datasets.persistentId.tip=The Dataset's unique persistent identifier, either a DOI or Handle advanced.search.files.fileTags=File Tags advanced.search.files.fileTags.tip=Terms such "Documentation", "Data", or "Code" that have been applied to files. @@ -1511,7 +1527,7 @@ dataset.message.editMetadata.label=Edit Dataset Metadata dataset.message.editMetadata.message=Add more metadata about this dataset to help others easily find it. 
dataset.message.editMetadata.duplicateFilenames=Duplicate filenames: {0} dataset.message.editMetadata.invalid.TOUA.message=Datasets with restricted files are required to have Request Access enabled or Terms of Access to help people access the data. Please edit the dataset to confirm Request Access or provide Terms of Access to be in compliance with the policy. -dataset.message.toua.invalid=You must enable request access or add terms of access in datasets with restricted files. +dataset.message.toua.invalid=Terms of Use and Access are invalid. You must enable request access or add terms of access in datasets with restricted files. dataset.message.editTerms.label=Edit Dataset Terms dataset.message.editTerms.message=Add the terms of use for this dataset to explain how to access and use your data. @@ -1536,6 +1552,9 @@ dataset.message.submit.remind.draft=When ready for sharing, please submit it dataset.message.publish.remind.draft.filePage=When ready for sharing, please go to the dataset page to publish it so that others can see these changes. dataset.message.submit.remind.draft.filePage=When ready for sharing, please go to the dataset page to submit it for review. dataset.message.publishSuccess=This dataset has been published. +dataset.message.publishGlobusFailure.details=Could not publish Globus data. +dataset.message.publishGlobusFailure=Error with publishing data. +dataset.message.GlobusError=Cannot go to Globus. dataset.message.only.authenticatedUsers=Only authenticated users may release Datasets. dataset.message.deleteSuccess=This dataset has been deleted. dataset.message.bulkFileUpdateSuccess=The selected files have been updated. @@ -1552,18 +1571,18 @@ dataset.message.deleteFailure=This dataset draft could not be deleted. dataset.message.deaccessionFailure=This dataset could not be deaccessioned. dataset.message.createFailure=The dataset could not be created. dataset.message.termsFailure=The dataset terms could not be updated. 
-dataset.message.label.fileAccess=File Access -dataset.message.publicInstall=Files are stored on a publicly accessible storage server. +dataset.message.label.fileAccess=Publicly-accessible storage +dataset.message.publicInstall=Files in this dataset may be readable outside Dataverse, restricted and embargoed access are disabled dataset.metadata.publicationDate=Publication Date -dataset.metadata.publicationDate.tip=The publication date of a dataset. +dataset.metadata.publicationDate.tip=The publication date of a Dataset. dataset.metadata.citationDate=Citation Date dataset.metadata.citationDate.tip=The citation date of a dataset, determined by the longest embargo on any file in version 1.0. dataset.metadata.publicationYear=Publication Year dataset.metadata.publicationYear.tip=The publication year of a dataset. -dataset.metadata.persistentId=Dataset Persistent ID -dataset.metadata.persistentId.tip=The unique persistent identifier for a dataset, which can be a Handle or DOI in Dataverse. +dataset.metadata.persistentId=Persistent Identifier +dataset.metadata.persistentId.tip=The Dataset's unique persistent identifier, either a DOI or Handle dataset.metadata.alternativePersistentId=Previous Dataset Persistent ID -dataset.metadata.alternativePersistentId.tip=A previously used persistent identifier for a dataset, which can be a Handle or DOI in Dataverse. +dataset.metadata.alternativePersistentId.tip=A previously used persistent identifier for the Dataset, either a DOI or Handle file.metadata.preview=Preview file.metadata.filetags=File Tags file.metadata.persistentId=File Persistent ID @@ -1645,6 +1664,13 @@ file.fromHTTP=Upload with HTTP via your browser file.fromDropbox=Upload from Dropbox file.fromDropbox.tip=Select files from Dropbox. file.fromRsync=Upload with rsync + SSH via Data Capture Module (DCM) +file.fromGlobus.tip=Upload files via Globus transfer. This method is recommended for large file transfers. 
(Using it will cancel any other types of uploads in progress on this page.) +file.fromGlobusAfterCreate.tip=File upload via Globus transfer will be enabled after this dataset is created. +file.fromGlobus=Upload with Globus +file.finishGlobus=Globus Transfer has finished +file.downloadFromGlobus=Download through Globus +file.globus.transfer=Globus Transfer +file.globus.of=of: file.api.httpDisabled=File upload via HTTP is not available for this installation of Dataverse. file.api.alreadyHasPackageFile=File upload via HTTP disabled since this dataset already contains a package file. file.replace.original=Original File @@ -1690,6 +1716,7 @@ file.download.header=Download file.download.subset.header=Download Data Subset file.preview=Preview: file.fileName=File Name +file.sizeNotAvailable=Size not available file.type.tabularData=Tabular Data file.originalChecksumType=Original File {0} file.checksum.exists.tip=A file with this checksum already exists in the dataset. @@ -1715,6 +1742,8 @@ file.rsyncUpload.httpUploadDisabledDueToRsyncFileExisting=HTTP upload is disable file.rsyncUpload.httpUploadDisabledDueToRsyncFileExistingAndPublished=HTTP upload is disabled for this dataset because you have already uploaded files via rsync and published the dataset. file.rsyncUpload.rsyncUploadDisabledDueFileUploadedViaHttp=Upload with rsync + SSH is disabled for this dataset because you have already uploaded files via HTTP. If you would like to switch to rsync upload, then you must first remove all uploaded files from this dataset. Once this dataset is published, the chosen upload method is permanently locked in. file.rsyncUpload.rsyncUploadDisabledDueFileUploadedViaHttpAndPublished=Upload with rsync + SSH is disabled for this dataset because you have already uploaded files via HTTP and published the dataset. 
+file.globusUpload.inProgressMessage.summary=Globus Transfer in Progress +file.globusUpload.inProgressMessage.details=This dataset is locked while the data files are being transferred and verified. Large transfers may take significant time. You can check transfer status at https://app.globus.org/activity. file.metaData.checksum.copy=Click to copy file.metaData.dataFile.dataTab.unf=UNF file.metaData.dataFile.dataTab.variables=Variables @@ -1868,6 +1897,12 @@ file.dataFilesTab.versions.headers.summary=Summary file.dataFilesTab.versions.headers.contributors=Contributors file.dataFilesTab.versions.headers.contributors.withheld=Contributor name(s) withheld file.dataFilesTab.versions.headers.published=Published on +file.dataFilesTab.versions.headers.archived=Archival Status +file.dataFilesTab.versions.headers.archived.success=Archived +file.dataFilesTab.versions.headers.archived.pending=Pending +file.dataFilesTab.versions.headers.archived.failure=Failed +file.dataFilesTab.versions.headers.archived.notarchived=Not Archived +file.dataFilesTab.versions.headers.archived.submit=Submit file.dataFilesTab.versions.viewDiffBtn=View Differences file.dataFilesTab.versions.citationMetadata=Citation Metadata: file.dataFilesTab.versions.added=Added @@ -1965,6 +2000,8 @@ file.results.btn.sort.option.type=Type file.compute.fileAccessDenied=This file is restricted and you may not compute on it because you have not been granted access. file.configure.Button=Configure +file.remotelyStored=This file is stored remotely - click for more info + file.auxfiles.download.header=Download Auxiliary Files # These types correspond to the AuxiliaryFile.Type enum. 
file.auxfiles.types.DP=Differentially Private Statistics @@ -2198,6 +2235,8 @@ dataset.AddReplication=Add "Replication Data for" to Title dataset.replicationDataFor=Replication Data for: dataset.additionalEntry=Additional Entry +#externaltools +externaltools.enable.browser.popups=You must enable popups in your browser to open external tools in a new window or tab. #mydata_fragment.xhtml mydataFragment.infoAccess=Here are all the dataverses, datasets, and files you have access to. You can filter through them by publication status and roles. @@ -2270,12 +2309,12 @@ bagit.sourceOrganization=Dataverse Installation () bagit.sourceOrganizationAddress= bagit.sourceOrganizationEmail= -bagit.checksum.validation.error=Invalid checksum. filePath={0} type={1} fileChecksum={2} calculatedChecksum={3} -bagit.checksum.validation.exception=Error while calculating checksum. filePath={0} type={1} error={2} -bagit.validation.bag.file.not.found=Invalid bag file: {0} -bagit.validation.manifest.not.supported=No supported manifest found in: {0} supportedTypes: {1} -bagit.validation.file.not.found=Manifest declared file: {0} not-found in data provider: {1} -bagit.validation.exception=Unable to complete checksums for: {0} +bagit.checksum.validation.error=Invalid checksum for file "{0}". Manifest checksum={2}, calculated checksum={3}, type={1} +bagit.checksum.validation.exception=Error while calculating checksum for file "{0}". Checksum type={1}, error={2} +bagit.validation.bag.file.not.found=Invalid BagIt package: "{0}" +bagit.validation.manifest.not.supported=No supported manifest found in BagIt package. 
Supported types are: {1} +bagit.validation.file.not.found=The manifest declared a file, "{0}", that is not found in the BagIt package +bagit.validation.exception=Unable to complete checksums for BagIt package #Permission.java permission.addDataverseDataverse=Add a dataverse within another dataverse @@ -2348,7 +2387,7 @@ dataset.file.uploadWarning=upload warning dataset.file.uploadWorked=upload worked dataset.file.upload.popup.explanation.tip=For more information, please refer to the Duplicate Files section of the User Guide. -dataset.file.error.application/zipped-bagit=BagIt package detected, but errors found. These are the errors found until processing stopped +dataset.file.error.application/zipped-bagit=BagIt package, "{0}", detected but errors found. These are the errors found until processing stopped: #HarvestingClientsPage.java harvest.start.error=Sorry, harvest could not be started for the selected harvesting client configuration (unknown server error). @@ -2466,6 +2505,10 @@ template.delete.error=The dataset template cannot be deleted. template.update=Template data updated template.update.error=Template update failed template.makeDefault.error=The dataset template cannot be made default. +template.instructions.label=Custom Instructions: +template.instructions.label.tip=Click to Edit +template.instructions.empty.label=(None - click to add) + page.copy=Copy of #RolePermissionFragment.java @@ -2715,20 +2758,13 @@ rtabfileparser.ioexception.read=Couldn't read Boolean variable ({0})! rtabfileparser.ioexception.parser1=R Tab File Parser: Could not obtain varQnty from the dataset metadata. rtabfileparser.ioexception.parser2=R Tab File Parser: varQnty=0 in the dataset metadata! +#JsonParser.java +jsonparser.error.metadatablocks.not.found=Invalid JSON object: metadata blocks not found. 
+jsonparser.error.parsing.date=Error parsing date: {0} +jsonparser.error.parsing.number=Error parsing number: {0} #ConfigureFragmentBean.java configurefragmentbean.apiTokenGenerated=API Token will be generated. Please keep it secure as you would do with a password. -#FacetCategory - staticSearchFields -staticSearchFields.dvCategory=Dataverse Category -staticSearchFields.metadataSource=Metadata Source -staticSearchFields.publicationDate=Publication Year -staticSearchFields.fileTypeGroupFacet=File Type -staticSearchFields.dvObjectType=Type -staticSearchFields.fileTag=File Tag -staticSearchFields.fileAccess=Access -staticSearchFields.publicationStatus=Publication Status -staticSearchFields.subject_ss=Subject - #dataverse category - Facet Labels Researcher=Researcher Research\u0020Project=Research Project @@ -2809,3 +2845,5 @@ publishDatasetCommand.pidNotReserved=Cannot publish dataset because its persiste # APIs api.errors.invalidApiToken=Invalid API token. +api.ldninbox.citation.alert={0},

        The {1} has just been notified that the {2}, {3}, cites "{6}" in this repository. +api.ldninbox.citation.subject={0}: A Dataset Citation has been reported! diff --git a/src/main/java/propertyFiles/MimeTypeDetectionByFileExtension.properties b/src/main/java/propertyFiles/MimeTypeDetectionByFileExtension.properties index 177d6e194cd..c93bb56151f 100644 --- a/src/main/java/propertyFiles/MimeTypeDetectionByFileExtension.properties +++ b/src/main/java/propertyFiles/MimeTypeDetectionByFileExtension.properties @@ -31,3 +31,9 @@ smcl=application/x-stata-smcl swc=application/x-swc xz=application/x-xz xlsx=application/vnd.openxmlformats-officedocument.spreadsheetml.sheet +wdl=text/x-workflow-description-language +cwl=text/x-computational-workflow-language +nf=text/x-nextflow +Rmd=text/x-r-notebook +rb=text/x-ruby-script +dag=text/x-dagman diff --git a/src/main/java/propertyFiles/MimeTypeDetectionByFileName.properties b/src/main/java/propertyFiles/MimeTypeDetectionByFileName.properties new file mode 100644 index 00000000000..70b0c4e371e --- /dev/null +++ b/src/main/java/propertyFiles/MimeTypeDetectionByFileName.properties @@ -0,0 +1,4 @@ +Makefile=text/x-makefile +Snakemake=text/x-snakemake +Dockerfile=application/x-docker-file +Vagrantfile=application/x-vagrant-file diff --git a/src/main/java/propertyFiles/MimeTypeDisplay.properties b/src/main/java/propertyFiles/MimeTypeDisplay.properties index 231435bf299..928419c0405 100644 --- a/src/main/java/propertyFiles/MimeTypeDisplay.properties +++ b/src/main/java/propertyFiles/MimeTypeDisplay.properties @@ -73,6 +73,14 @@ application/x-sas-syntax=SAS Syntax type/x-r-syntax=R Syntax application/vnd.wolfram.mathematica.package=Wolfram Mathematica Code application/vnd.wolfram.mathematica=Wolfram Mathematica Code +text/x-workflow-description-language=Workflow Description Language +text/x-computational-workflow-language=Computational Workflow Language +text/x-nextflow=Nextflow Script +text/x-r-notebook=R Notebook 
+text/x-ruby-script=Ruby Source Code +text/x-dagman=DAGMan Workflow +text/x-makefile=Makefile Script +text/x-snakemake=Snakemake Workflow # Ingested Tabular Data text/tab-separated-values=Tab-Delimited # RawData @@ -211,5 +219,7 @@ video/webm=WebM Video text/xml-graphml=GraphML Network Data # Other application/octet-stream=Unknown +application/x-docker-file=Docker Image File +application/x-vagrant-file=Vagrant Image File # Dataverse-specific application/vnd.dataverse.file-package=Dataverse Package diff --git a/src/main/java/propertyFiles/MimeTypeFacets.properties b/src/main/java/propertyFiles/MimeTypeFacets.properties index 12762bca26d..2cac63a7ad0 100644 --- a/src/main/java/propertyFiles/MimeTypeFacets.properties +++ b/src/main/java/propertyFiles/MimeTypeFacets.properties @@ -68,6 +68,16 @@ type/x-r-syntax=Code application/postscript=Code application/vnd.wolfram.mathematica.package=Code application/vnd.wolfram.mathematica=Code +text/x-workflow-description-language=Code +text/x-computational-workflow-language=Code +text/x-nextflow=Code +text/x-r-notebook=Code +text/x-ruby-script=Code +text/x-dagman=Code +text/x-makefile=Code +text/x-snakemake=Code +application/x-docker-file=Code +application/x-vagrant-file=Code # Ingested text/tab-separated-values=Tabular Data # Data diff --git a/src/main/java/propertyFiles/astrophysics.properties b/src/main/java/propertyFiles/astrophysics.properties index be81ccdc883..a49b8b66510 100644 --- a/src/main/java/propertyFiles/astrophysics.properties +++ b/src/main/java/propertyFiles/astrophysics.properties @@ -1,5 +1,6 @@ metadatablock.name=astrophysics metadatablock.displayName=Astronomy and Astrophysics Metadata +metadatablock.displayFacet=Astronomy and Astrophysics datasetfieldtype.astroType.title=Type datasetfieldtype.astroFacility.title=Facility datasetfieldtype.astroInstrument.title=Instrument diff --git a/src/main/java/propertyFiles/biomedical.properties b/src/main/java/propertyFiles/biomedical.properties index 
723a4ac2f40..1bffed2ee03 100644 --- a/src/main/java/propertyFiles/biomedical.properties +++ b/src/main/java/propertyFiles/biomedical.properties @@ -1,5 +1,6 @@ metadatablock.name=biomedical metadatablock.displayName=Life Sciences Metadata +metadatablock.displayFacet=Life Sciences datasetfieldtype.studyDesignType.title=Design Type datasetfieldtype.studyOtherDesignType.title=Other Design Type datasetfieldtype.studyFactorType.title=Factor Type diff --git a/src/main/java/propertyFiles/citation.properties b/src/main/java/propertyFiles/citation.properties index 70cb98a98e4..668542c92be 100644 --- a/src/main/java/propertyFiles/citation.properties +++ b/src/main/java/propertyFiles/citation.properties @@ -1,18 +1,19 @@ metadatablock.name=citation metadatablock.displayName=Citation Metadata +metadatablock.displayFacet=Citation datasetfieldtype.title.title=Title datasetfieldtype.subtitle.title=Subtitle datasetfieldtype.alternativeTitle.title=Alternative Title datasetfieldtype.alternativeURL.title=Alternative URL -datasetfieldtype.otherId.title=Other ID +datasetfieldtype.otherId.title=Other Identifier datasetfieldtype.otherIdAgency.title=Agency datasetfieldtype.otherIdValue.title=Identifier datasetfieldtype.author.title=Author datasetfieldtype.authorName.title=Name datasetfieldtype.authorAffiliation.title=Affiliation -datasetfieldtype.authorIdentifierScheme.title=Identifier Scheme +datasetfieldtype.authorIdentifierScheme.title=Identifier Type datasetfieldtype.authorIdentifier.title=Identifier -datasetfieldtype.datasetContact.title=Contact +datasetfieldtype.datasetContact.title=Point of Contact datasetfieldtype.datasetContactName.title=Name datasetfieldtype.datasetContactAffiliation.title=Affiliation datasetfieldtype.datasetContactEmail.title=E-mail @@ -22,49 +23,49 @@ datasetfieldtype.dsDescriptionDate.title=Date datasetfieldtype.subject.title=Subject datasetfieldtype.keyword.title=Keyword datasetfieldtype.keywordValue.title=Term 
-datasetfieldtype.keywordVocabulary.title=Vocabulary -datasetfieldtype.keywordVocabularyURI.title=Vocabulary URL +datasetfieldtype.keywordVocabulary.title=Controlled Vocabulary Name +datasetfieldtype.keywordVocabularyURI.title=Controlled Vocabulary URL datasetfieldtype.topicClassification.title=Topic Classification datasetfieldtype.topicClassValue.title=Term -datasetfieldtype.topicClassVocab.title=Vocabulary -datasetfieldtype.topicClassVocabURI.title=Vocabulary URL +datasetfieldtype.topicClassVocab.title=Controlled Vocabulary Name +datasetfieldtype.topicClassVocabURI.title=Controlled Vocabulary URL datasetfieldtype.publication.title=Related Publication datasetfieldtype.publicationCitation.title=Citation -datasetfieldtype.publicationIDType.title=ID Type -datasetfieldtype.publicationIDNumber.title=ID Number +datasetfieldtype.publicationIDType.title=Identifier Type +datasetfieldtype.publicationIDNumber.title=Identifier datasetfieldtype.publicationURL.title=URL datasetfieldtype.notesText.title=Notes datasetfieldtype.language.title=Language datasetfieldtype.producer.title=Producer datasetfieldtype.producerName.title=Name datasetfieldtype.producerAffiliation.title=Affiliation -datasetfieldtype.producerAbbreviation.title=Abbreviation +datasetfieldtype.producerAbbreviation.title=Abbreviated Name datasetfieldtype.producerURL.title=URL datasetfieldtype.producerLogoURL.title=Logo URL datasetfieldtype.productionDate.title=Production Date -datasetfieldtype.productionPlace.title=Production Place +datasetfieldtype.productionPlace.title=Production Location datasetfieldtype.contributor.title=Contributor datasetfieldtype.contributorType.title=Type datasetfieldtype.contributorName.title=Name -datasetfieldtype.grantNumber.title=Grant Information -datasetfieldtype.grantNumberAgency.title=Grant Agency -datasetfieldtype.grantNumberValue.title=Grant Number +datasetfieldtype.grantNumber.title=Funding Information +datasetfieldtype.grantNumberAgency.title=Agency 
+datasetfieldtype.grantNumberValue.title=Identifier datasetfieldtype.distributor.title=Distributor datasetfieldtype.distributorName.title=Name datasetfieldtype.distributorAffiliation.title=Affiliation -datasetfieldtype.distributorAbbreviation.title=Abbreviation +datasetfieldtype.distributorAbbreviation.title=Abbreviated Name datasetfieldtype.distributorURL.title=URL datasetfieldtype.distributorLogoURL.title=Logo URL datasetfieldtype.distributionDate.title=Distribution Date datasetfieldtype.depositor.title=Depositor datasetfieldtype.dateOfDeposit.title=Deposit Date -datasetfieldtype.timePeriodCovered.title=Time Period Covered -datasetfieldtype.timePeriodCoveredStart.title=Start -datasetfieldtype.timePeriodCoveredEnd.title=End +datasetfieldtype.timePeriodCovered.title=Time Period +datasetfieldtype.timePeriodCoveredStart.title=Start Date +datasetfieldtype.timePeriodCoveredEnd.title=End Date datasetfieldtype.dateOfCollection.title=Date of Collection -datasetfieldtype.dateOfCollectionStart.title=Start -datasetfieldtype.dateOfCollectionEnd.title=End -datasetfieldtype.kindOfData.title=Kind of Data +datasetfieldtype.dateOfCollectionStart.title=Start Date +datasetfieldtype.dateOfCollectionEnd.title=End Date +datasetfieldtype.kindOfData.title=Data Type datasetfieldtype.series.title=Series datasetfieldtype.seriesName.title=Name datasetfieldtype.seriesInformation.title=Information @@ -72,106 +73,106 @@ datasetfieldtype.software.title=Software datasetfieldtype.softwareName.title=Name datasetfieldtype.softwareVersion.title=Version datasetfieldtype.relatedMaterial.title=Related Material -datasetfieldtype.relatedDatasets.title=Related Datasets -datasetfieldtype.otherReferences.title=Other References -datasetfieldtype.dataSources.title=Data Sources -datasetfieldtype.originOfSources.title=Origin of Sources -datasetfieldtype.characteristicOfSources.title=Characteristic of Sources Noted +datasetfieldtype.relatedDatasets.title=Related Dataset 
+datasetfieldtype.otherReferences.title=Other Reference +datasetfieldtype.dataSources.title=Data Source +datasetfieldtype.originOfSources.title=Origin of Historical Sources +datasetfieldtype.characteristicOfSources.title=Characteristic of Sources datasetfieldtype.accessToSources.title=Documentation and Access to Sources -datasetfieldtype.title.description=Full title by which the Dataset is known. -datasetfieldtype.subtitle.description=A secondary title used to amplify or state certain limitations on the main title. -datasetfieldtype.alternativeTitle.description=A title by which the work is commonly referred, or an abbreviation of the title. -datasetfieldtype.alternativeURL.description=A URL where the dataset can be viewed, such as a personal or project website. -datasetfieldtype.otherId.description=Another unique identifier that identifies this Dataset (e.g., producer's or another repository's number). -datasetfieldtype.otherIdAgency.description=Name of agency which generated this identifier. -datasetfieldtype.otherIdValue.description=Other identifier that corresponds to this Dataset. -datasetfieldtype.author.description=The person(s), corporate body(ies), or agency(ies) responsible for creating the work. -datasetfieldtype.authorName.description=The author's Family Name, Given Name or the name of the organization responsible for this Dataset. -datasetfieldtype.authorAffiliation.description=The organization with which the author is affiliated. -datasetfieldtype.authorIdentifierScheme.description=Name of the identifier scheme (ORCID, ISNI). -datasetfieldtype.authorIdentifier.description=Uniquely identifies an individual author or organization, according to various schemes. -datasetfieldtype.datasetContact.description=The contact(s) for this Dataset. -datasetfieldtype.datasetContactName.description=The contact's Family Name, Given Name or the name of the organization. 
-datasetfieldtype.datasetContactAffiliation.description=The organization with which the contact is affiliated. -datasetfieldtype.datasetContactEmail.description=The e-mail address(es) of the contact(s) for the Dataset. This will not be displayed. -datasetfieldtype.dsDescription.description=A summary describing the purpose, nature, and scope of the Dataset. -datasetfieldtype.dsDescriptionValue.description=A summary describing the purpose, nature, and scope of the Dataset. -datasetfieldtype.dsDescriptionDate.description=In cases where a Dataset contains more than one description (for example, one might be supplied by the data producer and another prepared by the data repository where the data are deposited), the date attribute is used to distinguish between the two descriptions. The date attribute follows the ISO convention of YYYY-MM-DD. -datasetfieldtype.subject.description=Domain-specific Subject Categories that are topically relevant to the Dataset. -datasetfieldtype.keyword.description=Key terms that describe important aspects of the Dataset. -datasetfieldtype.keywordValue.description=Key terms that describe important aspects of the Dataset. Can be used for building keyword indexes and for classification and retrieval purposes. A controlled vocabulary can be employed. The vocab attribute is provided for specification of the controlled vocabulary in use, such as LCSH, MeSH, or others. The vocabURI attribute specifies the location for the full controlled vocabulary. -datasetfieldtype.keywordVocabulary.description=For the specification of the keyword controlled vocabulary in use, such as LCSH, MeSH, or others. -datasetfieldtype.keywordVocabularyURI.description=Keyword vocabulary URL points to the web presence that describes the keyword vocabulary, if appropriate. Enter an absolute URL where the keyword vocabulary web site is found, such as http://www.my.org. 
-datasetfieldtype.topicClassification.description=The classification field indicates the broad important topic(s) and subjects that the data cover. Library of Congress subject terms may be used here. -datasetfieldtype.topicClassValue.description=Topic or Subject term that is relevant to this Dataset. -datasetfieldtype.topicClassVocab.description=Provided for specification of the controlled vocabulary in use, e.g., LCSH, MeSH, etc. -datasetfieldtype.topicClassVocabURI.description=Specifies the URL location for the full controlled vocabulary. -datasetfieldtype.publication.description=Publications that use the data from this Dataset. The full list of Related Publications will be displayed on the metadata tab. -datasetfieldtype.publicationCitation.description=The full bibliographic citation for this related publication. -datasetfieldtype.publicationIDType.description=The type of digital identifier used for this publication (e.g., Digital Object Identifier (DOI)). -datasetfieldtype.publicationIDNumber.description=The identifier for the selected ID type. -datasetfieldtype.publicationURL.description=Link to the publication web page (e.g., journal article page, archive record page, or other). -datasetfieldtype.notesText.description=Additional important information about the Dataset. -datasetfieldtype.language.description=Language of the Dataset -datasetfieldtype.producer.description=Person or organization with the financial or administrative responsibility over this Dataset -datasetfieldtype.producerName.description=Producer name -datasetfieldtype.producerAffiliation.description=The organization with which the producer is affiliated. -datasetfieldtype.producerAbbreviation.description=The abbreviation by which the producer is commonly known. (ex. IQSS, ICPSR) -datasetfieldtype.producerURL.description=Producer URL points to the producer's web presence, if appropriate. Enter an absolute URL where the producer's web site is found, such as http://www.my.org. 
-datasetfieldtype.producerLogoURL.description=URL for the producer's logo, which points to this producer's web-accessible logo image. Enter an absolute URL where the producer's logo image is found, such as http://www.my.org/images/logo.gif. -datasetfieldtype.productionDate.description=Date when the data collection or other materials were produced (not distributed, published or archived). -datasetfieldtype.productionPlace.description=The location where the data collection and any other related materials were produced. -datasetfieldtype.contributor.description=The organization or person responsible for either collecting, managing, or otherwise contributing in some form to the development of the resource. -datasetfieldtype.contributorType.description=The type of contributor of the resource. -datasetfieldtype.contributorName.description=The Family Name, Given Name or organization name of the contributor. -datasetfieldtype.grantNumber.description=Grant Information -datasetfieldtype.grantNumberAgency.description=Grant Number Agency -datasetfieldtype.grantNumberValue.description=The grant or contract number of the project that sponsored the effort. -datasetfieldtype.distributor.description=The organization designated by the author or producer to generate copies of the particular work including any necessary editions or revisions. -datasetfieldtype.distributorName.description=Distributor name -datasetfieldtype.distributorAffiliation.description=The organization with which the distributor contact is affiliated. -datasetfieldtype.distributorAbbreviation.description=The abbreviation by which this distributor is commonly known (e.g., IQSS, ICPSR). -datasetfieldtype.distributorURL.description=Distributor URL points to the distributor's web presence, if appropriate. Enter an absolute URL where the distributor's web site is found, such as http://www.my.org. 
-datasetfieldtype.distributorLogoURL.description=URL of the distributor's logo, which points to this distributor's web-accessible logo image. Enter an absolute URL where the distributor's logo image is found, such as http://www.my.org/images/logo.gif. -datasetfieldtype.distributionDate.description=Date that the work was made available for distribution/presentation. -datasetfieldtype.depositor.description=The person (Family Name, Given Name) or the name of the organization that deposited this Dataset to the repository. -datasetfieldtype.dateOfDeposit.description=Date that the Dataset was deposited into the repository. -datasetfieldtype.timePeriodCovered.description=Time period to which the data refer. This item reflects the time period covered by the data, not the dates of coding or making documents machine-readable or the dates the data were collected. Also known as span. -datasetfieldtype.timePeriodCoveredStart.description=Start date which reflects the time period covered by the data, not the dates of coding or making documents machine-readable or the dates the data were collected. -datasetfieldtype.timePeriodCoveredEnd.description=End date which reflects the time period covered by the data, not the dates of coding or making documents machine-readable or the dates the data were collected. -datasetfieldtype.dateOfCollection.description=Contains the date(s) when the data were collected. -datasetfieldtype.dateOfCollectionStart.description=Date when the data collection started. -datasetfieldtype.dateOfCollectionEnd.description=Date when the data collection ended. -datasetfieldtype.kindOfData.description=Type of data included in the file: survey data, census/enumeration data, aggregate data, clinical data, event/transaction data, program source code, machine-readable text, administrative records data, experimental data, psychological test, textual data, coded textual, coded documents, time budget diaries, observation data/ratings, process-produced data, or other. 
-datasetfieldtype.series.description=Information about the Dataset series. -datasetfieldtype.seriesName.description=Name of the dataset series to which the Dataset belongs. -datasetfieldtype.seriesInformation.description=History of the series and summary of those features that apply to the series as a whole. -datasetfieldtype.software.description=Information about the software used to generate the Dataset. -datasetfieldtype.softwareName.description=Name of software used to generate the Dataset. -datasetfieldtype.softwareVersion.description=Version of the software used to generate the Dataset. -datasetfieldtype.relatedMaterial.description=Any material related to this Dataset. -datasetfieldtype.relatedDatasets.description=Any Datasets that are related to this Dataset, such as previous research on this subject. -datasetfieldtype.otherReferences.description=Any references that would serve as background or supporting material to this Dataset. -datasetfieldtype.dataSources.description=List of books, articles, serials, or machine-readable data files that served as the sources of the data collection. -datasetfieldtype.originOfSources.description=For historical materials, information about the origin of the sources and the rules followed in establishing the sources should be specified. -datasetfieldtype.characteristicOfSources.description=Assessment of characteristics and source material. -datasetfieldtype.accessToSources.description=Level of documentation of the original sources. -datasetfieldtype.title.watermark=Enter title... +datasetfieldtype.title.description=The main title of the Dataset +datasetfieldtype.subtitle.description=A secondary title that amplifies or states certain limitations on the main title +datasetfieldtype.alternativeTitle.description=Either 1) a title commonly used to refer to the Dataset or 2) an abbreviation of the main title +datasetfieldtype.alternativeURL.description=Another URL where one can view or access the data in the Dataset, e.g. 
a project or personal webpage +datasetfieldtype.otherId.description=Another unique identifier for the Dataset (e.g. producer's or another repository's identifier) +datasetfieldtype.otherIdAgency.description=The name of the agency that generated the other identifier +datasetfieldtype.otherIdValue.description=Another identifier that uniquely identifies the Dataset +datasetfieldtype.author.description=The entity, e.g. a person or organization, that created the Dataset +datasetfieldtype.authorName.description=The name of the author, such as the person's name or the name of an organization +datasetfieldtype.authorAffiliation.description=The name of the entity affiliated with the author, e.g. an organization's name +datasetfieldtype.authorIdentifierScheme.description=The type of identifier that uniquely identifies the author (e.g. ORCID, ISNI) +datasetfieldtype.authorIdentifier.description=Uniquely identifies the author when paired with an identifier type +datasetfieldtype.datasetContact.description=The entity, e.g. a person or organization, that users of the Dataset can contact with questions +datasetfieldtype.datasetContactName.description=The name of the point of contact, e.g. the person's name or the name of an organization +datasetfieldtype.datasetContactAffiliation.description=The name of the entity affiliated with the point of contact, e.g. an organization's name +datasetfieldtype.datasetContactEmail.description=The point of contact's email address +datasetfieldtype.dsDescription.description=A summary describing the purpose, nature, and scope of the Dataset +datasetfieldtype.dsDescriptionValue.description=A summary describing the purpose, nature, and scope of the Dataset +datasetfieldtype.dsDescriptionDate.description=The date when the description was added to the Dataset. If the Dataset contains more than one description, e.g. 
the data producer supplied one description and the data repository supplied another, this date is used to distinguish between the descriptions +datasetfieldtype.subject.description=The area of study relevant to the Dataset +datasetfieldtype.keyword.description=A key term that describes an important aspect of the Dataset and information about any controlled vocabulary used +datasetfieldtype.keywordValue.description=A key term that describes important aspects of the Dataset +datasetfieldtype.keywordVocabulary.description=The controlled vocabulary used for the keyword term (e.g. LCSH, MeSH) +datasetfieldtype.keywordVocabularyURI.description=The URL where one can access information about the term's controlled vocabulary +datasetfieldtype.topicClassification.description=Indicates a broad, important topic or subject that the Dataset covers and information about any controlled vocabulary used +datasetfieldtype.topicClassValue.description=A topic or subject term +datasetfieldtype.topicClassVocab.description=The controlled vocabulary used for the topic or subject term (e.g. LCSH, MeSH) +datasetfieldtype.topicClassVocabURI.description=The URL where one can access information about the term's controlled vocabulary +datasetfieldtype.publication.description=The article or report that uses the data in the Dataset. The full list of related publications will be displayed on the metadata tab +datasetfieldtype.publicationCitation.description=The full bibliographic citation for the related publication +datasetfieldtype.publicationIDType.description=The type of identifier that uniquely identifies a related publication +datasetfieldtype.publicationIDNumber.description=The identifier for a related publication +datasetfieldtype.publicationURL.description=The URL form of the identifier entered in the Identifier field, e.g. the DOI URL if a DOI was entered in the Identifier field. Used to display what was entered in the Identifier Type and Identifier fields as a link. 
If what was entered in the Identifier field has no URL form, the URL of the publication webpage is used, e.g. a journal article webpage +datasetfieldtype.notesText.description=Additional information about the Dataset +datasetfieldtype.language.description=A language that the Dataset's files are written in +datasetfieldtype.producer.description=The entity, such as a person or organization, managing the finances or other administrative processes involved in the creation of the Dataset +datasetfieldtype.producerName.description=The name of the entity, e.g. the person's name or the name of an organization +datasetfieldtype.producerAffiliation.description=The name of the entity affiliated with the producer, e.g. an organization's name +datasetfieldtype.producerAbbreviation.description=The producer's abbreviated name (e.g. IQSS, ICPSR) +datasetfieldtype.producerURL.description=The URL of the producer's website +datasetfieldtype.producerLogoURL.description=The URL of the producer's logo +datasetfieldtype.productionDate.description=The date when the data were produced (not distributed, published, or archived) +datasetfieldtype.productionPlace.description=The location where the data and any related materials were produced or collected +datasetfieldtype.contributor.description=The entity, such as a person or organization, responsible for collecting, managing, or otherwise contributing to the development of the Dataset +datasetfieldtype.contributorType.description=Indicates the type of contribution made to the dataset +datasetfieldtype.contributorName.description=The name of the contributor, e.g. 
the person's name or the name of an organization +datasetfieldtype.grantNumber.description=Information about the Dataset's financial support +datasetfieldtype.grantNumberAgency.description=The agency that provided financial support for the Dataset +datasetfieldtype.grantNumberValue.description=The grant identifier or contract identifier of the agency that provided financial support for the Dataset +datasetfieldtype.distributor.description=The entity, such as a person or organization, designated to generate copies of the Dataset, including any editions or revisions +datasetfieldtype.distributorName.description=The name of the entity, e.g. the person's name or the name of an organization +datasetfieldtype.distributorAffiliation.description=The name of the entity affiliated with the distributor, e.g. an organization's name +datasetfieldtype.distributorAbbreviation.description=The distributor's abbreviated name (e.g. IQSS, ICPSR) +datasetfieldtype.distributorURL.description=The URL of the distributor's webpage +datasetfieldtype.distributorLogoURL.description=The URL of the distributor's logo image, used to show the image on the Dataset's page +datasetfieldtype.distributionDate.description=The date when the Dataset was made available for distribution/presentation +datasetfieldtype.depositor.description=The entity, such as a person or organization, that deposited the Dataset in the repository +datasetfieldtype.dateOfDeposit.description=The date when the Dataset was deposited into the repository +datasetfieldtype.timePeriodCovered.description=The time period that the data refer to. Also known as span. 
This is the time period covered by the data, not the dates of coding, collecting data, or making documents machine-readable +datasetfieldtype.timePeriodCoveredStart.description=The start date of the time period that the data refer to +datasetfieldtype.timePeriodCoveredEnd.description=The end date of the time period that the data refer to +datasetfieldtype.dateOfCollection.description=The dates when the data were collected or generated +datasetfieldtype.dateOfCollectionStart.description=The date when the data collection started +datasetfieldtype.dateOfCollectionEnd.description=The date when the data collection ended +datasetfieldtype.kindOfData.description=The type of data included in the files (e.g. survey data, clinical data, or machine-readable text) +datasetfieldtype.series.description=Information about the dataset series to which the Dataset belongs +datasetfieldtype.seriesName.description=The name of the dataset series +datasetfieldtype.seriesInformation.description=Can include 1) a history of the series and 2) a summary of features that apply to the series +datasetfieldtype.software.description=Information about the software used to generate the Dataset +datasetfieldtype.softwareName.description=The name of the software used to generate the Dataset +datasetfieldtype.softwareVersion.description=The version of the software used to generate the Dataset, e.g. 
4.11 +datasetfieldtype.relatedMaterial.description=Information, such as a persistent ID or citation, about the material related to the Dataset, such as appendices or sampling information available outside of the Dataset +datasetfieldtype.relatedDatasets.description=Information, such as a persistent ID or citation, about a related dataset, such as previous research on the Dataset's subject +datasetfieldtype.otherReferences.description=Information, such as a persistent ID or citation, about another type of resource that provides background or supporting material to the Dataset +datasetfieldtype.dataSources.description=Information, such as a persistent ID or citation, about sources of the Dataset (e.g. a book, article, serial, or machine-readable data file) +datasetfieldtype.originOfSources.description=For historical sources, the origin and any rules followed in establishing them as sources +datasetfieldtype.characteristicOfSources.description=Characteristics not already noted elsewhere +datasetfieldtype.accessToSources.description=1) Methods or procedures for accessing data sources and 2) any special permissions needed for access +datasetfieldtype.title.watermark= datasetfieldtype.subtitle.watermark= datasetfieldtype.alternativeTitle.watermark= -datasetfieldtype.alternativeURL.watermark=Enter full URL, starting with http:// +datasetfieldtype.alternativeURL.watermark=https:// datasetfieldtype.otherId.watermark= datasetfieldtype.otherIdAgency.watermark= datasetfieldtype.otherIdValue.watermark= datasetfieldtype.author.watermark= -datasetfieldtype.authorName.watermark=FamilyName, GivenName or Organization -datasetfieldtype.authorAffiliation.watermark= +datasetfieldtype.authorName.watermark=1) Family Name, Given Name or 2) Organization XYZ +datasetfieldtype.authorAffiliation.watermark=Organization XYZ datasetfieldtype.authorIdentifierScheme.watermark= datasetfieldtype.authorIdentifier.watermark= datasetfieldtype.datasetContact.watermark= 
-datasetfieldtype.datasetContactName.watermark=FamilyName, GivenName or Organization -datasetfieldtype.datasetContactAffiliation.watermark= -datasetfieldtype.datasetContactEmail.watermark= +datasetfieldtype.datasetContactName.watermark=1) FamilyName, GivenName or 2) Organization +datasetfieldtype.datasetContactAffiliation.watermark=Organization XYZ +datasetfieldtype.datasetContactEmail.watermark=name@email.xyz datasetfieldtype.dsDescription.watermark= datasetfieldtype.dsDescriptionValue.watermark= datasetfieldtype.dsDescriptionDate.watermark=YYYY-MM-DD @@ -179,40 +180,40 @@ datasetfieldtype.subject.watermark= datasetfieldtype.keyword.watermark= datasetfieldtype.keywordValue.watermark= datasetfieldtype.keywordVocabulary.watermark= -datasetfieldtype.keywordVocabularyURI.watermark=Enter full URL, starting with http:// +datasetfieldtype.keywordVocabularyURI.watermark=https:// datasetfieldtype.topicClassification.watermark= datasetfieldtype.topicClassValue.watermark= datasetfieldtype.topicClassVocab.watermark= -datasetfieldtype.topicClassVocabURI.watermark=Enter full URL, starting with http:// +datasetfieldtype.topicClassVocabURI.watermark=https:// datasetfieldtype.publication.watermark= datasetfieldtype.publicationCitation.watermark= datasetfieldtype.publicationIDType.watermark= datasetfieldtype.publicationIDNumber.watermark= -datasetfieldtype.publicationURL.watermark=Enter full URL, starting with http:// +datasetfieldtype.publicationURL.watermark=https:// datasetfieldtype.notesText.watermark= datasetfieldtype.language.watermark= datasetfieldtype.producer.watermark= -datasetfieldtype.producerName.watermark=FamilyName, GivenName or Organization -datasetfieldtype.producerAffiliation.watermark= +datasetfieldtype.producerName.watermark=1) FamilyName, GivenName or 2) Organization +datasetfieldtype.producerAffiliation.watermark=Organization XYZ datasetfieldtype.producerAbbreviation.watermark= -datasetfieldtype.producerURL.watermark=Enter full URL, starting with http:// 
-datasetfieldtype.producerLogoURL.watermark=Enter full URL for image, starting with http:// +datasetfieldtype.producerURL.watermark=https:// +datasetfieldtype.producerLogoURL.watermark=https:// datasetfieldtype.productionDate.watermark=YYYY-MM-DD datasetfieldtype.productionPlace.watermark= datasetfieldtype.contributor.watermark= datasetfieldtype.contributorType.watermark= -datasetfieldtype.contributorName.watermark=FamilyName, GivenName or Organization +datasetfieldtype.contributorName.watermark=1) FamilyName, GivenName or 2) Organization datasetfieldtype.grantNumber.watermark= -datasetfieldtype.grantNumberAgency.watermark= +datasetfieldtype.grantNumberAgency.watermark=Organization XYZ datasetfieldtype.grantNumberValue.watermark= datasetfieldtype.distributor.watermark= -datasetfieldtype.distributorName.watermark=FamilyName, GivenName or Organization -datasetfieldtype.distributorAffiliation.watermark= +datasetfieldtype.distributorName.watermark=1) FamilyName, GivenName or 2) Organization +datasetfieldtype.distributorAffiliation.watermark=Organization XYZ datasetfieldtype.distributorAbbreviation.watermark= -datasetfieldtype.distributorURL.watermark=Enter full URL, starting with http:// -datasetfieldtype.distributorLogoURL.watermark=Enter full URL for image, starting with http:// +datasetfieldtype.distributorURL.watermark=https:// +datasetfieldtype.distributorLogoURL.watermark=https:// datasetfieldtype.distributionDate.watermark=YYYY-MM-DD -datasetfieldtype.depositor.watermark= +datasetfieldtype.depositor.watermark=1) FamilyName, GivenName or 2) Organization datasetfieldtype.dateOfDeposit.watermark=YYYY-MM-DD datasetfieldtype.timePeriodCovered.watermark= datasetfieldtype.timePeriodCoveredStart.watermark=YYYY-MM-DD @@ -265,6 +266,7 @@ controlledvocabulary.publicationIDType.purl=purl controlledvocabulary.publicationIDType.upc=upc controlledvocabulary.publicationIDType.url=url controlledvocabulary.publicationIDType.urn=urn 
+controlledvocabulary.publicationIDType.dash-nrs=DASH-NRS controlledvocabulary.contributorType.data_collector=Data Collector controlledvocabulary.contributorType.data_curator=Data Curator controlledvocabulary.contributorType.data_manager=Data Manager diff --git a/src/main/java/propertyFiles/computationalworkflow.properties b/src/main/java/propertyFiles/computationalworkflow.properties new file mode 100644 index 00000000000..eb15ecf9982 --- /dev/null +++ b/src/main/java/propertyFiles/computationalworkflow.properties @@ -0,0 +1,27 @@ +metadatablock.name=computationalworkflow +metadatablock.displayName=Computational Workflow Metadata +metadatablock.displayFacet=Computational Workflow +datasetfieldtype.workflowType.title=Workflow Type +datasetfieldtype.workflowType.description=The kind of Computational Workflow, which is designed to compose and execute a series of computational or data manipulation steps in a scientific application +datasetfieldtype.workflowType.watermark= +datasetfieldtype.workflowCodeRepository.title=External Code Repository URL +datasetfieldtype.workflowCodeRepository.description=A link to another public repository where the un-compiled, human-readable code and related code is also located (e.g., GitHub, GitLab, SVN) +datasetfieldtype.workflowCodeRepository.watermark=https://... 
+datasetfieldtype.workflowDocumentation.title=Documentation +datasetfieldtype.workflowDocumentation.description=A link (URL) to the documentation or text describing the Computational Workflow and its use +datasetfieldtype.workflowDocumentation.watermark= +controlledvocabulary.workflowType.common_workflow_language_(cwl)=Common Workflow Language (CWL) +controlledvocabulary.workflowType.workflow_description_language_(wdl)=Workflow Description Language (WDL) +controlledvocabulary.workflowType.nextflow=Nextflow +controlledvocabulary.workflowType.snakemake=Snakemake +controlledvocabulary.workflowType.ruffus=Ruffus +controlledvocabulary.workflowType.jupyter_notebook=Jupyter Notebook +controlledvocabulary.workflowType.r_notebook=R Notebook +controlledvocabulary.workflowType.dagman=DAGMan +controlledvocabulary.workflowType.matlab_script=MATLAB Script +controlledvocabulary.workflowType.bash_script=Bash Script +controlledvocabulary.workflowType.makefile=Makefile +controlledvocabulary.workflowType.other_python-based_workflow=Other Python-based workflow +controlledvocabulary.workflowType.other_r-based_workflow=Other R-based workflow +controlledvocabulary.workflowType.other=Other + diff --git a/src/main/java/propertyFiles/customARCS.properties b/src/main/java/propertyFiles/customARCS.properties index e6665b94e64..8a19405208a 100644 --- a/src/main/java/propertyFiles/customARCS.properties +++ b/src/main/java/propertyFiles/customARCS.properties @@ -1,5 +1,6 @@ metadatablock.name=customARCS metadatablock.displayName=Alliance for Research on Corporate Sustainability Metadata +metadatablock.displayFacet=Alliance for Research on Corporate Sustainability datasetfieldtype.ARCS1.title=1) Were any of these data sets a) purchased, b) obtained through licensed databases, or c) provided by an organization under a nondisclosure or other agreement? datasetfieldtype.ARCS2.title=2) If you responded Yes to Q1, have you ensured that sharing the data does not violate terms of the agreement? 
If you responded No to Q1, please enter N/A here. datasetfieldtype.ARCS3.title=3) Do any of these data sets include individual-level data (either collected or pre-existing in the dataset) that might make them subject to U.S. or international human subjects considerations? diff --git a/src/main/java/propertyFiles/customCHIA.properties b/src/main/java/propertyFiles/customCHIA.properties index 0b05e388cee..0d59493da96 100644 --- a/src/main/java/propertyFiles/customCHIA.properties +++ b/src/main/java/propertyFiles/customCHIA.properties @@ -1,5 +1,6 @@ metadatablock.name=customCHIA metadatablock.displayName=CHIA Metadata +metadatablock.displayFacet=CHIA datasetfieldtype.sourceCHIA.title=Source datasetfieldtype.datesAdditionalInformationCHIA.title=Dates - Additional Information datasetfieldtype.variablesCHIA.title=Variables diff --git a/src/main/java/propertyFiles/customDigaai.properties b/src/main/java/propertyFiles/customDigaai.properties index 85d7df1f2b7..10bb8f23786 100644 --- a/src/main/java/propertyFiles/customDigaai.properties +++ b/src/main/java/propertyFiles/customDigaai.properties @@ -1,5 +1,6 @@ metadatablock.name=customDigaai metadatablock.displayName=Digaai Metadata +metadatablock.displayFacet=Digaai datasetfieldtype.titulo.title=Título datasetfieldtype.numero.title=Número datasetfieldtype.datadePublicao.title=Data de Publicação @@ -52,4 +53,4 @@ controlledvocabulary.titulo.tc_brazil=TC Brazil controlledvocabulary.titulo.texas_magazine=Texas Magazine controlledvocabulary.titulo.the_brazilian_journal=The Brazilian Journal controlledvocabulary.titulo.today_magazine=Today Magazine -controlledvocabulary.titulo.viver_magazine=Viver Magazine \ No newline at end of file +controlledvocabulary.titulo.viver_magazine=Viver Magazine diff --git a/src/main/java/propertyFiles/customGSD.properties b/src/main/java/propertyFiles/customGSD.properties index 15f118c73c4..40dc0328053 100644 --- a/src/main/java/propertyFiles/customGSD.properties +++ 
b/src/main/java/propertyFiles/customGSD.properties @@ -1,5 +1,6 @@ metadatablock.name=customGSD metadatablock.displayName=Graduate School of Design Metadata +metadatablock.displayFacet=Graduate School of Design datasetfieldtype.gsdStudentName.title=Student Name datasetfieldtype.gsdStudentProgram.title=Student's Program of Study datasetfieldtype.gsdCourseName.title=Course Name diff --git a/src/main/java/propertyFiles/customMRA.properties b/src/main/java/propertyFiles/customMRA.properties index 8d905d266f0..5a702b980cc 100644 --- a/src/main/java/propertyFiles/customMRA.properties +++ b/src/main/java/propertyFiles/customMRA.properties @@ -1,5 +1,6 @@ metadatablock.name=customMRA metadatablock.displayName=MRA Metadata +metadatablock.displayFacet=MRA datasetfieldtype.mraCollection.title=Murray Research Archive Collection datasetfieldtype.mraCollection.description=Browse the Murray Research Archive collection with the following terms. datasetfieldtype.mraCollection.watermark= diff --git a/src/main/java/propertyFiles/customPSI.properties b/src/main/java/propertyFiles/customPSI.properties index e72e4e50222..a88b7409c5a 100644 --- a/src/main/java/propertyFiles/customPSI.properties +++ b/src/main/java/propertyFiles/customPSI.properties @@ -1,5 +1,6 @@ metadatablock.name=customPSI metadatablock.displayName=PSI Metadata +metadatablock.displayFacet=PSI datasetfieldtype.psiBehavior.title=Behavior datasetfieldtype.psiDonor.title=Donor datasetfieldtype.psiHealthArea.title=Health Area diff --git a/src/main/java/propertyFiles/customPSRI.properties b/src/main/java/propertyFiles/customPSRI.properties index 61370bb9fd1..9e76b412bd8 100644 --- a/src/main/java/propertyFiles/customPSRI.properties +++ b/src/main/java/propertyFiles/customPSRI.properties @@ -1,5 +1,6 @@ metadatablock.name=customPSRI metadatablock.displayName=Political Science Replication Initiative Metadata +metadatablock.displayFacet=Political Science Replication Initiative datasetfieldtype.PSRI1.title=Are the original data 
publicly available? datasetfieldtype.PSRI2.title=Is the original code available? datasetfieldtype.PSRI3.title=Where are the original data archived (name and url)? diff --git a/src/main/java/propertyFiles/custom_hbgdki.properties b/src/main/java/propertyFiles/custom_hbgdki.properties index 087c706d014..2386b5d00a2 100644 --- a/src/main/java/propertyFiles/custom_hbgdki.properties +++ b/src/main/java/propertyFiles/custom_hbgdki.properties @@ -1,5 +1,6 @@ metadatablock.name=custom_hbgdki metadatablock.displayName=HBGDki Custom Metadata +metadatablock.displayFacet=HBGDki datasetfieldtype.hbgdkiStudyName.title=Name of Study datasetfieldtype.hbgdkiStudyRegistry.title=Study Registry datasetfieldtype.hbgdkiStudyRegistryType.title=ID Type diff --git a/src/main/java/propertyFiles/geospatial.properties b/src/main/java/propertyFiles/geospatial.properties index e47982377cb..04db8d3d05f 100644 --- a/src/main/java/propertyFiles/geospatial.properties +++ b/src/main/java/propertyFiles/geospatial.properties @@ -1,5 +1,6 @@ metadatablock.name=geospatial metadatablock.displayName=Geospatial Metadata +metadatablock.displayFacet=Geospatial datasetfieldtype.geographicCoverage.title=Geographic Coverage datasetfieldtype.country.title=Country / Nation datasetfieldtype.state.title=State / Province @@ -281,4 +282,4 @@ controlledvocabulary.country.western_sahara=Western Sahara controlledvocabulary.country.yemen=Yemen controlledvocabulary.country.zambia=Zambia controlledvocabulary.country.zimbabwe=Zimbabwe -controlledvocabulary.country.aland_islands=Åland Islands \ No newline at end of file +controlledvocabulary.country.aland_islands=Åland Islands diff --git a/src/main/java/propertyFiles/journal.properties b/src/main/java/propertyFiles/journal.properties index e17a9bd6d89..753b5895f0a 100644 --- a/src/main/java/propertyFiles/journal.properties +++ b/src/main/java/propertyFiles/journal.properties @@ -1,5 +1,6 @@ metadatablock.name=journal metadatablock.displayName=Journal Metadata 
+metadatablock.displayFacet=Journal datasetfieldtype.journalVolumeIssue.title=Journal datasetfieldtype.journalVolume.title=Volume datasetfieldtype.journalIssue.title=Issue diff --git a/src/main/java/propertyFiles/socialscience.properties b/src/main/java/propertyFiles/socialscience.properties index 91e73fa78b9..3698b32573f 100644 --- a/src/main/java/propertyFiles/socialscience.properties +++ b/src/main/java/propertyFiles/socialscience.properties @@ -1,5 +1,6 @@ metadatablock.name=socialscience metadatablock.displayName=Social Science and Humanities Metadata +metadatablock.displayFacet=Social Science and Humanities datasetfieldtype.unitOfAnalysis.title=Unit of Analysis datasetfieldtype.universe.title=Universe datasetfieldtype.timeMethod.title=Time Method diff --git a/src/main/java/propertyFiles/staticSearchFields.properties b/src/main/java/propertyFiles/staticSearchFields.properties new file mode 100644 index 00000000000..ab03de64f23 --- /dev/null +++ b/src/main/java/propertyFiles/staticSearchFields.properties @@ -0,0 +1,11 @@ +#FacetCategory - staticSearchFields +staticSearchFields.metadata_type_ss=Dataset Feature +staticSearchFields.dvCategory=Dataverse Category +staticSearchFields.metadataSource=Metadata Source +staticSearchFields.publicationDate=Publication Year +staticSearchFields.fileTypeGroupFacet=File Type +staticSearchFields.dvObjectType=Type +staticSearchFields.fileTag=File Tag +staticSearchFields.fileAccess=Access +staticSearchFields.publicationStatus=Publication Status +staticSearchFields.subject_ss=Subject \ No newline at end of file diff --git a/src/main/resources/META-INF/microprofile-config.properties b/src/main/resources/META-INF/microprofile-config.properties index 09d71dfbf3a..16298d83118 100644 --- a/src/main/resources/META-INF/microprofile-config.properties +++ b/src/main/resources/META-INF/microprofile-config.properties @@ -1,3 +1,8 @@ +# GENERAL +# Will be replaced by Maven property in /target via filtering (see <build><resources>) 
+dataverse.version=${project.version} +dataverse.build= + # DATABASE dataverse.db.host=localhost dataverse.db.port=5432 diff --git a/src/main/resources/META-INF/services/org.eclipse.microprofile.config.spi.ConfigSourceProvider b/src/main/resources/META-INF/services/org.eclipse.microprofile.config.spi.ConfigSourceProvider index f2e23ca1b4e..796f03d7ce3 100644 --- a/src/main/resources/META-INF/services/org.eclipse.microprofile.config.spi.ConfigSourceProvider +++ b/src/main/resources/META-INF/services/org.eclipse.microprofile.config.spi.ConfigSourceProvider @@ -1,2 +1 @@ edu.harvard.iq.dataverse.settings.spi.AliasConfigSourceProvider -edu.harvard.iq.dataverse.settings.spi.DbSettingConfigSourceProvider diff --git a/src/main/resources/db/migration/V5.11.1.1__8605-support-archival-status.sql b/src/main/resources/db/migration/V5.11.1.1__8605-support-archival-status.sql new file mode 100644 index 00000000000..cf708ad0ea9 --- /dev/null +++ b/src/main/resources/db/migration/V5.11.1.1__8605-support-archival-status.sql @@ -0,0 +1,2 @@ +UPDATE datasetversion SET archivalCopyLocation = CONCAT('{"status":"success", "message":"', archivalCopyLocation,'"}') where archivalCopyLocation is not null and not archivalCopyLocation='Attempted'; +UPDATE datasetversion SET archivalCopyLocation = CONCAT('{"status":"failure", "message":"Attempted"}') where archivalCopyLocation='Attempted'; diff --git a/src/main/resources/db/migration/V5.11.1.3__hdc-3b.sql b/src/main/resources/db/migration/V5.11.1.3__hdc-3b.sql new file mode 100644 index 00000000000..af8143a97d6 --- /dev/null +++ b/src/main/resources/db/migration/V5.11.1.3__hdc-3b.sql @@ -0,0 +1 @@ +ALTER TABLE usernotification ADD COLUMN IF NOT EXISTS additionalinfo VARCHAR; diff --git a/src/main/resources/db/migration/V5.11.1.4__hdc-3b2-template-instructions.sql b/src/main/resources/db/migration/V5.11.1.4__hdc-3b2-template-instructions.sql new file mode 100644 index 00000000000..df1d3068159 --- /dev/null +++ 
b/src/main/resources/db/migration/V5.11.1.4__hdc-3b2-template-instructions.sql @@ -0,0 +1,14 @@ +ALTER TABLE template ADD COLUMN IF NOT EXISTS instructions TEXT; + +ALTER TABLE dataset ADD COLUMN IF NOT EXISTS template_id BIGINT; + +DO $$ +BEGIN + + BEGIN + ALTER TABLE dataset ADD CONSTRAINT fx_dataset_template_id FOREIGN KEY (template_id) REFERENCES template(id); + EXCEPTION + WHEN duplicate_object THEN RAISE NOTICE 'Table constraint fx_dataset_template_id already exists'; /* constraint already present: report it and continue instead of failing the migration */ + END; + +END $$; diff --git a/src/main/resources/db/migration/V5.11.1.5__8536-metadata-block-facet.sql b/src/main/resources/db/migration/V5.11.1.5__8536-metadata-block-facet.sql new file mode 100644 index 00000000000..47435004b6d --- /dev/null +++ b/src/main/resources/db/migration/V5.11.1.5__8536-metadata-block-facet.sql @@ -0,0 +1,11 @@ +ALTER TABLE dataverse + ADD COLUMN IF NOT EXISTS metadatablockfacetroot BOOLEAN; + +UPDATE dataverse SET metadatablockfacetroot = false; + +CREATE TABLE IF NOT EXISTS dataversemetadatablockfacet ( + id SERIAL NOT NULL, + dataverse_id BIGINT NOT NULL, + metadatablock_id BIGINT NOT NULL, + PRIMARY KEY (ID) +); diff --git a/src/main/resources/db/migration/V5.11.1.6__storageconstraint.sql b/src/main/resources/db/migration/V5.11.1.6__storageconstraint.sql new file mode 100644 index 00000000000..c2629213e98 --- /dev/null +++ b/src/main/resources/db/migration/V5.11.1.6__storageconstraint.sql @@ -0,0 +1,10 @@ +DO $$ +BEGIN + + BEGIN + ALTER TABLE dvobject ADD CONSTRAINT chk_dvobject_storageidentifier check (strpos(storageidentifier,'..') = 0); + EXCEPTION + WHEN duplicate_object THEN RAISE NOTICE 'Table constraint chk_dvobject_storageidentifier already exists'; + END; + +END $$; diff --git a/src/main/webapp/dataset-license-terms.xhtml b/src/main/webapp/dataset-license-terms.xhtml index 9d381f54dd5..1cbf297bf89 100644 --- a/src/main/webapp/dataset-license-terms.xhtml +++ b/src/main/webapp/dataset-license-terms.xhtml @@ -236,19 +236,19 @@ -
        - +  
        -
        +
        -
        +
        -
        +
        -
        +
        -
        +
        - - -

        - - - - - - -

        -
        -
        - +
         #{bundle['messages.info']} – @@ -325,13 +313,13 @@
        + value="#{termsOfUseAndAccess.fileAccessRequest}" disabled="#{datasetPage == true and !DatasetPage.hasRestrictedFiles}">
        -
        +
        #{bundle['file.dataFilesTab.terms.list.termsOfAccess.requestAccess.warning.outofcompliance']}
        @@ -343,7 +331,7 @@
        + rows="5" styleClass="form-control" disabled="#{datasetPage == true and !DatasetPage.hasRestrictedFiles}" />
        @@ -351,7 +339,7 @@ -
        +
        diff --git a/src/main/webapp/dataset-versions.xhtml b/src/main/webapp/dataset-versions.xhtml index 936c43d07a7..b1612a314fc 100644 --- a/src/main/webapp/dataset-versions.xhtml +++ b/src/main/webapp/dataset-versions.xhtml @@ -131,7 +131,7 @@ - + @@ -147,6 +147,24 @@ + + + + + + + + + + + + + + + + + diff --git a/src/main/webapp/dataset.xhtml b/src/main/webapp/dataset.xhtml index b08c06e1568..1bb862721a5 100644 --- a/src/main/webapp/dataset.xhtml +++ b/src/main/webapp/dataset.xhtml @@ -217,6 +217,14 @@ + +
      • + + + +
      • +
        + @@ -401,8 +409,8 @@ -
      • - +
      • + #{bundle['dataset.editBtn.itemLabel.permissions']} - @@ -749,6 +757,7 @@ + @@ -836,6 +845,7 @@ + @@ -886,6 +896,7 @@
      • + @@ -904,6 +915,7 @@ + @@ -923,7 +935,8 @@
        + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -402,6 +430,19 @@ + + + + + + + + + + #{item.theObject.getDisplayName()} + + + diff --git a/src/main/webapp/editFilesFragment.xhtml b/src/main/webapp/editFilesFragment.xhtml index b7c2943c3e2..0fd5bf48fb7 100644 --- a/src/main/webapp/editFilesFragment.xhtml +++ b/src/main/webapp/editFilesFragment.xhtml @@ -167,6 +167,11 @@
        +
        +

        #{bundle['file.fromGlobus.tip']}

        +

        #{bundle['file.fromGlobusAfterCreate.tip']}

        + +
        @@ -292,9 +297,9 @@
        -
        +
        - +