diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index cf982992c03..b297dfc4ee8 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -1,11 +1,13 @@ --- name: Bug report -about: Did you encounter something unexpected or incorrect in the Dataverse software? We'd like to hear about it! +about: Did you encounter something unexpected or incorrect in the Dataverse software? + We'd like to hear about it! title: '' labels: '' assignees: '' --- + - + SAML2 SAML1 diff --git a/doc/sphinx-guides/source/_static/installation/files/root/external-tools/auxFileTool.json b/doc/sphinx-guides/source/_static/installation/files/root/external-tools/auxFileTool.json new file mode 100644 index 00000000000..b188520dabb --- /dev/null +++ b/doc/sphinx-guides/source/_static/installation/files/root/external-tools/auxFileTool.json @@ -0,0 +1,26 @@ +{ + "displayName": "AuxFileViewer", + "description": "Show an auxiliary file from a dataset file.", + "toolName": "auxPreviewer", + "scope": "file", + "types": [ + "preview" + ], + "toolUrl": "https://example.com/AuxFileViewer.html", + "toolParameters": { + "queryParameters": [ + { + "fileid": "{fileId}" + } + ] + }, + "requirements": { + "auxFilesExist": [ + { + "formatTag": "myFormatTag", + "formatVersion": "0.1" + } + ] + }, + "contentType": "application/foobar" +} diff --git a/doc/sphinx-guides/source/admin/harvestclients.rst b/doc/sphinx-guides/source/admin/harvestclients.rst index e94a6aa1730..02783e4b97a 100644 --- a/doc/sphinx-guides/source/admin/harvestclients.rst +++ b/doc/sphinx-guides/source/admin/harvestclients.rst @@ -21,6 +21,8 @@ Clients are managed on the "Harvesting Clients" page accessible via the :doc:`da The process of creating a new, or editing an existing client, is largely self-explanatory. It is split into logical steps, in a way that allows the user to go back and correct the entries made earlier. The process is interactive and guidance text is provided. For example, the user is required to enter the URL of the remote OAI server. When they click *Next*, the application will try to establish a connection to the server in order to verify that it is working, and to obtain the information about the sets of metadata records and the metadata formats it supports. The choices offered to the user on the next page will be based on this extra information. If the application fails to establish a connection to the remote archive at the address specified, or if an invalid response is received, the user is given an opportunity to check and correct the URL they entered. +Note that as of 5.13, a new entry "Custom HTTP Header" has been added to the Step 1. of Create or Edit form. This optional field can be used to configure this client with a specific HTTP header to be added to every OAI request. This is to accommodate a (rare) use case where the remote server may require a special token of some kind in order to offer some content not available to other clients. Most OAI servers offer the same publicly-available content to all clients, so few admins will have a use for this feature. It is however on the very first, Step 1. screen in case the OAI server requires this token even for the "ListSets" and "ListMetadataFormats" requests, which need to be sent in the Step 2. of creating or editing a client. Multiple headers can be supplied separated by `\\n` - actual "backslash" and "n" characters, not a single "new line" character. 
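+For example (using a placeholder server URL, and reusing the sample header value from the harvesting client example in the API Guide), configuring the value ``x-oai-api-key: xxxyyyzzz`` makes the client send roughly the equivalent of the following request:
+
+.. code-block:: bash
+
+   # hypothetical remote OAI server; the configured header(s) are added to every
+   # OAI request the client sends, including the initial ListSets call
+   curl -H "x-oai-api-key: xxxyyyzzz" "https://example.com/oai?verb=ListSets"
+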
+ How to Stop a Harvesting Run in Progress ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/sphinx-guides/source/admin/integrations.rst b/doc/sphinx-guides/source/admin/integrations.rst index b29e51b581d..1888fd89761 100644 --- a/doc/sphinx-guides/source/admin/integrations.rst +++ b/doc/sphinx-guides/source/admin/integrations.rst @@ -116,6 +116,8 @@ Binder Researchers can launch Jupyter Notebooks, RStudio, and other computational environments by entering the DOI of a dataset in a Dataverse installation on https://mybinder.org +A Binder button can also be added to every dataset page to launch Binder from there. See :doc:`external-tools`. + Institutions can self host BinderHub. The Dataverse Project is one of the supported `repository providers `_. Renku diff --git a/doc/sphinx-guides/source/admin/metadataexport.rst b/doc/sphinx-guides/source/admin/metadataexport.rst index 78b8c8ce223..200c3a3e342 100644 --- a/doc/sphinx-guides/source/admin/metadataexport.rst +++ b/doc/sphinx-guides/source/admin/metadataexport.rst @@ -57,3 +57,13 @@ Downloading Metadata via API ---------------------------- The :doc:`/api/native-api` section of the API Guide explains how end users can download the metadata formats above via API. + +Exporter Configuration +---------------------- + +Two exporters - Schema.org JSONLD and OpenAire - use an algorithm to determine whether an author or contact name belongs to a person or an organization. While the algorithm works well, there are cases in which it makes mistakes, usually inferring that an organization is a person. + +The Dataverse software implements two jvm-options that can be used to tune the algorithm: + +- :ref:`dataverse.personOrOrg.assumeCommaInPersonName` - boolean, default false. If true, Dataverse will assume any name without a comma must be an organization. This may be most useful for curated Dataverse instances that enforce the "family name, given name" convention. +- :ref:`dataverse.personOrOrg.orgPhraseArray` - a JsonArray of strings. Any name that contains one of the strings is assumed to be an organization. For example, "Project" could be added to this list, since it is a word the algorithm would not otherwise associate with an organization. diff --git a/doc/sphinx-guides/source/api/external-tools.rst b/doc/sphinx-guides/source/api/external-tools.rst index 4f6c9a8015c..eec9944338f 100644 --- a/doc/sphinx-guides/source/api/external-tools.rst +++ b/doc/sphinx-guides/source/api/external-tools.rst @@ -53,15 +53,21 @@ External tools must be expressed in an external tool manifest file, a specific J Examples of Manifests +++++++++++++++++++++ -Let's look at two examples of external tool manifests (one at the file level and one at the dataset level) before we dive into how they work. +Let's look at a few examples of external tool manifests (both at the file level and at the dataset level) before we dive into how they work. + +.. _tools-for-files: External Tools for Files ^^^^^^^^^^^^^^^^^^^^^^^^ -:download:`fabulousFileTool.json <../_static/installation/files/root/external-tools/fabulousFileTool.json>` is a file level both an "explore" tool and a "preview" tool that operates on tabular files: +:download:`fabulousFileTool.json <../_static/installation/files/root/external-tools/fabulousFileTool.json>` is a file-level tool (both an "explore" tool and a "preview" tool) that operates on tabular files: .. 
literalinclude:: ../_static/installation/files/root/external-tools/fabulousFileTool.json +:download:`auxFileTool.json <../_static/installation/files/root/external-tools/auxFileTool.json>` is a file level preview tool that operates on auxiliary files associated with a data file (note the "requirements" section): + +.. literalinclude:: ../_static/installation/files/root/external-tools/auxFileTool.json + External Tools for Datasets ^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -113,6 +119,10 @@ Terminology allowedApiCalls httpMethod Which HTTP method the specified callback uses such as ``GET`` or ``POST``. allowedApiCalls timeOut For non-public datasets and datafiles, how many minutes the signed URLs given to the tool should be valid for. Must be an integer. + + requirements **Resources your tool needs to function.** For now, the only requirement you can specify is that one or more auxiliary files exist (see auxFilesExist in the :ref:`tools-for-files` example). Currently, requirements only apply to preview tools. If the requirements are not met, the preview tool is not shown. + + auxFilesExist **An array containing formatTag and formatVersion pairs** for each auxiliary file that your tool needs to download to function properly. For example, a required aux file could have a ``formatTag`` of "NcML" and a ``formatVersion`` of "1.0". See also :doc:`/developers/aux-file-support`. toolName A **name** of an external tool that is used to differentiate between external tools and also used in bundle.properties for localization in the Dataverse installation web interface. For example, the toolName for Data Explorer is ``explorer``. For the Data Curation Tool the toolName is ``dct``. This is an optional parameter in the manifest JSON file. =========================== ========== diff --git a/doc/sphinx-guides/source/api/metrics.rst b/doc/sphinx-guides/source/api/metrics.rst index 6a878d73a98..f1eb1f88c71 100755 --- a/doc/sphinx-guides/source/api/metrics.rst +++ b/doc/sphinx-guides/source/api/metrics.rst @@ -72,7 +72,7 @@ Return Formats There are a number of API calls that provide time series, information reported per item (e.g. per dataset, per file, by subject, by category, and by file Mimetype), or both (time series per item). Because these calls all report more than a single number, the API provides two optional formats for the return that can be selected by specifying an HTTP Accept Header for the desired format: -* application/json - a JSON array of objects. For time-series, the objects include key/values for the ``date`` and ``count`` for that month. For per-item calls, the objects include the item (e.g. for a subject), or it's id/pid (for a dataset or datafile). For timeseries per-item, the objects also include a date. In all cases, the response is a single array. +* application/json - a JSON array of objects. For time-series, the objects include key/values for the ``date`` and ``count`` for that month. For per-item calls, the objects include the item (e.g. for a subject), or it's id/pid (for a dataset or datafile (which may/may not not have a PID)). For timeseries per-item, the objects also include a date. In all cases, the response is a single array. 
* Example: ``curl -H 'Accept:application/json' https://demo.dataverse.org/api/info/metrics/downloads/monthly`` @@ -120,7 +120,7 @@ Example: ``curl https://demo.dataverse.org/api/info/metrics/makeDataCount/viewsT Endpoint Table -------------- -The following table lists the available metrics endpoints (not including the Make Data Counts endpoints a single dataset which are part of the :doc:`/api/native-api`) along with additional notes about them. +The following table lists the available metrics endpoints (not including the Make Data Counts endpoints for a single dataset which are part of the :doc:`/api/native-api`) along with additional notes about them. .. csv-table:: Metrics Endpoints diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 76ca38fdc70..3cd469e3883 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -552,6 +552,8 @@ You should expect an HTTP 200 ("OK") response and JSON indicating the database I .. note:: Only a Dataverse installation account with superuser permissions is allowed to include files when creating a dataset via this API. Adding files this way only adds their file metadata to the database, you will need to manually add the physical files to the file system. +.. _api-import-dataset: + Import a Dataset into a Dataverse Collection ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -728,13 +730,12 @@ The fully expanded example above (without environment variables) looks like this curl -H "X-Dataverse-key:$API_TOKEN" https://demo.dataverse.org/api/datasets/:persistentId/versions/:draft?persistentId=doi:10.5072/FK2/J8SJZB - -|CORS| Show the dataset whose id is passed: +|CORS| Show the dataset whose database id is passed: .. code-block:: bash export SERVER_URL=https://demo.dataverse.org - export ID=408730 + export ID=24 curl $SERVER_URL/api/datasets/$ID @@ -742,7 +743,7 @@ The fully expanded example above (without environment variables) looks like this .. code-block:: bash - curl https://demo.dataverse.org/api/datasets/408730 + curl https://demo.dataverse.org/api/datasets/24 The dataset id can be extracted from the response retrieved from the API which uses the persistent identifier (``/api/datasets/:persistentId/?persistentId=$PERSISTENT_IDENTIFIER``). @@ -1511,6 +1512,45 @@ The fully expanded example above (without environment variables) looks like this curl -H X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx -X POST https://demo.dataverse.org/api/datasets/:persistentId/add?persistentId=doi:10.5072/FK2/J8SJZB -F 'jsonData={"description":"A remote image.","storageIdentifier":"trsa://themes/custom/qdr/images/CoreTrustSeal-logo-transparent.png","checksumType":"MD5","md5Hash":"509ef88afa907eaf2c17c1c8d8fde77e","label":"testlogo.png","fileName":"testlogo.png","mimeType":"image/png"}' +.. _cleanup-storage-api: + +Cleanup storage of a Dataset +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This is an experimental feature and should be tested on your system before using it in production. +Also, make sure that your backups are up-to-date before using this on production servers. +It is advised to first call this method with the ``dryrun`` parameter set to ``true`` before actually deleting the files. +This will allow you to manually inspect the files that would be deleted if that parameter is set to ``false`` or is omitted (a list of the files that would be deleted is provided in the response). 
+ +If your Dataverse installation has been configured to support direct uploads, or in some other situations, +you could end up with some files in the storage of a dataset that are not linked to that dataset directly. Most commonly, this could +happen when an upload fails in the middle of a transfer, e.g. if a user starts a direct upload in the UI and leaves the page without hitting cancel or save, +Dataverse doesn't know about the files and doesn't clean them up. Similarly, in the direct upload API, if the final /addFiles call isn't made, the files are abandoned. + +All the files stored in the Dataset storage location that are not in the file list of that Dataset (and follow the naming pattern of the dataset files) can be removed, as shown in the example below. + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_ID=doi:10.5072/FK2/J8SJZB + export DRYRUN=true + + curl -H "X-Dataverse-key: $API_TOKEN" -X GET "$SERVER_URL/api/datasets/:persistentId/cleanStorage?persistentId=$PERSISTENT_ID&dryrun=$DRYRUN" + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl -H "X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X GET "https://demo.dataverse.org/api/datasets/:persistentId/cleanStorage?persistentId=doi:10.5072/FK2/J8SJZB&dryrun=true" + +Adding Files To a Dataset via Other Tools +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In some circumstances, it may be useful to move or copy files into Dataverse's storage manually or via external tools and then add them to a dataset (i.e. without involving Dataverse in the file transfer itself). +Two API calls are available for this use case to add files to a dataset or to replace files that were already in the dataset. +These calls were developed as part of Dataverse's direct upload mechanism and are detailed in :doc:`/developers/s3-direct-upload-api`. + Report the data (file) size of a Dataset ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -2051,6 +2091,77 @@ The response is a JSON object described in the :doc:`/api/external-tools` sectio Files ----- +Get JSON Representation of a File +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. note:: Files can be accessed using persistent identifiers. This is done by passing the constant ``:persistentId`` where the numeric id of the file is expected, and then passing the actual persistent id as a query parameter with the name ``persistentId``. + +Example: Getting the file whose DOI is *10.5072/FK2/J8SJZB*: + +.. code-block:: bash + + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/J8SJZB + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + + curl -H "X-Dataverse-key:$API_TOKEN" $SERVER_URL/api/files/:persistentId/?persistentId=$PERSISTENT_IDENTIFIER + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" https://demo.dataverse.org/api/files/:persistentId/?persistentId=doi:10.5072/FK2/J8SJZB + +You can get the draft version of an unpublished file if you pass an API token with view draft permissions: + +.. 
code-block:: bash + + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/J8SJZB + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + + curl -H "X-Dataverse-key:$API_TOKEN" $SERVER_URL/api/files/:persistentId/?persistentId=$PERSISTENT_IDENTIFIER + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" https://demo.dataverse.org/api/files/:persistentId/?persistentId=doi:10.5072/FK2/J8SJZB + + +|CORS| Show the file whose database id is passed: + +.. code-block:: bash + + export SERVER_URL=https://demo.dataverse.org + export ID=408730 + + curl $SERVER_URL/api/files/$ID + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl https://demo.dataverse.org/api/files/408730 + +You can get the draft version of a published file if you pass an API token with view draft permissions and use the draft path parameter: + +.. code-block:: bash + + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/J8SJZB + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + + curl -H "X-Dataverse-key:$API_TOKEN" $SERVER_URL/api/files/:persistentId/draft/?persistentId=$PERSISTENT_IDENTIFIER + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" https://demo.dataverse.org/api/files/:persistentId/draft/?persistentId=doi:10.5072/FK2/J8SJZB + +The file id can be extracted from the response retrieved from the API which uses the persistent identifier (``/api/datasets/:persistentId/?persistentId=$PERSISTENT_IDENTIFIER``). + Adding Files ~~~~~~~~~~~~ @@ -2248,6 +2359,47 @@ Currently the following methods are used to detect file types: - The file extension (e.g. ".ipybn") is used, defined in a file called ``MimeTypeDetectionByFileExtension.properties``. - The file name (e.g. "Dockerfile") is used, defined in a file called ``MimeTypeDetectionByFileName.properties``. +.. _extractNcml: + +Extract NcML +~~~~~~~~~~~~ + +As explained in the :ref:`netcdf-and-hdf5` section of the User Guide, when those file types are uploaded, an attempt is made to extract an NcML file from them and store it as an auxiliary file. + +This happens automatically but superusers can also manually trigger this NcML extraction process with the API endpoint below. + +Note that "true" will be returned if an NcML file was created. "false" will be returned if there was an error or if the NcML file already exists (check server.log for details). + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export ID=24 + + curl -H "X-Dataverse-key:$API_TOKEN" -X POST "$SERVER_URL/api/files/$ID/extractNcml" + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST "https://demo.dataverse.org/api/files/24/extractNcml" + +A curl example using a PID: + +.. 
code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_ID=doi:10.5072/FK2/AAA000 + + curl -H "X-Dataverse-key:$API_TOKEN" -X POST "$SERVER_URL/api/files/:persistentId/extractNcml?persistentId=$PERSISTENT_ID" + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST "https://demo.dataverse.org/api/files/:persistentId/extractNcml?persistentId=doi:10.5072/FK2/AAA000" + Replacing Files ~~~~~~~~~~~~~~~ @@ -2366,48 +2518,6 @@ The fully expanded example above (without environment variables) looks like this Note: The ``id`` returned in the json response is the id of the file metadata version. - -Adding File Metadata -~~~~~~~~~~~~~~~~~~~~ - -This API call requires a ``jsonString`` expressing the metadata of multiple files. It adds file metadata to the database table where the file has already been copied to the storage. - -The jsonData object includes values for: - -* "description" - A description of the file -* "directoryLabel" - The "File Path" of the file, indicating which folder the file should be uploaded to within the dataset -* "storageIdentifier" - String -* "fileName" - String -* "mimeType" - String -* "fixity/checksum" either: - - * "md5Hash" - String with MD5 hash value, or - * "checksum" - Json Object with "@type" field specifying the algorithm used and "@value" field with the value from that algorithm, both Strings - -.. note:: See :ref:`curl-examples-and-environment-variables` if you are unfamiliar with the use of ``export`` below. - -A curl example using an ``PERSISTENT_ID`` - -* ``SERVER_URL`` - e.g. https://demo.dataverse.org -* ``API_TOKEN`` - API endpoints require an API token that can be passed as the X-Dataverse-key HTTP header. For more details, see the :doc:`auth` section. -* ``PERSISTENT_IDENTIFIER`` - Example: ``doi:10.5072/FK2/7U7YBV`` - -.. code-block:: bash - - export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx - export SERVER_URL=https://demo.dataverse.org - export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/7U7YBV - export JSON_DATA="[{'description':'My description.','directoryLabel':'data/subdir1','categories':['Data'], 'restrict':'false', 'storageIdentifier':'s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42', 'fileName':'file1.txt', 'mimeType':'text/plain', 'checksum': {'@type': 'SHA-1', '@value': '123456'}}, \ - {'description':'My description.','directoryLabel':'data/subdir1','categories':['Data'], 'restrict':'false', 'storageIdentifier':'s3://demo-dataverse-bucket:176e28068b0-1c3f80357d53', 'fileName':'file2.txt', 'mimeType':'text/plain', 'checksum': {'@type': 'SHA-1', '@value': '123789'}}]" - - curl -X POST -H "X-Dataverse-key: $API_TOKEN" "$SERVER_URL/api/datasets/:persistentId/addFiles?persistentId=$PERSISTENT_IDENTIFIER" -F "jsonData=$JSON_DATA" - -The fully expanded example above (without environment variables) looks like this: - -.. 
code-block:: bash - - curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST https://demo.dataverse.org/api/datasets/:persistentId/addFiles?persistentId=doi:10.5072/FK2/7U7YBV -F jsonData='[{"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "storageIdentifier":"s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42", "fileName":"file1.txt", "mimeType":"text/plain", "checksum": {"@type": "SHA-1", "@value": "123456"}}, {"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "storageIdentifier":"s3://demo-dataverse-bucket:176e28068b0-1c3f80357d53", "fileName":"file2.txt", "mimeType":"text/plain", "checksum": {"@type": "SHA-1", "@value": "123789"}}]' - Updating File Metadata ~~~~~~~~~~~~~~~~~~~~~~ @@ -3331,7 +3441,8 @@ The following optional fields are supported: - archiveDescription: What the name suggests. If not supplied, will default to "This Dataset is harvested from our partners. Clicking the link will take you directly to the archival source of the data." - set: The OAI set on the remote server. If not supplied, will default to none, i.e., "harvest everything". - style: Defaults to "default" - a generic OAI archive. (Make sure to use "dataverse" when configuring harvesting from another Dataverse installation). - +- customHeaders: This can be used to configure this client with a specific HTTP header that will be added to every OAI request. This is to accommodate a use case where the remote server requires this header to supply some form of a token in order to offer some content not available to other clients. See the example below. Multiple headers can be supplied separated by `\\n` - actual "backslash" and "n" characters, not a single "new line" character. + Generally, the API will accept the output of the GET version of the API for an existing client as valid input, but some fields will be ignored. For example, as of writing this there is no way to configure a harvesting schedule via this API. An example JSON file would look like this:: @@ -3343,6 +3454,7 @@ An example JSON file would look like this:: "archiveUrl": "https://zenodo.org", "archiveDescription": "Moissonné depuis la collection LMOPS de l'entrepôt Zenodo. En cliquant sur ce jeu de données, vous serez redirigé vers Zenodo.", "metadataFormat": "oai_dc", + "customHeaders": "x-oai-api-key: xxxyyyzzz", "set": "user-lmops" } diff --git a/doc/sphinx-guides/source/container/base-image.rst b/doc/sphinx-guides/source/container/base-image.rst new file mode 100644 index 00000000000..931c722f91b --- /dev/null +++ b/doc/sphinx-guides/source/container/base-image.rst @@ -0,0 +1,354 @@ +Application Base Image +====================== + +.. contents:: |toctitle| + :local: + +A "base image" offers you a pre-installed and pre-tuned application server to deploy Dataverse software to. +Adding basic functionality like executing scripts at container boot, monitoring, memory tweaks etc is all done +at this layer, to make the application image focus on the app itself. + +**NOTE: The base image does not contain the Dataverse application itself.** + +Within the main repository, you may find the base image's files at ``/modules/container-base``. +This Maven module uses the `Maven Docker Plugin `_ to build and ship the image. +You may use, extend, or alter this image to your liking and/or host in some different registry if you want to. 
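+If you just want to try the image, you can also pull one of the community-built tags from Docker Hub instead of building it yourself (see "Supported Image Tags" and "Automated Builds & Publishing" below):
+
+.. code-block:: bash
+
+   # pull the community-built development tag (tracks the develop branch);
+   # use gdcc/base:stable for the image built from the master branch
+   docker pull gdcc/base:unstable
+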
+ +**NOTE: This image is created, maintained and supported by the Dataverse community on a best-effort basis.** +IQSS will not offer you support how to deploy or run it, please reach out to the community for help on using it. +You might be interested in taking a look at :doc:`../developers/containers`, linking you to some (community-based) +efforts. + +Supported Image Tags +++++++++++++++++++++ + +This image is sourced from the main upstream code `repository of the Dataverse software `_. +Development and maintenance of the `image's code `_ +happens there (again, by the community). Community-supported image tags are based on the two most important +upstream branches: + +- The ``unstable`` tag corresponds to the ``develop`` branch, where pull requests are merged. + (`Dockerfile `__) +- The ``stable`` tag corresponds to the ``master`` branch, where releases are cut from. + (`Dockerfile `__) + + + +Image Contents +++++++++++++++ + +The base image provides: + +- `Eclipse Temurin JRE using Java 11 `_ +- `Payara Community Application Server `_ +- CLI tools necessary to run Dataverse (i. e. ``curl`` or ``jq`` - see also :doc:`../installation/prerequisites` in Installation Guide) +- Linux tools for analysis, monitoring and so on +- `Jattach `__ (attach to running JVM) +- `wait-for `__ (tool to "wait for" a service to be available) +- `dumb-init `__ (see :ref:`below ` for details) + +This image is created as a "multi-arch image", see :ref:`below `. + +It inherits (is built on) an Ubuntu environment from the upstream +`base image of Eclipse Temurin `_. +You are free to change the JRE/JDK image to your liking (see below). + + + +Build Instructions +++++++++++++++++++ + +Assuming you have `Docker `_, `Docker Desktop `_, +`Moby `_ or some remote Docker host configured, up and running from here on. + +Simply execute the Maven modules packaging target with activated "container profile. Either from the projects Git root: + +``mvn -Pct -f modules/container-base install`` + +Or move to the module and execute: + +``cd modules/container-base && mvn -Pct install`` + +Some additional notes, using Maven parameters to change the build and use ...: + +- | ... a different tag only: add ``-Dbase.image.tag=tag``. + | *Note:* default is ``develop`` +- | ... a different image name and tag: add ``-Dbase.image=name:tag``. + | *Note:* default is ``gdcc/base:${base.image.tag}`` +- ... a different image registry than Docker Hub: add ``-Ddocker.registry=registry.example.org`` (see also + `DMP docs on registries `__) +- ... a different Payara version: add ``-Dpayara.version=V.YYYY.R``. +- | ... a different Temurin JRE version ``A``: add ``-Dtarget.java.version=A`` (i.e. ``11``, ``17``, ...). + | *Note:* must resolve to an available image tag ``A-jre`` of Eclipse Temurin! + (See also `Docker Hub search example `_) +- ... a different Java Distribution: add ``-Djava.image="name:tag"`` with precise reference to an + image available local or remote. +- ... a different UID/GID for the ``payara`` user/group: add ``-Dbase.image.uid=1234`` (or ``.gid``) + +Automated Builds & Publishing +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +To make reusing most simple, the image is built with a Github Action within the IQSS repository and then pushed +to `Docker Hub gdcc/base repository `_. It is built and pushed on every edit to +its sources plus uncached scheduled nightly builds to make sure security updates are finding their way in. 
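+If you want to build and publish the image to a registry of your own instead, a minimal sketch (the registry host and image name below are placeholders, and you need to be logged in to that registry first) could look like this:
+
+.. code-block:: bash
+
+   # sketch: build and push the base image to a different registry; the module's
+   # deploy phase triggers the multi-arch build (see "Processor Architecture and Multiarch" below)
+   mvn -Pct -f modules/container-base deploy \
+       -Ddocker.registry=registry.example.org \
+       -Dbase.image=myorg/base:mytag
+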
+ +*Note:* For the Github Action to be able to push to Docker Hub, two repository secrets +(DOCKERHUB_USERNAME, DOCKERHUB_TOKEN) have been added by IQSS admins to their repository. + +.. _base-multiarch: + +Processor Architecture and Multiarch +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +This image is created as a "multi-arch image", supporting the most common architectures Dataverse usually runs on: +AMD64 (Windows/Linux/...) and ARM64 (Apple M1/M2), by using Maven Docker Plugin's *BuildX* mode. + +Building the image via ``mvn -Pct package`` or ``mvn -Pct install`` as above will only build for the architecture of +the Docker maschine's CPU. + +Only ``mvn -Pct deploy`` will trigger building on all enabled architectures. +Yet, to enable building with non-native code on your build machine, you will need to setup a cross-platform builder. + +On Linux, you should install `qemu-user-static `__ (preferably via +your package management) on the host and run ``docker run --rm --privileged multiarch/qemu-user-static --reset -p yes`` +to enable that builder. The Docker plugin will setup everything else for you. + + + +Tunables +++++++++ + +The base image provides a Payara domain suited for production use, but can also be used during development. +Many settings have been carefully selected for best performance and stability of the Dataverse application. + +As with any service, you should always monitor any metrics and make use of the tuning capabilities the base image +provides. These are mostly based on environment variables (very common with containers) and provide sane defaults. + +.. list-table:: + :align: left + :width: 100 + :widths: 10 10 10 50 + :header-rows: 1 + + * - Env. variable + - Default + - Type + - Description + * - ``DEPLOY_PROPS`` + - (empty) + - String + - Set to add arguments to generated `asadmin deploy` commands. + * - ``PREBOOT_COMMANDS`` + - [preboot]_ + - Abs. path + - Provide path to file with ``asadmin`` commands to run **before** boot of application server. + See also `Pre/postboot script docs`_. + * - ``POSTBOOT_COMMANDS`` + - [postboot]_ + - Abs. path + - Provide path to file with ``asadmin`` commands to run **after** boot of application server. + See also `Pre/postboot script docs`_. + * - ``JVM_ARGS`` + - (empty) + - String + - Additional arguments to pass to application server's JVM on start. + * - ``MEM_MAX_RAM_PERCENTAGE`` + - ``70.0`` + - Percentage + - Maximum amount of container's allocated RAM to be used as heap space. + Make sure to leave some room for native memory, OS overhead etc! + * - ``MEM_XSS`` + - ``512k`` + - Size + - Tune the maximum JVM stack size. + * - ``MEM_MIN_HEAP_FREE_RATIO`` + - ``20`` + - Integer + - Make the heap shrink aggressively and grow conservatively. See also `run-java-sh recommendations`_. + * - ``MEM_MAX_HEAP_FREE_RATIO`` + - ``40`` + - Integer + - Make the heap shrink aggressively and grow conservatively. See also `run-java-sh recommendations`_. + * - ``MEM_MAX_GC_PAUSE_MILLIS`` + - ``500`` + - Milliseconds + - Shorter pause times might result in lots of collections causing overhead without much gain. + This needs monitoring and tuning. It's a complex matter. + * - ``MEM_METASPACE_SIZE`` + - ``256m`` + - Size + - Initial size of memory reserved for class metadata, also used as trigger to run a garbage collection + once passing this size. + * - ``MEM_MAX_METASPACE_SIZE`` + - ``2g`` + - Size + - The metaspace's size will not outgrow this limit. 
+ * - ``ENABLE_DUMPS`` + - ``0`` + - Bool, ``0|1`` + - If enabled, the argument(s) given in ``JVM_DUMP_ARG`` will be added to the JVM starting up. + This means it will enable dumping the heap to ``${DUMPS_DIR}`` (see below) in "out of memory" cases. + (You should back this location with disk space / ramdisk, so it does not write into an overlay filesystem!) + * - ``JVM_DUMPS_ARG`` + - [dump-option]_ + - String + - Can be fine tuned for more grained controls of dumping behaviour. + * - ``ENABLE_JMX`` + - ``0`` + - Bool, ``0|1`` + - Allow insecure JMX connections, enable AMX and tune all JMX monitoring levels to ``HIGH``. + See also `Payara Docs - Basic Monitoring `_. + A basic JMX service is enabled by default in Payara, exposing basic JVM MBeans, but especially no Payara MBeans. + * - ``ENABLE_JDWP`` + - ``0`` + - Bool, ``0|1`` + - Enable the "Java Debug Wire Protocol" to attach a remote debugger to the JVM in this container. + Listens on port 9009 when enabled. Search the internet for numerous tutorials to use it. + * - ``ENABLE_RELOAD`` + - ``0`` + - Bool, ``0|1`` + - Enable the dynamic "hot" reloads of files when changed in a deployment. Useful for development, + when new artifacts are copied into the running domain. + * - ``DATAVERSE_HTTP_TIMEOUT`` + - ``900`` + - Seconds + - See :ref:`:ApplicationServerSettings` ``http.request-timeout-seconds``. + + *Note:* can also be set using any other `MicroProfile Config Sources`_ available via ``dataverse.http.timeout``. + + +.. [preboot] ``${CONFIG_DIR}/pre-boot-commands.asadmin`` +.. [postboot] ``${CONFIG_DIR}/post-boot-commands.asadmin`` +.. [dump-option] ``-XX:+HeapDumpOnOutOfMemoryError`` + + + +Locations ++++++++++ + +This environment variables represent certain locations and might be reused in your scripts etc. +All of these variables aren't meant to be reconfigurable and reflect state in the filesystem layout! + +**Writeable at build time:** + +The overlay filesystem of Docker and other container technologies is not meant to be used for any performance IO. +You should avoid *writing* data anywhere in the file tree at runtime, except for well known locations with mounted +volumes backing them (see below). + +The locations below are meant to be written to when you build a container image, either this base or anything +building upon it. You can also use these for references in scripts, etc. + +.. list-table:: + :align: left + :width: 100 + :widths: 10 10 50 + :header-rows: 1 + + * - Env. variable + - Value + - Description + * - ``HOME_DIR`` + - ``/opt/payara`` + - Home base to Payara and the application + * - ``PAYARA_DIR`` + - ``${HOME_DIR}/appserver`` + - Installation directory of Payara server + * - ``SCRIPT_DIR`` + - ``${HOME_DIR}/scripts`` + - Any scripts like the container entrypoint, init scripts, etc + * - ``CONFIG_DIR`` + - ``${HOME_DIR}/config`` + - Payara Server configurations like pre/postboot command files go here + (Might be reused for Dataverse one day) + * - ``DEPLOY_DIR`` + - ``${HOME_DIR}/deployments`` + - Any EAR or WAR file, exploded WAR directory etc are autodeployed on start + * - ``DOMAIN_DIR`` + - ``${PAYARA_DIR}/glassfish`` ``/domains/${DOMAIN_NAME}`` + - Path to root of the Payara domain applications will be deployed into. Usually ``${DOMAIN_NAME}`` will be ``domain1``. + + +**Writeable at runtime:** + +The locations below are defined as `Docker volumes `_ by the base image. 
+They will by default get backed by an "anonymous volume", but you can (and should) bind-mount a host directory or +named Docker volume in these places to avoid data loss, gain performance and/or use a network file system. + +**Notes:** +1. On Kubernetes you still need to provide volume definitions for these places in your deployment objects! +2. You should not write data into these locations at build time - it will be shadowed by the mounted volumes! + +.. list-table:: + :align: left + :width: 100 + :widths: 10 10 50 + :header-rows: 1 + + * - Env. variable + - Value + - Description + * - ``STORAGE_DIR`` + - ``/dv`` + - This place is writeable by the Payara user, making it usable as a place to store research data, customizations + or other. Images inheriting the base image should create distinct folders here, backed by different + mounted volumes. + * - ``SECRETS_DIR`` + - ``/secrets`` + - Mount secrets or other here, being picked up automatically by + `Directory Config Source `_. + See also various :doc:`../installation/config` options involving secrets. + * - ``DUMPS_DIR`` + - ``/dumps`` + - Default location where heap dumps will be stored (see above). + You should mount some storage here (disk or ephemeral). + + +Exposed Ports ++++++++++++++ + +The default ports that are exposed by this image are: + +- 8080 - HTTP listener +- 4848 - Admin Service HTTPS listener +- 8686 - JMX listener +- 9009 - "Java Debug Wire Protocol" port (when ``ENABLE_JDWP=1``) + +The HTTPS listener (on port 8181) becomes deactivated during the build, as we will always need to reverse-proxy the +application server and handle SSL/TLS termination at this point. Save the memory and some CPU cycles! + + + +.. _base-entrypoint: + +Entry & Extension Points +++++++++++++++++++++++++ + +The entrypoint shell script provided by this base image will by default ensure to: + +- Run any scripts named ``${SCRIPT_DIR}/init_*`` or in ``${SCRIPT_DIR}/init.d/*`` directory for initialization + **before** the application server starts. +- Run an executable script ``${SCRIPT_DIR}/startInBackground.sh`` in the background - if present. +- Run the application server startup scripting in foreground (``${SCRIPT_DIR}/startInForeground.sh``). + +If you need to create some scripting that runs in parallel under supervision of `dumb-init `_, +e.g. to wait for the application to deploy before executing something, this is your point of extension: simply provide +the ``${SCRIPT_DIR}/startInBackground.sh`` executable script with your application image. + + + +Other Hints ++++++++++++ + +By default, ``domain1`` is enabled to use the ``G1GC`` garbage collector. + +For running a Java application within a Linux based container, the support for CGroups is essential. It has been +included and activated by default since Java 8u192, Java 11 LTS and later. If you are interested in more details, +you can read about those in a few places like https://developers.redhat.com/articles/2022/04/19/java-17-whats-new-openjdks-container-awareness, +https://www.eclipse.org/openj9/docs/xxusecontainersupport, etc. The other memory defaults are inspired +from `run-java-sh recommendations`_. + + + +.. _Pre/postboot script docs: https://docs.payara.fish/community/docs/Technical%20Documentation/Payara%20Micro%20Documentation/Payara%20Micro%20Configuration%20and%20Management/Micro%20Management/Asadmin%20Commands/Pre%20and%20Post%20Boot%20Commands.html +.. 
_MicroProfile Config Sources: https://docs.payara.fish/community/docs/Technical%20Documentation/MicroProfile/Config/Overview.html +.. _run-java-sh recommendations: https://github.com/fabric8io-images/run-java-sh/blob/master/TUNING.md#recommandations diff --git a/doc/sphinx-guides/source/container/index.rst b/doc/sphinx-guides/source/container/index.rst new file mode 100644 index 00000000000..92ac94e2cf2 --- /dev/null +++ b/doc/sphinx-guides/source/container/index.rst @@ -0,0 +1,27 @@ +Container Guide +=============== + +Running the Dataverse software in containers is quite different than in a :doc:`standard installation <../installation/prep>`. + +Both approaches have pros and cons. These days, containers are very often used for development and testing, +but there is an ever rising move toward running applications in the cloud using container technology. + +**NOTE:** +**As the Institute for Quantitative Social Sciences (IQSS) at Harvard is running a standard, non-containerized installation, +container support described in this guide is mostly created and maintained by the Dataverse community on a best-effort +basis.** + +This guide is *not* about installation on technology like Docker Swarm, Kubernetes, Rancher or other +solutions to run containers in production. There is the `Dataverse on K8s project `_ for this +purpose, as mentioned in the :doc:`/developers/containers` section of the Developer Guide. + +This guide focuses on describing the container images managed from the main Dataverse repository (again: by the +community, not IQSS), their features and limitations. Instructions on how to build the images yourself and how to +develop and extend them further are provided. + +**Contents:** + +.. toctree:: + + base-image + diff --git a/doc/sphinx-guides/source/developers/big-data-support.rst b/doc/sphinx-guides/source/developers/big-data-support.rst index 0782fd239a1..0a3dd23ed23 100644 --- a/doc/sphinx-guides/source/developers/big-data-support.rst +++ b/doc/sphinx-guides/source/developers/big-data-support.rst @@ -36,10 +36,20 @@ At present, one potential drawback for direct-upload is that files are only part ``./asadmin create-jvm-options "-Ddataverse.files..ingestsizelimit="`` +.. _cors-s3-bucket: -**IMPORTANT:** One additional step that is required to enable direct uploads via a Dataverse installation and for direct download to work with previewers is to allow cross site (CORS) requests on your S3 store. +Allow CORS for S3 Buckets +~~~~~~~~~~~~~~~~~~~~~~~~~ + +**IMPORTANT:** One additional step that is required to enable direct uploads via a Dataverse installation and for direct download to work with previewers and direct upload to work with dvwebloader (:ref:`folder-upload`) is to allow cross site (CORS) requests on your S3 store. The example below shows how to enable CORS rules (to support upload and download) on a bucket using the AWS CLI command line tool. Note that you may want to limit the AllowedOrigins and/or AllowedHeaders further. https://github.com/gdcc/dataverse-previewers/wiki/Using-Previewers-with-download-redirects-from-S3 has some additional information about doing this. 
+If you'd like to check the CORS configuration on your bucket before making changes: + +``aws s3api get-bucket-cors --bucket `` + +To proceed with making changes: + ``aws s3api put-bucket-cors --bucket --cors-configuration file://cors.json`` with the contents of the file cors.json as follows: diff --git a/doc/sphinx-guides/source/developers/containers.rst b/doc/sphinx-guides/source/developers/containers.rst index 64c7710f0f5..63eff266a4f 100755 --- a/doc/sphinx-guides/source/developers/containers.rst +++ b/doc/sphinx-guides/source/developers/containers.rst @@ -9,6 +9,8 @@ The Dataverse Community is exploring the use of Docker, Kubernetes, and other co The :doc:`testing` section mentions using Docker for integration tests. +See also the :doc:`/container/index`. + .. contents:: |toctitle| :local: diff --git a/doc/sphinx-guides/source/developers/documentation.rst b/doc/sphinx-guides/source/developers/documentation.rst index b20fd112533..c89ed6e3b75 100755 --- a/doc/sphinx-guides/source/developers/documentation.rst +++ b/doc/sphinx-guides/source/developers/documentation.rst @@ -22,6 +22,8 @@ That's it! Thank you for your contribution! Your pull request will be added manu Please see https://github.com/IQSS/dataverse/pull/5857 for an example of a quick fix that was merged (the "Files changed" tab shows how a typo was fixed). +Preview your documentation changes which will be built automatically as part of your pull request in Github. It will show up as a check entitled: `docs/readthedocs.org:dataverse-guide — Read the Docs build succeeded!`. For example, this PR built to https://dataverse-guide--9249.org.readthedocs.build/en/9249/. + If you would like to read more about the Dataverse Project's use of GitHub, please see the :doc:`version-control` section. For bug fixes and features we request that you create an issue before making a pull request but this is not at all necessary for quick fixes to the documentation. .. _admin: https://github.com/IQSS/dataverse/tree/develop/doc/sphinx-guides/source/admin diff --git a/doc/sphinx-guides/source/developers/index.rst b/doc/sphinx-guides/source/developers/index.rst index bf525422c84..6f93cf75d51 100755 --- a/doc/sphinx-guides/source/developers/index.rst +++ b/doc/sphinx-guides/source/developers/index.rst @@ -19,6 +19,7 @@ Developer Guide sql-upgrade-scripts testing documentation + security dependencies debugging coding-style diff --git a/doc/sphinx-guides/source/developers/making-releases.rst b/doc/sphinx-guides/source/developers/making-releases.rst index 55f5f550dd9..a2575bb5f50 100755 --- a/doc/sphinx-guides/source/developers/making-releases.rst +++ b/doc/sphinx-guides/source/developers/making-releases.rst @@ -95,6 +95,8 @@ At this point you can send around the draft release for any final feedback. Link Make corrections to the draft, if necessary. It will be out of sync with the .md file, but that's ok (`#7988 `_ is tracking this). +.. _run-build-create-war: + Run a Build to Create the War File ---------------------------------- @@ -110,6 +112,15 @@ Click "Save" then "Build Now". The build number will appear in ``/api/info/version`` (along with the commit mentioned above) from a running installation (e.g. ``{"version":"5.10.1","build":"907-b844672``). +Note that the build number comes from script in an early build step... + +.. code-block:: bash + + COMMIT_SHA1=`echo $GIT_COMMIT | cut -c-7` + echo "build.number=${BUILD_NUMBER}-${COMMIT_SHA1}" > $WORKSPACE/src/main/java/BuildNumber.properties + +... 
but we can explore alternative methods of specifying the build number, as described in :ref:`auto-custom-build-number`. + Build Installer (dvinstall.zip) ------------------------------- diff --git a/doc/sphinx-guides/source/developers/remote-users.rst b/doc/sphinx-guides/source/developers/remote-users.rst index a5e51aa5e54..21d36d28a75 100755 --- a/doc/sphinx-guides/source/developers/remote-users.rst +++ b/doc/sphinx-guides/source/developers/remote-users.rst @@ -1,6 +1,6 @@ -==================== -Shibboleth and OAuth -==================== +========================== +Shibboleth, OAuth and OIDC +========================== .. contents:: |toctitle| :local: @@ -30,4 +30,36 @@ Now when you go to http://localhost:8080/oauth2/firstLogin.xhtml you should be p ---- +OpenID Connect (OIDC) +--------------------- + +If you are working on the OpenID Connect (OIDC) user authentication flow, you do not need to connect to a remote provider (as explained in :doc:`/installation/oidc`) to test this feature. Instead, you can use the available configuration that allows you to run a test Keycloak OIDC identity management service locally through a Docker container. + +(Please note! The client secret (``ss6gE8mODCDfqesQaSG3gwUwZqZt547E``) is hard-coded in ``oidc-realm.json`` and ``oidc-keycloak-auth-provider.json``. Do not use this config in production! This is only for developers.) + +You can find this configuration in ``conf/keycloak``. There are two options available in this directory to run a Keycloak container: bash script or docker-compose. + +To run the container via bash script, execute the following command (positioned in ``conf/keycloak``): + +``./run-keycloak.sh`` + +The script will create a Keycloak container or restart it if the container was already created and stopped. Once the script is executed, Keycloak should be accessible from http://localhost:8090/ + +Now load the configuration defined in ``oidc-keycloak-auth-provider.json`` into your Dataverse installation to enable Keycloak as an authentication provider. + +``curl -X POST -H 'Content-type: application/json' --upload-file oidc-keycloak-auth-provider.json http://localhost:8080/api/admin/authenticationProviders`` + +You should see the new provider, called "OIDC-Keycloak", under "Other options" on the Log In page. + +You should be able to log into Keycloak with the following credentials: + +- username: kcuser +- password: kcpassword + +In case you want to stop and remove the Keycloak container, just run the other available bash script: + +``./rm-keycloak.sh`` + +---- + Previous: :doc:`unf/index` | Next: :doc:`geospatial` diff --git a/doc/sphinx-guides/source/developers/s3-direct-upload-api.rst b/doc/sphinx-guides/source/developers/s3-direct-upload-api.rst index 3dc73ce6a0c..4d323455d28 100644 --- a/doc/sphinx-guides/source/developers/s3-direct-upload-api.rst +++ b/doc/sphinx-guides/source/developers/s3-direct-upload-api.rst @@ -122,7 +122,7 @@ To add multiple Uploaded Files to the Dataset --------------------------------------------- Once the files exists in the s3 bucket, a final API call is needed to add all the files to the Dataset. In this API call, additional metadata is added using the "jsonData" parameter. -jsonData normally includes information such as a file description, tags, provenance, whether the file is restricted, etc. 
For direct uploads, the jsonData object must also include values for: +jsonData for this call is an array of objects that normally include information such as a file description, tags, provenance, whether the file is restricted, etc. For direct uploads, the jsonData object must also include values for: * "description" - A description of the file * "directoryLabel" - The "File Path" of the file, indicating which folder the file should be uploaded to within the dataset @@ -154,7 +154,7 @@ Replacing an existing file in the Dataset ----------------------------------------- Once the file exists in the s3 bucket, a final API call is needed to register it as a replacement of an existing file. This call is the same call used to replace a file to a Dataverse installation but, rather than sending the file bytes, additional metadata is added using the "jsonData" parameter. -jsonData normally includes information such as a file description, tags, provenance, whether the file is restricted, whether to allow the mimetype to change (forceReplace=true), etc. For direct uploads, the jsonData object must also include values for: +jsonData normally includes information such as a file description, tags, provenance, whether the file is restricted, whether to allow the mimetype to change (forceReplace=true), etc. For direct uploads, the jsonData object must include values for: * "storageIdentifier" - String, as specified in prior calls * "fileName" - String @@ -172,9 +172,107 @@ Note that the API call does not validate that the file matches the hash value su export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx export SERVER_URL=https://demo.dataverse.org export FILE_IDENTIFIER=5072 - export JSON_DATA="{'description':'My description.','directoryLabel':'data/subdir1','categories':['Data'], 'restrict':'false', 'forceReplace':'true', 'storageIdentifier':'s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42', 'fileName':'file1.txt', 'mimeType':'text/plain', 'checksum': {'@type': 'SHA-1', '@value': '123456'}}" + export JSON_DATA='{"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "forceReplace":"true", "storageIdentifier":"s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42", "fileName":"file1.txt", "mimeType":"text/plain", "checksum": {"@type": "SHA-1", "@value": "123456"}}' curl -X POST -H "X-Dataverse-key: $API_TOKEN" "$SERVER_URL/api/files/$FILE_IDENTIFIER/replace" -F "jsonData=$JSON_DATA" Note that this API call can be used independently of the others, e.g. supporting use cases in which the file already exists in S3/has been uploaded via some out-of-band method. With current S3 stores the object identifier must be in the correct bucket for the store, include the PID authority/identifier of the parent dataset, and be guaranteed unique, and the supplied storage identifer must be prefaced with the store identifier used in the Dataverse installation, as with the internally generated examples above. + +Replacing multiple existing files in the Dataset +------------------------------------------------ + +Once the replacement files exist in the s3 bucket, a final API call is needed to register them as replacements for existing files. In this API call, additional metadata is added using the "jsonData" parameter. +jsonData for this call is array of objects that normally include information such as a file description, tags, provenance, whether the file is restricted, etc. 
For direct uploads, the jsonData object must include some additional values: + +* "fileToReplaceId" - the id of the file being replaced +* "forceReplace" - whether to replace a file with one of a different mimetype (optional, default is false) +* "description" - A description of the file +* "directoryLabel" - The "File Path" of the file, indicating which folder the file should be uploaded to within the dataset +* "storageIdentifier" - String +* "fileName" - String +* "mimeType" - String +* "fixity/checksum" either: + + * "md5Hash" - String with MD5 hash value, or + * "checksum" - Json Object with "@type" field specifying the algorithm used and "@value" field with the value from that algorithm, both Strings + + +The allowed checksum algorithms are defined by the edu.harvard.iq.dataverse.DataFile.CheckSumType class and currently include MD5, SHA-1, SHA-256, and SHA-512 + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/7U7YBV + export JSON_DATA='[{"fileToReplaceId": 10, "description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "storageIdentifier":"s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42", "fileName":"file1.txt", "mimeType":"text/plain", "checksum": {"@type": "SHA-1", "@value": "123456"}},{"fileToReplaceId": 11, "forceReplace": true, "description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "storageIdentifier":"s3://demo-dataverse-bucket:176e28068b0-1c3f80357d53", "fileName":"file2.txt", "mimeType":"text/plain", "checksum": {"@type": "SHA-1", "@value": "123789"}}]' + + curl -X POST -H "X-Dataverse-key: $API_TOKEN" "$SERVER_URL/api/datasets/:persistentId/replaceFiles?persistentId=$PERSISTENT_IDENTIFIER" -F "jsonData=$JSON_DATA" + +The JSON object returned as a response from this API call includes a "data" that indicates how many of the file replacements succeeded and provides per-file error messages for those that don't, e.g. + +.. 
code-block:: + + { + "status": "OK", + "data": { + "Files": [ + { + "storageIdentifier": "s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42", + "errorMessage": "Bad Request:The file to replace does not belong to this dataset.", + "fileDetails": { + "fileToReplaceId": 10, + "description": "My description.", + "directoryLabel": "data/subdir1", + "categories": [ + "Data" + ], + "restrict": "false", + "storageIdentifier": "s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42", + "fileName": "file1.Bin", + "mimeType": "application/octet-stream", + "checksum": { + "@type": "SHA-1", + "@value": "123456" + } + } + }, + { + "storageIdentifier": "s3://demo-dataverse-bucket:176e28068b0-1c3f80357d53", + "successMessage": "Replaced successfully in the dataset", + "fileDetails": { + "description": "My description.", + "label": "file2.txt", + "restricted": false, + "directoryLabel": "data/subdir1", + "categories": [ + "Data" + ], + "dataFile": { + "persistentId": "", + "pidURL": "", + "filename": "file2.txt", + "contentType": "text/plain", + "filesize": 2407, + "description": "My description.", + "storageIdentifier": "s3://demo-dataverse-bucket:176e28068b0-1c3f80357d53", + "rootDataFileId": 11, + "previousDataFileId": 11, + "checksum": { + "type": "SHA-1", + "value": "123789" + } + } + } + } + ], + "Result": { + "Total number of files": 2, + "Number of files successfully replaced": 1 + } + } + } + + +Note that this API call can be used independently of the others, e.g. supporting use cases in which the files already exists in S3/has been uploaded via some out-of-band method. +With current S3 stores the object identifier must be in the correct bucket for the store, include the PID authority/identifier of the parent dataset, and be guaranteed unique, and the supplied storage identifer must be prefaced with the store identifier used in the Dataverse installation, as with the internally generated examples above. diff --git a/doc/sphinx-guides/source/developers/security.rst b/doc/sphinx-guides/source/developers/security.rst new file mode 100755 index 00000000000..09b80a4c840 --- /dev/null +++ b/doc/sphinx-guides/source/developers/security.rst @@ -0,0 +1,34 @@ +======== +Security +======== + +This section describes security practices and procedures for the Dataverse team. + +.. contents:: |toctitle| + :local: + +Intake of Security Issues +------------------------- + +As described under :ref:`reporting-security-issues`, we encourage the community to email security@dataverse.org if they have any security concerns. These emails go into our private ticket tracker (RT_). + +.. _RT: https://help.hmdc.harvard.edu + +We use a private GitHub issue tracker at https://github.com/IQSS/dataverse-security/issues for security issues. + +Sending Security Notices +------------------------ + +When drafting the security notice, it might be helpful to look at `previous examples`_. + +.. _previous examples: https://drive.google.com/drive/folders/0B_qMYwdHFZghaDZIU2hWQnBDZVE?resourcekey=0-SYjuhCohAIM7_pmysVc3Xg&usp=sharing + +Gather email addresses from the following sources (these are also described under :ref:`ongoing-security` in the Installation Guide): + +- "contact_email" in the `public installation spreadsheet`_ +- "Other Security Contacts" in the `private installation spreadsheet`_ + +Once you have the emails, include them as bcc. + +.. _public installation spreadsheet: https://docs.google.com/spreadsheets/d/1bfsw7gnHlHerLXuk7YprUT68liHfcaMxs1rFciA-mEo/edit#gid=0 +.. 
_private installation spreadsheet: https://docs.google.com/spreadsheets/d/1EWDwsj6eptQ7nEr-loLvdU7I6Tm2ljAplfNSVWR42i0/edit?usp=sharing diff --git a/doc/sphinx-guides/source/developers/tips.rst b/doc/sphinx-guides/source/developers/tips.rst index 3fff3e76ea8..bf75a05f84e 100755 --- a/doc/sphinx-guides/source/developers/tips.rst +++ b/doc/sphinx-guides/source/developers/tips.rst @@ -58,6 +58,8 @@ From the root of the git repo, run the following command to set the build number This should update or place a file at ``src/main/java/BuildNumber.properties``. +(See also :ref:`auto-custom-build-number` for other ways of changing the build number.) + Then, from Netbeans, click "Run" and then "Clean and Build Project (dataverse)". After this completes successfully, click "Run" and then "Run Project (dataverse)" Confirm the Change Was Deployed @@ -164,6 +166,8 @@ Git on Mac On a Mac, you won't have git installed unless you have "Command Line Developer Tools" installed but running ``git clone`` for the first time will prompt you to install them. +.. _auto-custom-build-number: + Automation of Custom Build Number on Webpage ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -173,6 +177,15 @@ commit id in your test deployment webpages on the bottom right corner next to th When you prefer manual updates, there is another script, see above: :ref:`custom_build_num_script`. +An alternative to that is using *MicroProfile Config* and set the option ``dataverse.build`` via a system property, +environment variable (``DATAVERSE_BUILD``) or `one of the other config sources +`__. + +You could even override the version itself with the option ``dataverse.version`` in the same way, which is usually +picked up from a build time source. + +See also discussion of version numbers in :ref:`run-build-create-war`. + Sample Data ----------- diff --git a/doc/sphinx-guides/source/index.rst b/doc/sphinx-guides/source/index.rst index 148518d2ce5..f6eda53d718 100755 --- a/doc/sphinx-guides/source/index.rst +++ b/doc/sphinx-guides/source/index.rst @@ -6,7 +6,7 @@ Dataverse Documentation v. |version| ==================================== -These documentation guides are for the |version| version of Dataverse. To find guides belonging to previous versions, :ref:`guides_versions` has a list of all available versions. +These documentation guides are for the |version| version of Dataverse. To find guides belonging to previous or future versions, :ref:`guides_versions` has a list of all available versions. .. toctree:: :glob: @@ -18,6 +18,7 @@ These documentation guides are for the |version| version of Dataverse. To find g api/index installation/index developers/index + container/index style/index How the Guides Are Organized @@ -25,11 +26,13 @@ How the Guides Are Organized The guides are documentation that explain how to use Dataverse, which are divided into the following sections: User Guide, -Installation Guide, Developer Guide, API Guide and Style Guide. The User Guide is further divided into primary activities: finding & using +Installation Guide, Developer Guide, API Guide, Style Guide and Container Guide. +The User Guide is further divided into primary activities: finding & using data, adding Datasets, administering dataverses or Datasets, and Dataset exploration/visualizations. Details on all of the above tasks can be found in the Users Guide. The Installation Guide is for people or organizations who want to host their -own Dataverse. The Developer Guide contains instructions for +own Dataverse. 
The Container Guide gives information on how to deploy Dataverse with containers. +The Developer Guide contains instructions for people who want to contribute to the Open Source Dataverse project or who want to modify the code to suit their own needs. Finally, the API Guide is for Developers that work on other applications and are interested in connecting with Dataverse through our APIs. @@ -67,7 +70,7 @@ The support email address is `support@dataverse.org `__ or use `GitHub pull requests `__, if you have some code, scripts or documentation that you'd like to share. -If you have a **security issue** to report, please email `security@dataverse.org `__. +If you have a **security issue** to report, please email `security@dataverse.org `__. See also :ref:`reporting-security-issues`. Indices and Tables diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 2c576b03989..ee89b718777 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -101,6 +101,31 @@ Password complexity rules for "builtin" accounts can be adjusted with a variety - :ref:`:PVGoodStrength` - :ref:`:PVCustomPasswordResetAlertMessage` +.. _ongoing-security: + +Ongoing Security of Your Installation ++++++++++++++++++++++++++++++++++++++ + +Like any application, you should keep up-to-date with patches to both the Dataverse software and the platform (usually Linux) it runs on. Dataverse releases are announced on the dataverse-community_ mailing list, the Dataverse blog_, and in chat.dataverse.org_. + +.. _dataverse-community: https://groups.google.com/g/dataverse-community +.. _blog: https://dataverse.org/blog +.. _chat.dataverse.org: https://chat.dataverse.org + +In addition to these public channels, you can subscribe to receive security notices via email from the Dataverse team. These notices are sent to the ``contact_email`` in the installation spreadsheet_ and you can open an issue in the dataverse-installations_ repo to add or change the contact email. Security notices are also sent to people and organizations that prefer to remain anonymous. To be added to this private list, please email support@dataverse.org. + +.. _spreadsheet: https://docs.google.com/spreadsheets/d/1bfsw7gnHlHerLXuk7YprUT68liHfcaMxs1rFciA-mEo/edit#gid=0 +.. _dataverse-installations: https://github.com/IQSS/dataverse-installations + +For additional details about security practices by the Dataverse team, see the :doc:`/developers/security` section of the Developer Guide. + +.. _reporting-security-issues: + +Reporting Security Issues ++++++++++++++++++++++++++ + +If you have a security issue to report, please email it to security@dataverse.org. + .. _network-ports: Network Ports @@ -238,6 +263,153 @@ As for the "Remote only" authentication mode, it means that: - ``:DefaultAuthProvider`` has been set to use the desired authentication provider - The "builtin" authentication provider has been disabled (:ref:`api-toggle-auth-provider`). Note that disabling the "builtin" authentication provider means that the API endpoint for converting an account from a remote auth provider will not work. Converting directly from one remote authentication provider to another (i.e. from GitHub to Google) is not supported. Conversion from remote is always to "builtin". Then the user initiates a conversion from "builtin" to remote. 
Note that longer term, the plan is to permit multiple login options to the same Dataverse installation account per https://github.com/IQSS/dataverse/issues/3487 (so all this talk of conversion will be moot) but for now users can only use a single login option, as explained in the :doc:`/user/account` section of the User Guide. In short, "remote only" might work for you if you only plan to use a single remote authentication provider such that no conversion between remote authentication providers will be necessary. +.. _database-persistence: + +Database Persistence +-------------------- + +The Dataverse software uses a PostgreSQL database to store objects users create. +You can configure basic and advanced settings for the PostgreSQL database connection with the help of +MicroProfile Config API. + +Basic Database Settings ++++++++++++++++++++++++ + +1. Any of these settings can be set via system properties (see :ref:`jvm-options` starting at :ref:`dataverse.db.name`), environment variables or other + MicroProfile Config mechanisms supported by the app server. + `See Payara docs for supported sources `_. +2. Remember to protect your secrets. For passwords, use an environment variable (bare minimum), a password alias named the same + as the key (OK) or use the "dir config source" of Payara (best). + + Alias creation example: + + .. code-block:: shell + + echo "AS_ADMIN_ALIASPASSWORD=changeme" > /tmp/p.txt + asadmin create-password-alias --passwordfile /tmp/p.txt dataverse.db.password + rm /tmp/p.txt + +3. Environment variables follow the key, replacing any dot, colon, dash, etc. into an underscore "_" and all uppercase + letters. Example: ``dataverse.db.host`` -> ``DATAVERSE_DB_HOST`` + +.. list-table:: + :widths: 15 60 25 + :header-rows: 1 + :align: left + + * - MPCONFIG Key + - Description + - Default + * - dataverse.db.host + - The PostgreSQL server to connect to. + - ``localhost`` + * - dataverse.db.port + - The PostgreSQL server port to connect to. + - ``5432`` + * - dataverse.db.user + - The PostgreSQL user name to connect with. + - | ``dataverse`` + | (installer sets to ``dvnapp``) + * - dataverse.db.password + - The PostgreSQL users password to connect with. + + **Please note the safety advisory above.** + - *No default* + * - dataverse.db.name + - The PostgreSQL database name to use for the Dataverse installation. + - | ``dataverse`` + | (installer sets to ``dvndb``) + * - dataverse.db.parameters + - Connection parameters, such as ``sslmode=require``. See `Postgres JDBC docs `_ + Note: you don't need to provide the initial "?". + - *Empty string* + +Advanced Database Settings +++++++++++++++++++++++++++ + +The following options are useful in many scenarios. You might be interested in debug output during development or +monitoring performance in production. + +You can find more details within the Payara docs: + +- `User Guide: Connection Pool Configuration `_ +- `Tech Doc: Advanced Connection Pool Configuration `_. + +Connection Validation +^^^^^^^^^^^^^^^^^^^^^ + +.. list-table:: + :widths: 15 60 25 + :header-rows: 1 + :align: left + + * - MPCONFIG Key + - Description + - Default + * - dataverse.db.is-connection-validation-required + - ``true``: Validate connections, allow server to reconnect in case of failure. + - false + * - dataverse.db.connection-validation-method + - | The method of connection validation: + | ``table|autocommit|meta-data|custom-validation``. 
+ - *Empty string* + * - dataverse.db.validation-table-name + - The name of the table used for validation if the validation method is set to ``table``. + - *Empty string* + * - dataverse.db.validation-classname + - The name of the custom class used for validation if the ``validation-method`` is set to ``custom-validation``. + - *Empty string* + * - dataverse.db.validate-atmost-once-period-in-seconds + - Specifies the time interval in seconds between successive requests to validate a connection at most once. + - ``0`` (disabled) + +Connection & Statement Leaks +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. list-table:: + :widths: 15 60 25 + :header-rows: 1 + :align: left + + * - MPCONFIG Key + - Description + - Default + * - dataverse.db.connection-leak-timeout-in-seconds + - Specify timeout when connections count as "leaked". + - ``0`` (disabled) + * - dataverse.db.connection-leak-reclaim + - If enabled, leaked connection will be reclaimed by the pool after connection leak timeout occurs. + - ``false`` + * - dataverse.db.statement-leak-timeout-in-seconds + - Specifiy timeout when statements should be considered to be "leaked". + - ``0`` (disabled) + * - dataverse.db.statement-leak-reclaim + - If enabled, leaked statement will be reclaimed by the pool after statement leak timeout occurs. + - ``false`` + +Logging & Slow Performance +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. list-table:: + :widths: 15 60 25 + :header-rows: 1 + :align: left + + * - MPCONFIG Key + - Description + - Default + * - dataverse.db.statement-timeout-in-seconds + - Timeout property of a connection to enable termination of abnormally long running queries. + - ``-1`` (disabled) + * - dataverse.db.slow-query-threshold-in-seconds + - SQL queries that exceed this time in seconds will be logged. + - ``-1`` (disabled) + * - dataverse.db.log-jdbc-calls + - When set to true, all JDBC calls will be logged allowing tracing of all JDBC interactions including SQL. + - ``false`` + + + .. _file-storage: File Storage: Using a Local Filesystem and/or Swift and/or Object Stores and/or Trusted Remote Stores @@ -263,7 +435,9 @@ To support multiple stores, a Dataverse installation now requires an id, type, a Out of the box, a Dataverse installation is configured to use local file storage in the 'file' store by default. You can add additional stores and, as a superuser, configure specific Dataverse collections to use them (by editing the 'General Information' for the Dataverse collection as described in the :doc:`/admin/dataverses-datasets` section). -Note that the "\-Ddataverse.files.directory", if defined, continues to control where temporary files are stored (in the /temp subdir of that directory), independent of the location of any 'file' store defined above. +Note that the "\-Ddataverse.files.directory", if defined, continues to control where temporary files are stored +(in the /temp subdir of that directory), independent of the location of any 'file' store defined above. +(See also the option reference: :ref:`dataverse.files.directory`) If you wish to change which store is used by default, you'll need to delete the existing default storage driver and set a new one using jvm options. @@ -274,6 +448,8 @@ If you wish to change which store is used by default, you'll need to delete the It is also possible to set maximum file upload size limits per store. See the :ref:`:MaxFileUploadSizeInBytes` setting below. +.. 
_storage-files-dir: + File Storage ++++++++++++ @@ -1435,39 +1611,83 @@ It's also possible to change these values by stopping Payara, editing ``payara5/ dataverse.fqdn ++++++++++++++ -If the Dataverse installation has multiple DNS names, this option specifies the one to be used as the "official" hostname. For example, you may want to have ``dataverse.example.edu``, and not the less appealing ``server-123.example.edu`` to appear exclusively in all the registered global identifiers, etc. +If the Dataverse installation has multiple DNS names, this option specifies the one to be used as the "official" +hostname. For example, you may want to have ``dataverse.example.edu``, and not the less appealing +``server-123.example.edu`` to appear exclusively in all the registered global identifiers, etc. -The password reset feature requires ``dataverse.fqdn`` to be configured. +- Email confirmation links +- Password reset links +- Generating a Private URL +- PID minting +- Exporting to Schema.org format (and showing JSON-LD in HTML's tag) +- Exporting to DDI format +- Which Dataverse installation an "external tool" should return to +- URLs embedded in SWORD API responses +- ... -Configuring ``dataverse.fqdn`` is not enough. Read on for the importance of also setting ``dataverse.siteUrl``. +Usually it will follow the pattern ``https:///``. +*Only* the FQDN part of your Dataverse installation URL can be determined by setting ``dataverse.fqdn``. + +**Notes:** + +- The URL will default to using ``https://`` and no additional port information. If that does not suit your setup, you + can define an additional option, ``dataverse.siteUrl``, :ref:`explained below `, which always + takes precedence. +- Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_FQDN``. + Defaults to ``localhost`` when used with ``mp.config.profile=ct`` .. _dataverse.siteUrl: dataverse.siteUrl +++++++++++++++++ -``dataverse.siteUrl`` is used to configure the URL for your Dataverse installation that you plan to advertise to your users. As explained in the :ref:`installation ` docs, this setting is critical for the correct operation of your installation. - -For example, your site URL could be https://dataverse.example.edu +``dataverse.siteUrl`` is used to configure the URL for your Dataverse installation that you plan to advertise to your +users. As explained in the :ref:`installation ` docs, this setting is critical for the correct +operation of your installation. For example, your site URL could be https://dataverse.example.edu . That is, even though +the server might also be available at uglier URLs such as https://server-123.example.edu, the site URL is the +"official" URL. -That is, even though the server might also be available at uglier URLs such as https://server-123.example.edu the site URL is the "official" URL. +That said, some environments may require using a different URL pattern to access your installation. You might need to +use HTTP without "S", a non-standard port and so on. This is especially useful in development or testing environments. -The ``dataverse.siteUrl`` JVM option can be configured by following the procedure under :ref:`jvm-options` or by editing ``domain.xml`` directly. You can specify the protocol, host, and port number. Your ``domain.xml`` file could look like this, for example: +You can provide any custom tailored site URL via ``dataverse.siteUrl``, which always takes precedence. 
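+As a minimal sketch (assuming you configure the option through the MicroProfile environment variable source mentioned in the notes below, and substituting the URL you actually advertise to your users), the site URL could be supplied before starting Payara like this:
+
+.. code-block:: shell
+
+    # hypothetical example value - replace with your own "official" URL
+    export DATAVERSE_SITEURL="https://dataverse.example.edu"
+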
+Example: ``dataverse.siteUrl=http://localhost:8080`` -``-Ddataverse.siteUrl=https://dataverse.example.edu`` +If you wish to change your site URL by changing the domain configuration, you should edit your ``domain.xml`` directly +to avoid problems with colons in commands. Find a line similar to +``-Ddataverse.siteUrl=https://dataverse.example.edu`` and change it. You can specify the +protocol, host, and port number and should not include a trailing slash. -Note that it's also possible to use the ``dataverse.fqdn`` as a variable, if you wish. Here's an example of this as well as a custom port (which is usually not necessary): +**Notes:** -``-Ddataverse.siteUrl=https://${dataverse.fqdn}:444`` - -We are absolutely aware that it's confusing to have both ``dataverse.fqdn`` and ``dataverse.siteUrl``. https://github.com/IQSS/dataverse/issues/6636 is about resolving this confusion. +- This setting may be used in combination with variable replacement, referencing :ref:`dataverse.fqdn` with + ``./asadmin create-jvm-options "\-Ddataverse.siteUrl=http\://\${dataverse.fqdn}\:8080"`` +- Remember to restart Payara after editing ``domain.xml``. +- Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_SITEURL``. + Defaults to ``http://${dataverse.fqdn}:8080`` when used with ``mp.config.profile=ct`` +- We are absolutely aware that it's confusing to have both ``dataverse.fqdn`` and ``dataverse.siteUrl``. + https://github.com/IQSS/dataverse/issues/6636 is about resolving this confusion. .. _dataverse.files.directory: dataverse.files.directory +++++++++++++++++++++++++ -This is how you configure the path Dataverse uses for temporary files. (File store specific dataverse.files.\.directory options set the permanent data storage locations.) +Please provide an absolute path to a directory backed by some mounted file system. This directory is used for a number +of purposes: + +1. ``/temp`` after uploading, data is temporarily stored here for ingest and/or before + shipping to the final storage destination. +2. ``/sword`` a place to store uploads via the :doc:`../api/sword` before transfer + to final storage location and/or ingest. +3. ``/googlecloudkey.json`` used with :ref:`Google Cloud Configuration` for BagIt exports. + This location is deprecated and might be refactored into a distinct setting in the future. +4. The experimental DCM feature for :doc:`../developers/big-data-support` is able to trigger imports for externally + uploaded files in a directory tree at ``//`` + under certain conditions. This directory may also be used by file stores for :ref:`permanent file storage `, + but this is controlled by other, store-specific settings. + +Defaults to ``/tmp/dataverse``. Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_FILES_DIRECTORY``. .. _dataverse.files.uploads: @@ -1488,6 +1708,8 @@ dataverse.auth.password-reset-timeout-in-minutes Users have 60 minutes to change their passwords by default. You can adjust this value here. +.. _dataverse.db.name: + dataverse.db.name +++++++++++++++++ @@ -1497,6 +1719,8 @@ Defaults to ``dataverse`` (but the installer sets it to ``dvndb``). Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_DB_NAME``. +See also :ref:`database-persistence`. + dataverse.db.user +++++++++++++++++ @@ -1539,30 +1763,118 @@ Defaults to ``5432``, the default PostgreSQL port. Can also be set via *MicroProfile Config API* sources, e.g. 
the environment variable ``DATAVERSE_DB_PORT``. +.. _dataverse.solr.host: + +dataverse.solr.host ++++++++++++++++++++ + +The hostname of a Solr server to connect to. Remember to restart / redeploy Dataverse after changing the setting +(as with :ref:`:SolrHostColonPort`). + +Defaults to ``localhost``. + +Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_SOLR_HOST``. +Defaults to ``solr``, when used with ``mp.config.profile=ct`` (:ref:`see below <:ApplicationServerSettings>`). + +dataverse.solr.port ++++++++++++++++++++ + +The Solr server port to connect to. Remember to restart / redeploy Dataverse after changing the setting +(as with :ref:`:SolrHostColonPort`). + +Defaults to ``8983``, the default Solr port. + +Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_SOLR_PORT``. + +dataverse.solr.core ++++++++++++++++++++ + +The name of the Solr core to use for this Dataverse installation. Might be used to switch to a different core quickly. +Remember to restart / redeploy Dataverse after changing the setting (as with :ref:`:SolrHostColonPort`). + +Defaults to ``collection1``. + +Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_SOLR_CORE``. + +dataverse.solr.protocol ++++++++++++++++++++++++ + +The Solr server URL protocol for the connection. Remember to restart / redeploy Dataverse after changing the setting +(as with :ref:`:SolrHostColonPort`). + +Defaults to ``http``, but might be set to ``https`` for extra secure Solr installations. + +Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_SOLR_PROTOCOL``. + +dataverse.solr.path ++++++++++++++++++++ + +The path part of the Solr endpoint URL (e.g. ``/solr/collection1`` of ``http://localhost:8389/solr/collection1``). +Might be used to target a Solr API at non-default places. Remember to restart / redeploy Dataverse after changing the +setting (as with :ref:`:SolrHostColonPort`). + +Defaults to ``/solr/${dataverse.solr.core}``, interpolating the core name when used. Make sure to include the variable +when using it to configure your core name! + +Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_SOLR_PATH``. + dataverse.rserve.host +++++++++++++++++++++ -Host name for Rserve, used for tasks that require use of R (to ingest RData files and to save tabular data as RData frames). +Host name for Rserve, used for tasks that require use of R (to ingest RData +files and to save tabular data as RData frames). + +Defaults to ``localhost``. + +Can also be set via *MicroProfile Config API* sources, e.g. the environment +variable ``DATAVERSE_RSERVE_HOST``. dataverse.rserve.port +++++++++++++++++++++ -Port number for Rserve, used for tasks that require use of R (to ingest RData files and to save tabular data as RData frames). +Port number for Rserve, used for tasks that require use of R (to ingest RData +files and to save tabular data as RData frames). + +Defaults to ``6311`` when not configured or no valid integer. + +Can also be set via *MicroProfile Config API* sources, e.g. the environment +variable ``DATAVERSE_RSERVE_PORT``. dataverse.rserve.user +++++++++++++++++++++ -Username for Rserve, used for tasks that require use of R (to ingest RData files and to save tabular data as RData frames). +Username for Rserve, used for tasks that require use of R (to ingest RData +files and to save tabular data as RData frames). 
+ +Defaults to ``rserve``. + +Can also be set via *MicroProfile Config API* sources, e.g. the environment +variable ``DATAVERSE_RSERVE_USER``. dataverse.rserve.password +++++++++++++++++++++++++ -Password for Rserve, used for tasks that require use of R (to ingest RData files and to save tabular data as RData frames). +Password for Rserve, used for tasks that require use of R (to ingest RData +files and to save tabular data as RData frames). + +Defaults to ``rserve``. + +Can also be set via *MicroProfile Config API* sources, e.g. the environment +variable ``DATAVERSE_RSERVE_PASSWORD``. dataverse.rserve.tempdir ++++++++++++++++++++++++ -Temporary directory used by Rserve (defaults to /tmp/Rserv). Note that this location is local to the host on which Rserv is running (specified in ``dataverse.rserve.host`` above). When talking to Rserve, Dataverse needs to know this location in order to generate absolute path names of the files on the other end. +Temporary directory used by Rserve (defaults to ``/tmp/Rserv``). Note that this +location is local to the host on which Rserv is running (specified in +``dataverse.rserve.host`` above). When talking to Rserve, Dataverse needs to +know this location in order to generate absolute path names of the files on the +other end. + +Defaults to ``/tmp/Rserv``. + +Can also be set via *MicroProfile Config API* sources, e.g. the environment +variable ``DATAVERSE_RSERVE_TEMPDIR``. .. _dataverse.dropbox.key: @@ -1704,8 +2016,6 @@ By default, download URLs to files will be included in Schema.org JSON-LD output ``./asadmin create-jvm-options '-Ddataverse.files.hide-schema-dot-org-download-urls=true'`` -Please note that there are other reasons why download URLs may not be included for certain files such as if a guestbook entry is required or if the file is restricted. - For more on Schema.org JSON-LD, see the :doc:`/admin/metadataexport` section of the Admin Guide. .. _useripaddresssourceheader: @@ -1735,6 +2045,27 @@ This setting is useful in cases such as running your Dataverse installation behi "HTTP_FORWARDED", "HTTP_VIA", "REMOTE_ADDR" + +.. _dataverse.personOrOrg.assumeCommaInPersonName: + +dataverse.personOrOrg.assumeCommaInPersonName ++++++++++++++++++++++++++++++++++++++++++++++ + +Please note that this setting is experimental. + +The Schema.org metadata and OpenAIRE exports and the Schema.org metadata included in DatasetPages try to infer whether each entry in the various fields (e.g. Author, Contributor) is a Person or Organization. If you are sure that +users are following the guidance to add people in the recommended family name, given name order, with a comma, you can set this to true to always assume entries without a comma are for Organizations. The default is false. + +.. _dataverse.personOrOrg.orgPhraseArray: + +dataverse.personOrOrg.orgPhraseArray +++++++++++++++++++++++++++++++++++++ + +Please note that this setting is experimental. + +The Schema.org metadata and OpenAIRE exports and the Schema.org metadata included in DatasetPages try to infer whether each entry in the various fields (e.g. Author, Contributor) is a Person or Organization. +If you have examples where an organization name is being inferred to belong to a person, you can use this setting to force it to be recognized as an organization. +The value is expected to be a JsonArray of strings. Any name that contains one of the strings is assumed to be an organization. For example, "Project" is a word that is not otherwise associated with being an organization.
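+As an illustrative sketch only (the phrase list is a hypothetical example, and quoting may need to be adapted to your shell and ``asadmin``), the JVM option could be supplied like this:
+
+.. code-block:: shell
+
+    # any author or contact name containing the phrase "Project" would then be treated as an organization
+    ./asadmin create-jvm-options '-Ddataverse.personOrOrg.orgPhraseArray=["Project"]'
+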
.. _dataverse.api.signature-secret: @@ -1783,6 +2114,21 @@ To facilitate large file upload and download, the Dataverse Software installer b and restart Payara to apply your change. +mp.config.profile ++++++++++++++++++ + +MicroProfile Config 2.0 defines the `concept of "profiles" `_. +They can be used to change configuration values by context. This is used in Dataverse to change some configuration +defaults when used inside container context rather than in classic installations. + +As per the spec, you will need to set the configuration value ``mp.config.profile`` to ``ct`` as early as possible. +This is best done with a system property: + +``./asadmin create-system-properties 'mp.config.profile=ct'`` + +You might also create your own profiles and use them; please refer to the upstream documentation linked above. + + .. _database-settings: Database Settings @@ -2270,6 +2616,8 @@ Limit the number of files in a zip that your Dataverse installation will accept. ``curl -X PUT -d 2048 http://localhost:8080/api/admin/settings/:ZipUploadFilesLimit`` +.. _:SolrHostColonPort: + :SolrHostColonPort ++++++++++++++++++ @@ -2277,6 +2625,8 @@ By default your Dataverse installation will attempt to connect to Solr on port 8 ``curl -X PUT -d localhost:8983 http://localhost:8080/api/admin/settings/:SolrHostColonPort`` +**Note:** instead of using a database setting, you could alternatively use JVM settings like :ref:`dataverse.solr.host`. + :SolrFullTextIndexing +++++++++++++++++++++ @@ -2666,6 +3016,7 @@ The URL for your Repository Storage Abstraction Layer (RSAL) installation. This This setting controls which upload methods are available to users of your Dataverse installation. The following upload methods are available: - ``native/http``: Corresponds to "Upload with HTTP via your browser" and APIs that use HTTP (SWORD and native). +- ``dvwebloader``: Corresponds to :ref:`folder-upload`. Note that ``dataverse.files..upload-redirect`` must be set to "true" on an S3 store for this method to show up in the UI. In addition, :ref:`:WebloaderUrl` must be set. CORS must be allowed on the S3 bucket. See :ref:`cors-s3-bucket`. - ``dcm/rsync+ssh``: Corresponds to "Upload with rsync+ssh via Data Capture Module (DCM)". A lot of setup is required, as explained in the :doc:`/developers/big-data-support` section of the Developer Guide. Out of the box only ``native/http`` is enabled and will work without further configuration. To add multiple upload method, separate them using a comma like this: @@ -3071,7 +3422,7 @@ For example: ``curl -X PUT -d "This content needs to go through an additional review by the Curation Team before it can be published." http://localhost:8080/api/admin/settings/:DatasetMetadataValidationFailureMsg`` - + :ExternalValidationAdminOverride ++++++++++++++++++++++++++++++++ @@ -3167,6 +3518,11 @@ The interval in seconds between Dataverse calls to Globus to check on upload pro A true/false option to add a Globus transfer option to the file download menu which is not yet fully supported in the dataverse-globus app. See :ref:`globus-support` for details. +.. _:WebloaderUrl: + +:WebloaderUrl ++++++++++++++ +The URL for the main HTML file in https://github.com/gdcc/dvwebloader when that app is deployed. See also :ref:`:UploadMethods` for another required setting.
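+For example, a sketch of wiring the two settings together using the admin settings API shown above (the dvwebloader URL below is a placeholder - use the URL where the app is actually deployed for your installation):
+
+.. code-block:: shell
+
+    # point Dataverse at the deployed DVWebloader HTML file (placeholder URL)
+    curl -X PUT -d "https://example.com/dvwebloader/dvwebloader.html" http://localhost:8080/api/admin/settings/:WebloaderUrl
+    # enable the upload method alongside the default browser upload
+    curl -X PUT -d "native/http,dvwebloader" http://localhost:8080/api/admin/settings/:UploadMethods
+
..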
_supported MicroProfile Config API source: https://docs.payara.fish/community/docs/Technical%20Documentation/MicroProfile/Config/Overview.html diff --git a/doc/sphinx-guides/source/user/appendix.rst b/doc/sphinx-guides/source/user/appendix.rst index b05459b6aaf..7d60054ae17 100755 --- a/doc/sphinx-guides/source/user/appendix.rst +++ b/doc/sphinx-guides/source/user/appendix.rst @@ -26,8 +26,8 @@ Detailed below are what metadata schemas we support for Citation and Domain Spec - `Geospatial Metadata `__ (`see .tsv version `__): compliant with DDI Lite, DDI 2.5 Codebook, DataCite, and Dublin Core. Country / Nation field uses `ISO 3166-1 `_ controlled vocabulary. - `Social Science & Humanities Metadata `__ (`see .tsv version `__): compliant with DDI Lite, DDI 2.5 Codebook, and Dublin Core. - `Astronomy and Astrophysics Metadata `__ (`see .tsv version `__): These metadata elements can be mapped/exported to the International Virtual Observatory Alliance’s (IVOA) - `VOResource Schema format `__ and is based on - `Virtual Observatory (VO) Discovery and Provenance Metadata `__. + `VOResource Schema format `__ and is based on + `Virtual Observatory (VO) Discovery and Provenance Metadata `__ (`see .tsv version `__). - `Life Sciences Metadata `__ (`see .tsv version `__): based on `ISA-Tab Specification `__, along with controlled vocabulary from subsets of the `OBI Ontology `__ and the `NCBI Taxonomy for Organisms `__. - `Journal Metadata `__ (`see .tsv version `__): based on the `Journal Archiving and Interchange Tag Set, version 1.2 `__. @@ -36,8 +36,12 @@ Experimental Metadata Unlike supported metadata, experimental metadata is not enabled by default in a new Dataverse installation. Feedback via any `channel `_ is welcome! +- `CodeMeta Software Metadata `__: based on the `CodeMeta Software Metadata Schema, version 2.0 `__ (`see .tsv version `__) - `Computational Workflow Metadata `__ (`see .tsv version `__): adapted from `Bioschemas Computational Workflow Profile, version 1.0 `__ and `Codemeta `__. +Please note: these custom metadata schemas are not included in the Solr schema for indexing by default, you will need +to add them as necessary for your custom metadata blocks. See "Update the Solr Schema" in :doc:`../admin/metadatacustomization`. + See Also ~~~~~~~~ diff --git a/doc/sphinx-guides/source/user/dataset-management.rst b/doc/sphinx-guides/source/user/dataset-management.rst index ec3bb392ce5..31dd7f9cf78 100755 --- a/doc/sphinx-guides/source/user/dataset-management.rst +++ b/doc/sphinx-guides/source/user/dataset-management.rst @@ -93,6 +93,13 @@ Dropbox Upload Some Dataverse installations support the ability to upload files directly from Dropbox. To do so, click the "Upload from Dropbox" button, log in to Dropbox in the pop-up window, and select the files you'd like to transfer over. +.. _folder-upload: + +Folder Upload +------------- + +Some Dataverse installations support the ability to upload files from a local folder and subfolders. To do this, click the "Upload from Folder" button, select the folder you wish to upload, select/unselect specific files, and click "Start Uploads". More detailed instructions are available in the `DVWebloader wiki `_. + .. _rsync_upload: rsync + SSH Upload @@ -177,11 +184,32 @@ File Handling Certain file types in the Dataverse installation are supported by additional functionality, which can include downloading in different formats, previews, file-level metadata preservation, file-level data citation; and exploration through data visualization and analysis. 
See the sections below for information about special functionality for specific file types. +.. _file-previews: + File Previews ------------- Dataverse installations can add previewers for common file types uploaded by their research communities. The previews appear on the file page. If a preview tool for a specific file type is available, the preview will be created and will display automatically, after terms have been agreed to or a guestbook entry has been made, if necessary. File previews are not available for restricted files unless they are being accessed using a Private URL. See also :ref:`privateurl`. +Previewers are available for the following file types: + +- Text +- PDF +- Tabular (CSV, Excel, etc., see :doc:`tabulardataingest/index`) +- Code (R, etc.) +- Images (PNG, GIF, JPG) +- Audio (MP3, MPEG, WAV, OGG, M4A) +- Video (MP4, OGG, Quicktime) +- Zip (preview and extract/download) +- HTML +- GeoJSON +- NetCDF/HDF5 (NcML format) +- Hypothes.is + +Additional file types will be added to the `dataverse-previewers `_ repo before they are listed above, so please check there for the latest information or to request (or contribute!) an additional file previewer. + +Installation of previewers is explained in the :doc:`/admin/external-tools` section of the Admin Guide. + Tabular Data Files ------------------ @@ -268,7 +296,7 @@ After you :ref:`upload your files `, you can apply a "Workf |cw-image4| How to Describe Your Computational Workflow -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The Dataverse installation you are using may have enabled Computational Workflow metadata fields for your use. If so, when :ref:`editing your dataset metadata `, you will see the fields described below. @@ -299,6 +327,22 @@ Astronomy (FITS) Metadata found in the header section of `Flexible Image Transport System (FITS) files `_ are automatically extracted by the Dataverse Software, aggregated and displayed in the Astronomy Domain-Specific Metadata of the Dataset that the file belongs to. This FITS file metadata, is therefore searchable and browsable (facets) at the Dataset-level. +.. _geojson: + +GeoJSON +------- + +A map will be shown as a preview of GeoJSON files when the previewer has been enabled (see :ref:`file-previews`). See also a `video demo `_ of the GeoJSON previewer by its author, Kaitlin Newson. + +.. _netcdf-and-hdf5: + +NetCDF and HDF5 +--------------- + +For NetCDF and HDF5 files, an attempt will be made to extract metadata in NcML_ (XML) format and save it as an auxiliary file. (See also :doc:`/developers/aux-file-support` in the Developer Guide.) A previewer for these NcML files is available (see :ref:`file-previews`). + +.. _NcML: https://docs.unidata.ucar.edu/netcdf-java/current/userguide/ncml_overview.html + Compressed Files ---------------- diff --git a/doc/sphinx-guides/source/versions.rst b/doc/sphinx-guides/source/versions.rst index e0a344de9a1..138e7516ae1 100755 --- a/doc/sphinx-guides/source/versions.rst +++ b/doc/sphinx-guides/source/versions.rst @@ -4,8 +4,9 @@ Dataverse Software Documentation Versions ========================================= -This list provides a way to refer to the documentation for previous versions of the Dataverse Software. In order to learn more about the updates delivered from one version to another, visit the `Releases `__ page in our GitHub repo. +This list provides a way to refer to the documentation for previous and future versions of the Dataverse Software.
In order to learn more about the updates delivered from one version to another, visit the `Releases `__ page in our GitHub repo. +- `develop Git branch `__ - 5.12.1 - `5.12 `__ - `5.11.1 `__ diff --git a/modules/container-base/.gitignore b/modules/container-base/.gitignore new file mode 100644 index 00000000000..d75620abf70 --- /dev/null +++ b/modules/container-base/.gitignore @@ -0,0 +1 @@ +.flattened-pom.xml diff --git a/modules/container-base/README.md b/modules/container-base/README.md new file mode 100644 index 00000000000..15011d5c6f4 --- /dev/null +++ b/modules/container-base/README.md @@ -0,0 +1,61 @@ +# Dataverse Base Container Image + +The Dataverse Base Container Image contains primarily a pre-installed and pre-tuned application server with the +necessary software dependencies for deploying and launching a Dataverse repository installation. + +Adding basic functionality like executing scripts at container boot, monitoring, memory tweaks, etc., is all done +at this layer. Application images building from this very base focus on adding deployable Dataverse code and +actual scripts. + +*Note:* Currently, there is no application image. Please watch https://github.com/IQSS/dataverse/issues/8934 + +## Quick Reference + +**Maintained by:** + +This image is created, maintained and supported by the Dataverse community on a best-effort basis. + +**Where to find documentation:** + +The [Dataverse Container Guide - Base Image](https://guides.dataverse.org/en/latest/container/base-image.html) +provides in-depth information about content, building, tuning and so on for this image. + +**Where to get help and ask questions:** + +IQSS will not offer support on how to deploy or run it. Please reach out to the community for help on using it. +You can join the Community Chat on Matrix at https://chat.dataverse.org or the Community Slack at +https://dataversecommunity.slack.com to ask for help and guidance. + +## Supported Image Tags + +This image is sourced within the main upstream code [repository of the Dataverse software](https://github.com/IQSS/dataverse). +Development and maintenance of the [image's code](https://github.com/IQSS/dataverse/tree/develop/modules/container-base) +happens there (again, by the community). Community-supported image tags are based on the two most important branches: + +- The `unstable` tag corresponds to the `develop` branch, where pull requests are merged. + ([`Dockerfile`](https://github.com/IQSS/dataverse/tree/develop/modules/container-base/src/main/docker/Dockerfile)) +- The `stable` tag corresponds to the `master` branch, where releases are cut from. + ([`Dockerfile`](https://github.com/IQSS/dataverse/tree/master/modules/container-base/src/main/docker/Dockerfile)) + +Within the main repository, you may find the base image files at `/modules/container-base`. +This Maven module uses the [Maven Docker Plugin](https://dmp.fabric8.io) to build and ship the image. +You may use, extend, or alter this image to your liking and/or host in some different registry if you want to. + +**Supported architectures:** This image is created as a "multi-arch image", supporting the most common architectures +Dataverse usually runs on: AMD64 (Windows/Linux/...) and ARM64 (Apple M1/M2). + +## License + +Image content created by the community is licensed under [Apache License, Version 2.0](https://www.apache.org/licenses/LICENSE-2.0), +like the [main Dataverse project](https://github.com/IQSS/dataverse/blob/develop/LICENSE.md). 
+ +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and limitations under the License. + +As with all Docker images, all images likely also contain other software which may be under other licenses (such as +[Payara Server](https://github.com/payara/Payara/blob/master/LICENSE.txt), Bash, etc., from the base +distribution, along with any direct or indirect (Java) dependencies contained). + +As for any pre-built image usage, it is the image user's responsibility to ensure that any use of this image complies +with any relevant licenses for all software contained within. diff --git a/modules/container-base/pom.xml b/modules/container-base/pom.xml new file mode 100644 index 00000000000..bbee6ad67d5 --- /dev/null +++ b/modules/container-base/pom.xml @@ -0,0 +1,176 @@ + + + 4.0.0 + + + edu.harvard.iq + dataverse-parent + ${revision} + ../dataverse-parent + + + io.gdcc + container-base + ${packaging.type} + Container Base Image + This module provides an application server base image to be decorated with the Dataverse app. + + + + poikilotherm + Oliver Bertuch + github@bertuch.eu + Europe/Berlin + + maintainer + + + + + + + + pom + + + + + ct + + docker-build + gdcc/base:${base.image.tag} + unstable + eclipse-temurin:${target.java.version}-jre + 1000 + 1000 + + + + + + + org.apache.maven.plugins + maven-dependency-plugin + + + unpack + initialize + + unpack + + + + + fish.payara.distributions + payara + ${payara.version} + zip + false + ${project.build.directory} + + + ^payara\d + payara + + + + + + + + + + + + io.fabric8 + docker-maven-plugin + true + + + + base + ${base.image} + + + + linux/arm64 + linux/amd64 + + ${project.build.directory}/buildx-state + + Dockerfile + + ${java.image} + ${base.image.uid} + ${base.image.gid} + + @ + + assembly.xml + + + + + + + + + + org.codehaus.mojo + flatten-maven-plugin + 1.2.7 + + true + oss + + remove + remove + + + + + + flatten + process-resources + + flatten + + + + + flatten.clean + clean + + clean + + + + + + + + maven-install-plugin + + + default-install + install + + install + + + + + + + + + \ No newline at end of file diff --git a/modules/container-base/src/main/docker/Dockerfile b/modules/container-base/src/main/docker/Dockerfile new file mode 100644 index 00000000000..07968e92359 --- /dev/null +++ b/modules/container-base/src/main/docker/Dockerfile @@ -0,0 +1,231 @@ +# Copyright 2022 Forschungszentrum Jülich GmbH +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# +################################################################################################################ +# +# THIS FILE IS TO BE USED WITH MAVEN DOCKER BUILD: +# mvn -Pct clean package docker:build +# +################################################################################################################ +# +# Some commands used are inspired by https://github.com/payara/Payara/tree/master/appserver/extras/docker-images. +# Most parts origin from older versions of https://github.com/gdcc/dataverse-kubernetes. 
+# +# We are not using upstream Payara images because: +# - Using same base image as Solr (https://hub.docker.com/_/solr) is reducing pulls +# - Their image is less optimised for production usage and Dataverse by design choices +# - We provide multi-arch images +# - We provide some tweaks for development and monitoring +# + +# Make the Java base image and version configurable (useful for trying newer Java versions and flavors) +ARG JAVA_IMAGE="eclipse-temurin:11-jre" +FROM $JAVA_IMAGE + +# Default payara ports to expose +# 4848: admin console +# 9009: debug port (JDWP) +# 8080: http +# 8181: https - but http-listener-2 is disabled here! +# 8686: JMX +EXPOSE 4848 9009 8080 8686 + +ENV HOME_DIR="/opt/payara" +ENV PAYARA_DIR="${HOME_DIR}/appserver" \ + SCRIPT_DIR="${HOME_DIR}/scripts" \ + CONFIG_DIR="${HOME_DIR}/config" \ + DEPLOY_DIR="${HOME_DIR}/deployments" \ + STORAGE_DIR="/dv" \ + SECRETS_DIR="/secrets" \ + DUMPS_DIR="/dumps" \ + PASSWORD_FILE="${HOME_DIR}/passwordFile" \ + ADMIN_USER="admin" \ + ADMIN_PASSWORD="admin" \ + DOMAIN_NAME="domain1" \ + PAYARA_ARGS="" +ENV PATH="${PATH}:${PAYARA_DIR}/bin:${SCRIPT_DIR}" \ + DOMAIN_DIR="${PAYARA_DIR}/glassfish/domains/${DOMAIN_NAME}" \ + DEPLOY_PROPS="" \ + PREBOOT_COMMANDS="${CONFIG_DIR}/pre-boot-commands.asadmin" \ + POSTBOOT_COMMANDS="${CONFIG_DIR}/post-boot-commands.asadmin" \ + JVM_ARGS="" \ + MEM_MAX_RAM_PERCENTAGE="70.0" \ + MEM_XSS="512k" \ + # Source: https://github.com/fabric8io-images/run-java-sh/blob/master/TUNING.md#recommandations + MEM_MIN_HEAP_FREE_RATIO="20" \ + MEM_MAX_HEAP_FREE_RATIO="40" \ + MEM_MAX_GC_PAUSE_MILLIS="500" \ + MEM_METASPACE_SIZE="256m" \ + MEM_MAX_METASPACE_SIZE="2g" \ + # Make heap dumps on OOM appear in DUMPS_DIR + ENABLE_DUMPS=0 \ + JVM_DUMPS_ARG="-XX:+HeapDumpOnOutOfMemoryError" \ + ENABLE_JMX=0 \ + ENABLE_JDWP=0 \ + ENABLE_RELOAD=0 + +### PART 1: SYSTEM ### +ARG UID=1000 +ARG GID=1000 +USER root +WORKDIR / +SHELL ["/bin/bash", "-euo", "pipefail", "-c"] +# Mark these directories as mutuable data containers to avoid cluttering the images overlayfs at runtime. 
+VOLUME ${STORAGE_DIR} ${SECRETS_DIR} ${DUMPS_DIR} +RUN <> /tmp/password-change-file.txt + echo "AS_ADMIN_PASSWORD=${ADMIN_PASSWORD}" >> ${PASSWORD_FILE} + asadmin --user=${ADMIN_USER} --passwordfile=/tmp/password-change-file.txt change-admin-password --domain_name=${DOMAIN_NAME} + # Start domain for configuration + ${ASADMIN} start-domain ${DOMAIN_NAME} + # Allow access to admin with password only + ${ASADMIN} enable-secure-admin + + ### CONTAINER USAGE ENABLEMENT + # List & delete memory settings from domain + for MEMORY_JVM_OPTION in $(${ASADMIN} list-jvm-options | grep "Xm[sx]\|Xss\|NewRatio"); + do + ${ASADMIN} delete-jvm-options $(echo $MEMORY_JVM_OPTION | sed -e 's/:/\\:/g'); + done + # Tweak memory settings for containers + ${ASADMIN} create-jvm-options "-XX\:+UseContainerSupport" + ${ASADMIN} create-jvm-options "-XX\:MaxRAMPercentage=\${ENV=MEM_MAX_RAM_PERCENTAGE}" + ${ASADMIN} create-jvm-options "-Xss\${ENV=MEM_XSS}" + ${ASADMIN} create-jvm-options "-XX\:MinHeapFreeRatio=\${ENV=MEM_MIN_HEAP_FREE_RATIO}" + ${ASADMIN} create-jvm-options "-XX\:MaxHeapFreeRatio=\${ENV=MEM_MAX_HEAP_FREE_RATIO}" + ${ASADMIN} create-jvm-options "-XX\:HeapDumpPath=\${ENV=DUMPS_DIR}" + # Set logging to console only for containers + ${ASADMIN} set-log-attributes com.sun.enterprise.server.logging.GFFileHandler.logtoFile=false \ + + ### PRODUCTION READINESS + ${ASADMIN} create-jvm-options '-XX\:+UseG1GC' + ${ASADMIN} create-jvm-options '-XX\:+UseStringDeduplication' + ${ASADMIN} create-jvm-options '-XX\:+DisableExplicitGC' + ${ASADMIN} create-jvm-options '-XX\:MaxGCPauseMillis=${ENV=MEM_MAX_GC_PAUSE_MILLIS}' + ${ASADMIN} create-jvm-options '-XX\:MetaspaceSize=${ENV=MEM_METASPACE_SIZE}' + ${ASADMIN} create-jvm-options '-XX\:MaxMetaspaceSize=${ENV=MEM_MAX_METASPACE_SIZE}' + ${ASADMIN} create-jvm-options '-XX\:+IgnoreUnrecognizedVMOptions' + # Disable autodeploy and hot reload + ${ASADMIN} set configs.config.server-config.admin-service.das-config.dynamic-reload-enabled="false" + ${ASADMIN} set configs.config.server-config.admin-service.das-config.autodeploy-enabled="false" + # Enlarge thread pools + ${ASADMIN} set server-config.thread-pools.thread-pool.http-thread-pool.max-thread-pool-size="50" + ${ASADMIN} set server-config.thread-pools.thread-pool.http-thread-pool.max-queue-size="" + ${ASADMIN} set default-config.thread-pools.thread-pool.thread-pool-1.max-thread-pool-size="250" + # Enable file caching + ${ASADMIN} set server-config.network-config.protocols.protocol.http-listener-1.http.file-cache.enabled="true" + ${ASADMIN} set server-config.network-config.protocols.protocol.http-listener-2.http.file-cache.enabled="true" + ${ASADMIN} set default-config.network-config.protocols.protocol.http-listener-1.http.file-cache.enabled="true" + ${ASADMIN} set default-config.network-config.protocols.protocol.http-listener-2.http.file-cache.enabled="true" + # Disable the HTTPS listener (we are always fronting our appservers with a reverse proxy handling SSL) + ${ASADMIN} set configs.config.server-config.network-config.network-listeners.network-listener.http-listener-2.enabled="false" + # Enlarge and tune EJB pools (cannot do this for server-config as set does not create new entries) + ${ASADMIN} set default-config.ejb-container.pool-resize-quantity="2" + ${ASADMIN} set default-config.ejb-container.max-pool-size="128" + ${ASADMIN} set default-config.ejb-container.steady-pool-size="10" + # Misc settings + ${ASADMIN} create-system-properties fish.payara.classloading.delegate="false" + ${ASADMIN} create-system-properties 
jersey.config.client.readTimeout="300000" + ${ASADMIN} create-system-properties jersey.config.client.connectTimeout="300000" \ + + ### DATAVERSE APPLICATION SPECIFICS + # Configure the MicroProfile directory config source to point to /secrets + ${ASADMIN} set-config-dir --directory="${SECRETS_DIR}" + # Make request timeouts configurable via MPCONFIG (default to 900 secs = 15 min) + ${ASADMIN} set 'server-config.network-config.protocols.protocol.http-listener-1.http.request-timeout-seconds=${MPCONFIG=dataverse.http.timeout:900}' + # TODO: what of the below 3 items can be deleted for container usage? + ${ASADMIN} create-network-listener --protocol=http-listener-1 --listenerport=8009 --jkenabled=true jk-connector + ${ASADMIN} set server-config.network-config.protocols.protocol.http-listener-1.http.comet-support-enabled=true + ${ASADMIN} create-system-properties javax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl + # Always disable phoning home... + ${ASADMIN} disable-phone-home \ + + ### CLEANUP + # Stop domain + ${ASADMIN} stop-domain "${DOMAIN_NAME}" + # Disable JSP servlet dynamic reloads + sed -i 's#org.apache.jasper.servlet.JspServlet#org.apache.jasper.servlet.JspServlet\n \n development\n false\n \n \n genStrAsCharArray\n true\n #' "${DOMAIN_DIR}/config/default-web.xml" + # Cleanup old CA certificates to avoid unnecessary log clutter during startup + ${SCRIPT_DIR}/removeExpiredCaCerts.sh + # Delete generated files + rm -rf \ + "/tmp/password-change-file.txt" \ + "${PAYARA_DIR}/glassfish/domains/${DOMAIN_NAME}/osgi-cache" \ + "${PAYARA_DIR}/glassfish/domains/${DOMAIN_NAME}/logs" +EOF + +# Set the entrypoint to tini (as a process supervisor) +ENTRYPOINT ["/usr/bin/dumb-init", "--"] +# This works because we add ${SCRIPT_DIR} to $PATH above! 
+CMD ["entrypoint.sh"] + +LABEL org.opencontainers.image.created="@git.build.time@" \ + org.opencontainers.image.authors="Research Data Management at FZJ " \ + org.opencontainers.image.url="https://guides.dataverse.org/en/latest/container/" \ + org.opencontainers.image.documentation="https://guides.dataverse.org/en/latest/container/" \ + org.opencontainers.image.source="https://github.com/IQSS/dataverse/tree/develop/modules/container-base" \ + org.opencontainers.image.version="@project.version@" \ + org.opencontainers.image.revision="@git.commit.id.abbrev@" \ + org.opencontainers.image.vendor="Global Dataverse Community Consortium" \ + org.opencontainers.image.licenses="Apache-2.0" \ + org.opencontainers.image.title="Dataverse Base Image" \ + org.opencontainers.image.description="This container image provides an application server tuned for Dataverse software" diff --git a/modules/container-base/src/main/docker/assembly.xml b/modules/container-base/src/main/docker/assembly.xml new file mode 100644 index 00000000000..9fc62d49fa1 --- /dev/null +++ b/modules/container-base/src/main/docker/assembly.xml @@ -0,0 +1,17 @@ + + + + + ${project.basedir}/target/payara + appserver + + + + ${project.basedir}/src/main/docker/scripts + scripts + 0755 + + + \ No newline at end of file diff --git a/modules/container-base/src/main/docker/scripts/entrypoint.sh b/modules/container-base/src/main/docker/scripts/entrypoint.sh new file mode 100644 index 00000000000..47933bd42e2 --- /dev/null +++ b/modules/container-base/src/main/docker/scripts/entrypoint.sh @@ -0,0 +1,33 @@ +#!/usr/bin/dumb-init /bin/bash +########################################################################################################## +# +# This script is a fork of https://github.com/payara/Payara/blob/master/appserver/extras/docker-images/ +# server-full/src/main/docker/bin/entrypoint.sh and licensed under CDDL 1.1 by the Payara Foundation. +# +########################################################################################################## + +# This shellscript is supposed to be executed by https://github.com/Yelp/dumb-init to keep subprocesses +# and zombies under control. If the ENTRYPOINT command is changed, it will still use dumb-init because shebang. +# dumb-init takes care to send any signals to subshells, too! (Which might run in the background...) + + +# Execute any scripts BEFORE the appserver starts +for f in "${SCRIPT_DIR}"/init_* "${SCRIPT_DIR}"/init.d/*; do + # shellcheck disable=SC1090 + case "$f" in + *.sh) echo "[Entrypoint] running $f"; . "$f" ;; + *) echo "[Entrypoint] ignoring $f" ;; + esac + echo +done + +# If present, run a startInBackground.sh in the background (e.g. to run tasks AFTER the application server starts) +if [ -x "${SCRIPT_DIR}/startInBackground.sh" ]; then + echo "[Entrypoint] running ${SCRIPT_DIR}/startInBackground.sh in background" + "${SCRIPT_DIR}"/startInBackground.sh & +fi + +# Start the application server and make it REPLACE this shell, so init system and Java directly interact +# Remember - this means no code below this statement will be run! 
+echo "[Entrypoint] running ${SCRIPT_DIR}/startInForeground.sh in foreground" +exec "${SCRIPT_DIR}"/startInForeground.sh "${PAYARA_ARGS}" diff --git a/modules/container-base/src/main/docker/scripts/init_1_generate_deploy_commands.sh b/modules/container-base/src/main/docker/scripts/init_1_generate_deploy_commands.sh new file mode 100644 index 00000000000..e2d717af666 --- /dev/null +++ b/modules/container-base/src/main/docker/scripts/init_1_generate_deploy_commands.sh @@ -0,0 +1,65 @@ +#!/bin/bash +########################################################################################################## +# +# A script to append deploy commands to the post boot command file at +# $PAYARA_HOME/scripts/post-boot-commands.asadmin file. All applications in the +# $DEPLOY_DIR (either files or folders) will be deployed. +# The $POSTBOOT_COMMANDS file can then be used with the start-domain using the +# --postbootcommandfile parameter to deploy applications on startup. +# +# Usage: +# ./generate_deploy_commands.sh +# +# Optionally, any number of parameters of the asadmin deploy command can be +# specified as parameters to this script. +# E.g., to deploy applications with implicit CDI scanning disabled: +# +# ./generate_deploy_commands.sh --properties=implicitCdiEnabled=false +# +# Environment variables used: +# - $PREBOOT_COMMANDS - the pre boot command file. +# - $POSTBOOT_COMMANDS - the post boot command file. +# +# Note that many parameters to the deploy command can be safely used only when +# a single application exists in the $DEPLOY_DIR directory. +# +########################################################################################################## +# +# This script is a fork of https://github.com/payara/Payara/blob/master/appserver/extras/docker-images/ +# server-full/src/main/docker/bin/init_1_generate_deploy_commands.sh and licensed under CDDL 1.1 +# by the Payara Foundation. +# +########################################################################################################## + +# Check required variables are set +if [ -z "$DEPLOY_DIR" ]; then echo "Variable DEPLOY_DIR is not set."; exit 1; fi +if [ -z "$PREBOOT_COMMANDS" ]; then echo "Variable PREBOOT_COMMANDS is not set."; exit 1; fi +if [ -z "$POSTBOOT_COMMANDS" ]; then echo "Variable POSTBOOT_COMMANDS is not set."; exit 1; fi + +# Create pre and post boot command files if they don't exist +touch "$POSTBOOT_COMMANDS" +touch "$PREBOOT_COMMANDS" + +deploy() { + + if [ -z "$1" ]; then + echo "No deployment specified"; + exit 1; + fi + + DEPLOY_STATEMENT="deploy $DEPLOY_PROPS $1" + if grep -q "$1" "$POSTBOOT_COMMANDS"; then + echo "post boot commands already deploys $1"; + else + echo "Adding deployment target $1 to post boot commands"; + echo "$DEPLOY_STATEMENT" >> "$POSTBOOT_COMMANDS"; + fi +} + +# RAR files first +find "$DEPLOY_DIR" -mindepth 1 -maxdepth 1 -name "*.rar" -print0 \ + | while IFS= read -r -d '' file; do deploy "$file"; done + +# Then every other WAR, EAR, JAR or directory +find "$DEPLOY_DIR" -mindepth 1 -maxdepth 1 ! 
-name "*.rar" -a -name "*.war" -o -name "*.ear" -o -name "*.jar" -o -type d -print0 \ + | while IFS= read -r -d '' file; do deploy "$file"; done \ No newline at end of file diff --git a/modules/container-base/src/main/docker/scripts/init_1_generate_devmode_commands.sh b/modules/container-base/src/main/docker/scripts/init_1_generate_devmode_commands.sh new file mode 100644 index 00000000000..bb0984332f7 --- /dev/null +++ b/modules/container-base/src/main/docker/scripts/init_1_generate_devmode_commands.sh @@ -0,0 +1,68 @@ +#!/bin/bash + +set -euo pipefail + +###### ###### ###### ###### ###### ###### ###### ###### ###### ###### ###### +# This script enables different development options, like a JMX connector +# usable with VisualVM, JRebel hot-reload support and JDWP debugger service. +# Enable it by adding env vars on startup (e.g. via ConfigMap) +# +# As this script is "sourced" from entrypoint.sh, we can manipulate env vars +# for the parent shell before executing Payara. +###### ###### ###### ###### ###### ###### ###### ###### ###### ###### ###### + +# 0. Init variables +ENABLE_JMX=${ENABLE_JMX:-0} +ENABLE_JDWP=${ENABLE_JDWP:-0} +ENABLE_RELOAD=${ENABLE_RELOAD:-0} + +DV_PREBOOT=${PAYARA_DIR}/dataverse_preboot +echo "# Dataverse preboot configuration for Payara" > "${DV_PREBOOT}" + +# 1. Configure JMX (enabled by default on port 8686, but requires SSL) +# See also https://blog.payara.fish/monitoring-payara-server-with-jconsole +# To still use it, you can use a sidecar container proxying or using JMX via localhost without SSL. +if [ "${ENABLE_JMX}" = "1" ]; then + echo "Enabling unsecured JMX on 0.0.0.0:8686, enabling AMX and tuning monitoring levels to HIGH. You'll need a sidecar for this, as access is allowed from same machine only (without SSL)." + { \ + echo "set configs.config.server-config.amx-configuration.enabled=true" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.jvm=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.connector-service=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.connector-connection-pool=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.jdbc-connection-pool=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.web-services-container=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.ejb-container=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.thread-pool=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.http-service=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.security=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.jms-service=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.jersey=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.transaction-service=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.jpa=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.web-container=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.orb=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.deployment=HIGH" + echo "set 
configs.config.server-config.admin-service.jmx-connector.system.security-enabled=false" + } >> "${DV_PREBOOT}" +fi + +# 2. Enable JDWP via debugging switch +if [ "${ENABLE_JDWP}" = "1" ]; then + echo "Enabling JDWP remote debugging support via asadmin debugging switch." + export PAYARA_ARGS="${PAYARA_ARGS} --debug=true" +fi + +# 3. Enable hot reload +if [ "${ENABLE_RELOAD}" = "1" ]; then + echo "Enabling hot reload of deployments." + echo "set configs.config.server-config.admin-service.das-config.dynamic-reload-enabled=true" >> "${DV_PREBOOT}" +fi + +# 4. Add the commands to the existing preboot file, but insert BEFORE deployment +TMP_PREBOOT=$(mktemp) +cat "${DV_PREBOOT}" "${PREBOOT_COMMANDS}" > "${TMP_PREBOOT}" +mv "${TMP_PREBOOT}" "${PREBOOT_COMMANDS}" +echo "DEBUG: preboot contains the following commands:" +echo "--------------------------------------------------" +cat "${PREBOOT_COMMANDS}" +echo "--------------------------------------------------" \ No newline at end of file diff --git a/modules/container-base/src/main/docker/scripts/removeExpiredCaCerts.sh b/modules/container-base/src/main/docker/scripts/removeExpiredCaCerts.sh new file mode 100644 index 00000000000..205a9eda5d7 --- /dev/null +++ b/modules/container-base/src/main/docker/scripts/removeExpiredCaCerts.sh @@ -0,0 +1,42 @@ +#!/bin/bash + +# Remove expired certs from a keystore +# ------------------------------------ +# This script was copied from https://gist.github.com/damkh/a4a0d74891f92b0285a3853418357c1e (thanks @damkh) +# and slightly modified to be used within our scenario and comply with shellcheck good practices. + +set -euo pipefail + +KEYSTORE="${DOMAIN_DIR}/config/cacerts.jks" +keytool -list -v -keystore "${KEYSTORE}" -storepass changeit 2>/dev/null | \ + grep -i 'alias\|until' > aliases.txt + +i=1 +# Split dates and aliases to different arrays +while read -r p; do + # uneven lines are dates, evens are aliases + if ! ((i % 2)); then + arr_date+=("$p") + else + arr_cn+=("$p") + fi + i=$((i+1)) +done < aliases.txt +i=0 + +# Parse until-dates -> +# convert until-dates to "seconds from 01-01-1970"-format -> +# compare until-dates with today-date -> +# delete expired aliases +for date_idx in $(seq 0 $((${#arr_date[*]}-1))); +do + a_date=$(echo "${arr_date[$date_idx]}" | awk -F"until: " '{print $2}') + if [ "$(date +%s --date="$a_date")" -lt "$(date +%s)" ]; + then + echo "removing ${arr_cn[$i]} expired: $a_date" + alias_name=$(echo "${arr_cn[$i]}" | awk -F"name: " '{print $2}') + keytool -delete -alias "$alias_name" -keystore "${KEYSTORE}" -storepass changeit + fi + i=$((i+1)) +done +echo "Done." \ No newline at end of file diff --git a/modules/container-base/src/main/docker/scripts/startInForeground.sh b/modules/container-base/src/main/docker/scripts/startInForeground.sh new file mode 100644 index 00000000000..4843f6ae055 --- /dev/null +++ b/modules/container-base/src/main/docker/scripts/startInForeground.sh @@ -0,0 +1,89 @@ +#!/bin/bash +########################################################################################################## +# +# This script is to execute Payara Server in foreground, mainly in a docker environment. +# It allows to avoid running 2 instances of JVM, which happens with the start-domain --verbose command. 
+# +# Usage: +# Running +# startInForeground.sh +# is equivalent to running +# asadmin start-domain +# +# It's possible to use any arguments of the start-domain command as arguments to startInForeground.sh +# +# Environment variables used: +# - $ADMIN_USER - the username to use for the asadmin utility. +# - $PASSWORD_FILE - the password file to use for the asadmin utility. +# - $PREBOOT_COMMANDS - the pre boot command file. +# - $POSTBOOT_COMMANDS - the post boot command file. +# - $DOMAIN_NAME - the name of the domain to start. +# - $JVM_ARGS - extra JVM options to pass to the Payara Server instance. +# - $AS_ADMIN_MASTERPASSWORD - the master password for the Payara Server instance. +# +# This script executes the asadmin tool which is expected at ~/appserver/bin/asadmin. +# +########################################################################################################## +# +# This script is a fork of https://github.com/payara/Payara/blob/master/appserver/ +# extras/docker-images/server-full/src/main/docker/bin/startInForeground.sh and licensed under CDDL 1.1 +# by the Payara Foundation. +# +########################################################################################################## + +# Check required variables are set +if [ -z "$ADMIN_USER" ]; then echo "Variable ADMIN_USER is not set."; exit 1; fi +if [ -z "$PASSWORD_FILE" ]; then echo "Variable PASSWORD_FILE is not set."; exit 1; fi +if [ -z "$PREBOOT_COMMANDS" ]; then echo "Variable PREBOOT_COMMANDS is not set."; exit 1; fi +if [ -z "$POSTBOOT_COMMANDS" ]; then echo "Variable POSTBOOT_COMMANDS is not set."; exit 1; fi +if [ -z "$DOMAIN_NAME" ]; then echo "Variable DOMAIN_NAME is not set."; exit 1; fi + +# Check if dumps are enabled - add arg to JVM_ARGS in this case +if [ -n "${ENABLE_DUMPS}" ] && [ "${ENABLE_DUMPS}" = "1" ]; then + JVM_ARGS="${JVM_DUMPS_ARG} ${JVM_ARGS}" +fi + +# The following command gets the command line to be executed by start-domain +# - print the command line to the server with --dry-run, each argument on a separate line +# - remove -read-string argument +# - surround each line except with parenthesis to allow spaces in paths +# - remove lines before and after the command line and squash commands on a single line + +# Create pre and post boot command files if they don't exist +touch "$POSTBOOT_COMMANDS" +touch "$PREBOOT_COMMANDS" + +# shellcheck disable=SC2068 +# -- Using $@ is necessary here as asadmin cannot deal with options enclosed in ""! +OUTPUT=$("${PAYARA_DIR}"/bin/asadmin --user="${ADMIN_USER}" --passwordfile="${PASSWORD_FILE}" start-domain --dry-run --prebootcommandfile="${PREBOOT_COMMANDS}" --postbootcommandfile="${POSTBOOT_COMMANDS}" $@ "$DOMAIN_NAME") +STATUS=$? 
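# Usage sketch (illustrative values, not part of this change): the required variables normally come
# from the container image environment (with PAYARA_DIR and the password file already in place), and
# any extra start-domain switches are passed straight through via $@ as described in the header above.
#
#   ADMIN_USER=admin \
#   PASSWORD_FILE=/path/to/passwordFile \
#   PREBOOT_COMMANDS=/path/to/pre-boot-commands.asadmin \
#   POSTBOOT_COMMANDS=/path/to/post-boot-commands.asadmin \
#   DOMAIN_NAME=domain1 \
#   ./startInForeground.sh --debug=true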
+if [ "$STATUS" -ne 0 ] + then + echo ERROR: "$OUTPUT" >&2 + exit 1 +fi + +COMMAND=$(echo "$OUTPUT"\ + | sed -n -e '2,/^$/p'\ + | sed "s|glassfish.jar|glassfish.jar $JVM_ARGS |g") + +echo Executing Payara Server with the following command line: +echo "$COMMAND" | tr ' ' '\n' +echo + +# Run the server in foreground - read master password from variable or file or use the default "changeit" password + +set +x +if test "$AS_ADMIN_MASTERPASSWORD"x = x -a -f "$PASSWORD_FILE" + then + # shellcheck disable=SC1090 + source "$PASSWORD_FILE" +fi +if test "$AS_ADMIN_MASTERPASSWORD"x = x + then + AS_ADMIN_MASTERPASSWORD=changeit +fi +echo "AS_ADMIN_MASTERPASSWORD=$AS_ADMIN_MASTERPASSWORD" > /tmp/masterpwdfile +# shellcheck disable=SC2086 +# -- Unquoted exec var is necessary, as otherwise things get escaped that may not be escaped (parameters for Java) +exec ${COMMAND} < /tmp/masterpwdfile diff --git a/modules/dataverse-parent/pom.xml b/modules/dataverse-parent/pom.xml index e36a78b11be..38a7c246fde 100644 --- a/modules/dataverse-parent/pom.xml +++ b/modules/dataverse-parent/pom.xml @@ -13,6 +13,7 @@ ../../pom.xml ../../scripts/zipdownload + ../container-base 5.2022.3 - 42.5.0 + 42.5.1 8.11.1 1.12.290 0.177.0 @@ -163,7 +164,7 @@ 4.4.14 - 5.0.0-RC1 + 5.0.0 1.15.0 @@ -181,10 +182,14 @@ 3.2.2 3.3.2 3.2.0 + 3.0.0-M1 3.0.0-M5 3.0.0-M5 3.3.0 3.1.2 + + + 0.40.2 @@ -225,6 +230,11 @@ maven-dependency-plugin ${maven-dependency-plugin.version} + + org.apache.maven.plugins + maven-install-plugin + ${maven-install-plugin.version} + org.apache.maven.plugins maven-surefire-plugin @@ -247,6 +257,11 @@ + + io.fabric8 + docker-maven-plugin + ${fabric8-dmp.version} + @@ -323,4 +338,44 @@ --> + + + ct + + + 5.2022.4 + + + + + + + io.github.git-commit-id + git-commit-id-maven-plugin + 5.0.0 + + + retrieve-git-details + + revision + + initialize + + + + ${project.basedir}/../../.git + UTC + 8 + false + + + + + + + + diff --git a/scripts/api/data/metadatablocks/citation.tsv b/scripts/api/data/metadatablocks/citation.tsv index 1b1ff0ae819..be32bb7134e 100644 --- a/scripts/api/data/metadatablocks/citation.tsv +++ b/scripts/api/data/metadatablocks/citation.tsv @@ -43,7 +43,7 @@ producerURL URL The URL of the producer's website https:// url 39 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE producer citation producerLogoURL Logo URL The URL of the producer's logo https:// url 40
FALSE FALSE FALSE FALSE FALSE FALSE producer citation productionDate Production Date The date when the data were produced (not distributed, published, or archived) YYYY-MM-DD date 41 TRUE FALSE FALSE TRUE FALSE FALSE citation - productionPlace Production Location The location where the data and any related materials were produced or collected text 42 FALSE FALSE FALSE FALSE FALSE FALSE citation + productionPlace Production Location The location where the data and any related materials were produced or collected text 42 TRUE FALSE TRUE TRUE FALSE FALSE citation contributor Contributor The entity, such as a person or organization, responsible for collecting, managing, or otherwise contributing to the development of the Dataset none 43 : FALSE FALSE TRUE FALSE FALSE FALSE citation http://purl.org/dc/terms/contributor contributorType Type Indicates the type of contribution made to the dataset text 44 #VALUE TRUE TRUE FALSE TRUE FALSE FALSE contributor citation contributorName Name The name of the contributor, e.g. the person's name or the name of an organization 1) FamilyName, GivenName or 2) Organization text 45 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE contributor citation diff --git a/scripts/api/data/metadatablocks/codemeta.tsv b/scripts/api/data/metadatablocks/codemeta.tsv new file mode 100644 index 00000000000..a5c50368b75 --- /dev/null +++ b/scripts/api/data/metadatablocks/codemeta.tsv @@ -0,0 +1,37 @@ +#metadataBlock name dataverseAlias displayName blockURI + codeMeta20 Software Metadata (CodeMeta v2.0) https://codemeta.github.io/terms/ +#datasetField name title description watermark fieldType displayOrder displayFormat advancedSearchField allowControlledVocabulary allowmultiples facetable displayoncreate required parent metadatablock_id termURI + codeVersion Software Version Version of the software instance, usually following some convention like SemVer etc. e.g. 0.2.1 or 1.3 or 2021.1 etc text 0 #VALUE TRUE FALSE FALSE TRUE TRUE FALSE codeMeta20 https://schema.org/softwareVersion + developmentStatus Development Status Description of development status, e.g. work in progress (wip), active, etc. See repostatus.org for more information. text 1 #VALUE TRUE TRUE FALSE TRUE FALSE FALSE codeMeta20 https://www.repostatus.org + codeRepository Code Repository Link to the repository where the un-compiled, human-readable code and related code is located (SVN, GitHub, CodePlex, institutional GitLab instance, Gitea, etc.). e.g. https://github.com/user/project url 2 #VALUE TRUE FALSE TRUE FALSE TRUE FALSE codeMeta20 https://schema.org/codeRepository + applicationCategory Application Category Type of software application, e.g. Simulation, Analysis, Visualisation. text 3 #VALUE TRUE FALSE TRUE TRUE TRUE FALSE codeMeta20 https://schema.org/applicationCategory + applicationSubCategory Application Subcategory Subcategory of the application, e.g. Arcade Game. text 4 #VALUE TRUE FALSE TRUE TRUE FALSE FALSE codeMeta20 https://schema.org/applicationSubCategory + programmingLanguage Programming Language The programming language(s) used to implement the software (e.g. Python, C++, Matlab, Fortran, Java, Julia,...) text 5 #VALUE TRUE FALSE TRUE TRUE TRUE FALSE codeMeta20 https://schema.org/programmingLanguage + runtimePlatform Runtime Platform Runtime platform or script interpreter dependencies (e.g. Java 11, Python 3.10 or .Net Framework 4.8). e.g. 
Python 3.10 text 6 #VALUE TRUE FALSE TRUE TRUE FALSE FALSE codeMeta20 https://schema.org/runtimePlatform + operatingSystem Operating Systems Operating systems supported (e.g. Windows 10, OSX 11.3, Android 11). text 7 #VALUE TRUE FALSE TRUE TRUE TRUE FALSE codeMeta20 https://schema.org/operatingSystem + targetProduct Target Product Target Operating System / Product to which the code applies. If applies to several versions, just the product name can be used. text 8 #VALUE TRUE FALSE TRUE TRUE FALSE FALSE codeMeta20 https://schema.org/targetProduct + buildInstructions Build Instructions Link to installation instructions/documentation e.g. https://github.com/user/project/blob/main/BUILD.md url 9 #VALUE FALSE FALSE TRUE FALSE FALSE FALSE codeMeta20 https://codemeta.github.io/terms/buildInstructions + softwareRequirementsItem Software Requirements Required software dependencies none 10 FALSE FALSE TRUE FALSE TRUE FALSE codeMeta20 + softwareRequirements Name & Version Name and version of the required software/library dependency e.g. Pandas 1.4.3 text 0 #VALUE TRUE FALSE FALSE FALSE TRUE FALSE softwareRequirementsItem codeMeta20 https://schema.org/softwareRequirements + softwareRequirementsInfoUrl Info URL Link to required software/library homepage or documentation (ideally also versioned) e.g. https://pandas.pydata.org/pandas-docs/version/1.4.3 url 1 #VALUE FALSE FALSE FALSE FALSE TRUE FALSE softwareRequirementsItem codeMeta20 https://dataverse.org/schema/codeMeta20/softwareRequirementsInfoUrl + softwareSuggestionsItem Software Suggestions Optional dependencies, e.g. for optional features, code development, etc. none 11 FALSE FALSE TRUE FALSE FALSE FALSE codeMeta20 + softwareSuggestions Name & Version Name and version of the optional software/library dependency e.g. Sphinx 5.0.2 text 0 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE softwareSuggestionsItem codeMeta20 https://codemeta.github.io/terms/softwareSuggestions + softwareSuggestionsInfoUrl Info URL Link to optional software/library homepage or documentation (ideally also versioned) e.g. https://www.sphinx-doc.org url 1 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE softwareSuggestionsItem codeMeta20 https://dataverse.org/schema/codeMeta20/softwareSuggestionsInfoUrl + memoryRequirements Memory Requirements Minimum memory requirements. text 12 #VALUE TRUE FALSE FALSE FALSE FALSE FALSE codeMeta20 https://schema.org/memoryRequirements + processorRequirements Processor Requirements Processor architecture or other CPU requirements to run the application (e.g. IA64). text 13 #VALUE TRUE FALSE TRUE FALSE FALSE FALSE codeMeta20 https://schema.org/processorRequirements + storageRequirements Storage Requirements Minimum storage requirements (e.g. free space required). text 14 #VALUE TRUE FALSE FALSE FALSE FALSE FALSE codeMeta20 https://schema.org/storageRequirements + permissions Permissions Permission(s) required to run the code (for example, a mobile app may require full internet access or may run only on wifi). text 15 #VALUE TRUE FALSE TRUE FALSE FALSE FALSE codeMeta20 https://schema.org/permissions + softwareHelp Software Help/Documentation Link to help texts or documentation e.g. https://user.github.io/project/docs url 16 #VALUE FALSE FALSE TRUE FALSE TRUE FALSE codeMeta20 https://schema.org/softwareHelp + readme Readme Link to the README of the project e.g. 
https://github.com/user/project/blob/main/README.md url 17 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE codeMeta20 https://codemeta.github.io/terms/readme + releaseNotes Release Notes Link to release notes e.g. https://github.com/user/project/blob/main/docs/release-0.1.md url 18 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE codeMeta20 https://schema.org/releaseNotes + contIntegration Continuous Integration Link to continuous integration service e.g. https://github.com/user/project/actions url 19 #VALUE FALSE FALSE TRUE FALSE FALSE FALSE codeMeta20 https://codemeta.github.io/terms/contIntegration + issueTracker Issue Tracker Link to software bug reporting or issue tracking system e.g. https://github.com/user/project/issues url 20 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE codeMeta20 https://codemeta.github.io/terms/issueTracker +#controlledVocabulary DatasetField Value identifier displayOrder + developmentStatus Concept concept 0 + developmentStatus WIP wip 1 + developmentStatus Active active 2 + developmentStatus Inactive inactive 3 + developmentStatus Unsupported unsupported 4 + developmentStatus Moved moved 5 + developmentStatus Suspended suspended 6 + developmentStatus Abandoned abandoned 7 diff --git a/scripts/api/setup-datasetfields.sh b/scripts/api/setup-datasetfields.sh index 0d79176c099..0d2d60b9538 100755 --- a/scripts/api/setup-datasetfields.sh +++ b/scripts/api/setup-datasetfields.sh @@ -7,4 +7,3 @@ curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @da curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @data/metadatablocks/astrophysics.tsv -H "Content-type: text/tab-separated-values" curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @data/metadatablocks/biomedical.tsv -H "Content-type: text/tab-separated-values" curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @data/metadatablocks/journals.tsv -H "Content-type: text/tab-separated-values" - diff --git a/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFile.java b/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFile.java index a7a89934f47..344032ef5e3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFile.java +++ b/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFile.java @@ -55,7 +55,10 @@ public class AuxiliaryFile implements Serializable { private String formatTag; private String formatVersion; - + + /** + * The application/entity that created the auxiliary file. + */ private String origin; private boolean isPublic; diff --git a/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java index 76c91382868..05f3e209632 100644 --- a/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java @@ -70,9 +70,13 @@ public AuxiliaryFile save(AuxiliaryFile auxiliaryFile) { * @param type how to group the files such as "DP" for "Differentially * @param mediaType user supplied content type (MIME type) * Private Statistics". - * @return success boolean - returns whether the save was successful + * @param save boolean - true to save immediately, false to let the cascade + * do persist to the database. + * @return an AuxiliaryFile with an id when save=true (assuming no + * exceptions) or an AuxiliaryFile without an id that will be persisted + * later through the cascade. 
*/ - public AuxiliaryFile processAuxiliaryFile(InputStream fileInputStream, DataFile dataFile, String formatTag, String formatVersion, String origin, boolean isPublic, String type, MediaType mediaType) { + public AuxiliaryFile processAuxiliaryFile(InputStream fileInputStream, DataFile dataFile, String formatTag, String formatVersion, String origin, boolean isPublic, String type, MediaType mediaType, boolean save) { StorageIO storageIO = null; AuxiliaryFile auxFile = new AuxiliaryFile(); @@ -114,7 +118,14 @@ public AuxiliaryFile processAuxiliaryFile(InputStream fileInputStream, DataFile auxFile.setType(type); auxFile.setDataFile(dataFile); auxFile.setFileSize(storageIO.getAuxObjectSize(auxExtension)); - auxFile = save(auxFile); + if (save) { + auxFile = save(auxFile); + } else { + if (dataFile.getAuxiliaryFiles() == null) { + dataFile.setAuxiliaryFiles(new ArrayList<>()); + } + dataFile.getAuxiliaryFiles().add(auxFile); + } } catch (IOException ioex) { logger.severe("IO Exception trying to save auxiliary file: " + ioex.getMessage()); throw new InternalServerErrorException(); @@ -129,7 +140,11 @@ public AuxiliaryFile processAuxiliaryFile(InputStream fileInputStream, DataFile } return auxFile; } - + + public AuxiliaryFile processAuxiliaryFile(InputStream fileInputStream, DataFile dataFile, String formatTag, String formatVersion, String origin, boolean isPublic, String type, MediaType mediaType) { + return processAuxiliaryFile(fileInputStream, dataFile, formatTag, formatVersion, origin, isPublic, type, mediaType, true); + } + public AuxiliaryFile lookupAuxiliaryFile(DataFile dataFile, String formatTag, String formatVersion) { Query query = em.createNamedQuery("AuxiliaryFile.lookupAuxiliaryFile"); diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFile.java b/src/main/java/edu/harvard/iq/dataverse/DataFile.java index cb43dff0e20..5171e8d49f2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFile.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFile.java @@ -569,7 +569,7 @@ public FileMetadata getLatestPublishedFileMetadata() throws UnsupportedOperation if(fmd == null) { throw new UnsupportedOperationException("No published metadata version for DataFile " + this.getId()); } - + return fmd; } diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java index 0b935183182..7da06f36be4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java @@ -1544,6 +1544,10 @@ public void finalizeFileDelete(Long dataFileId, String storageLocation) throws I throw new IOException("Attempted to permanently delete a physical file still associated with an existing DvObject " + "(id: " + dataFileId + ", location: " + storageLocation); } + if(storageLocation == null || storageLocation.isBlank()) { + throw new IOException("Attempted to delete a physical file with no location " + + "(id: " + dataFileId + ", location: " + storageLocation); + } StorageIO directStorageAccess = DataAccess.getDirectStorageIO(storageLocation); directStorageAccess.delete(); } diff --git a/src/main/java/edu/harvard/iq/dataverse/Dataset.java b/src/main/java/edu/harvard/iq/dataverse/Dataset.java index e91221ce36c..d7e7271738d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/Dataset.java +++ b/src/main/java/edu/harvard/iq/dataverse/Dataset.java @@ -33,6 +33,8 @@ import javax.persistence.Table; import javax.persistence.Temporal; import 
javax.persistence.TemporalType; + +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.util.StringUtil; import edu.harvard.iq.dataverse.util.SystemConfig; @@ -530,11 +532,8 @@ private Collection getCategoryNames() { @Deprecated public Path getFileSystemDirectory() { Path studyDir = null; - - String filesRootDirectory = System.getProperty("dataverse.files.directory"); - if (filesRootDirectory == null || filesRootDirectory.equals("")) { - filesRootDirectory = "/tmp/files"; - } + + String filesRootDirectory = JvmSettings.FILES_DIRECTORY.lookup(); if (this.getAlternativePersistentIndentifiers() != null && !this.getAlternativePersistentIndentifiers().isEmpty()) { for (AlternativePersistentIdentifier api : this.getAlternativePersistentIndentifiers()) { diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 6e71f6c5042..429a0d7a4e4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -56,6 +56,8 @@ import edu.harvard.iq.dataverse.util.StringUtil; import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.URLTokenUtil; +import edu.harvard.iq.dataverse.util.WebloaderUtil; import edu.harvard.iq.dataverse.validation.URLValidator; import edu.harvard.iq.dataverse.workflows.WorkflowComment; @@ -1845,7 +1847,9 @@ public boolean globusUploadSupported() { return settingsWrapper.isGlobusUpload() && settingsWrapper.isGlobusEnabledStorageDriver(dataset.getEffectiveStorageDriverId()); } - + public boolean webloaderUploadSupported() { + return settingsWrapper.isWebloaderUpload() && StorageIO.isDirectUploadEnabled(dataset.getEffectiveStorageDriverId()); + } private String init(boolean initFull) { @@ -5490,7 +5494,7 @@ public List getCachedToolsForDataFile(Long fileId, ExternalTool.Ty return cachedTools; } DataFile dataFile = datafileService.find(fileId); - cachedTools = ExternalToolServiceBean.findExternalToolsByFile(externalTools, dataFile); + cachedTools = externalToolService.findExternalToolsByFile(externalTools, dataFile); cachedToolsByFileId.put(fileId, cachedTools); //add to map so we don't have to do the lifting again return cachedTools; } @@ -6062,4 +6066,19 @@ public void startGlobusTransfer() { } PrimeFaces.current().executeScript(globusService.getGlobusDownloadScript(dataset, apiToken)); } + + public String getWebloaderUrlForDataset(Dataset d) { + String localeCode = session.getLocaleCode(); + User user = session.getUser(); + if (user instanceof AuthenticatedUser) { + ApiToken apiToken = authService.getValidApiTokenForUser((AuthenticatedUser) user); + return WebloaderUtil.getWebloaderUrl(d, apiToken, localeCode, + settingsService.getValueForKey(SettingsServiceBean.Key.WebloaderUrl)); + } else { + // Shouldn't normally happen (seesion timeout? bug?) 
+ logger.warning("getWebloaderUrlForDataset called for non-Authenticated user"); + return null; + } + } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index 30815c43381..c21861a1bf4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -1,6 +1,7 @@ package edu.harvard.iq.dataverse; import edu.harvard.iq.dataverse.util.MarkupChecker; +import edu.harvard.iq.dataverse.util.PersonOrOrgUtil; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.DatasetFieldType.FieldType; import edu.harvard.iq.dataverse.branding.BrandingUtil; @@ -842,12 +843,26 @@ public String getDescriptionPlainText() { return MarkupChecker.stripAllTags(getDescription()); } - public List getDescriptionsPlainText() { - List plainTextDescriptions = new ArrayList<>(); + /* This method is (only) used in creating schema.org json-jd where Google requires a text description <5000 chars. + * + * @returns - a single string composed of all descriptions (joined with \n if more than one) truncated with a trailing '...' if >=5000 chars + */ + public String getDescriptionsPlainTextTruncated() { + List plainTextDescriptions = new ArrayList(); + for (String htmlDescription : getDescriptions()) { plainTextDescriptions.add(MarkupChecker.stripAllTags(htmlDescription)); } - return plainTextDescriptions; + String description = String.join("\n", plainTextDescriptions); + if (description.length() >= 5000) { + int endIndex = description.substring(0, 4997).lastIndexOf(" "); + if (endIndex == -1) { + //There are no spaces so just break anyway + endIndex = 4997; + } + description = description.substring(0, endIndex) + "..."; + } + return description; } /** @@ -1802,27 +1817,46 @@ public String getJsonLd() { for (DatasetAuthor datasetAuthor : this.getDatasetAuthors()) { JsonObjectBuilder author = Json.createObjectBuilder(); String name = datasetAuthor.getName().getDisplayValue(); + String identifierAsUrl = datasetAuthor.getIdentifierAsUrl(); DatasetField authorAffiliation = datasetAuthor.getAffiliation(); String affiliation = null; if (authorAffiliation != null) { - affiliation = datasetAuthor.getAffiliation().getDisplayValue(); - } - // We are aware of "givenName" and "familyName" but instead of a person it might be an organization such as "Gallup Organization". - //author.add("@type", "Person"); - author.add("name", name); - // We are aware that the following error is thrown by https://search.google.com/structured-data/testing-tool - // "The property affiliation is not recognized by Google for an object of type Thing." - // Someone at Google has said this is ok. - // This logic could be moved into the `if (authorAffiliation != null)` block above. - if (!StringUtil.isEmpty(affiliation)) { - author.add("affiliation", affiliation); + affiliation = datasetAuthor.getAffiliation().getValue(); } - String identifierAsUrl = datasetAuthor.getIdentifierAsUrl(); - if (identifierAsUrl != null) { - // It would be valid to provide an array of identifiers for authors but we have decided to only provide one. - author.add("@id", identifierAsUrl); - author.add("identifier", identifierAsUrl); + JsonObject entity = PersonOrOrgUtil.getPersonOrOrganization(name, false, (identifierAsUrl!=null)); + String givenName= entity.containsKey("givenName") ? entity.getString("givenName"):null; + String familyName= entity.containsKey("familyName") ? 
entity.getString("familyName"):null; + + if (entity.getBoolean("isPerson")) { + // Person + author.add("@type", "Person"); + if (givenName != null) { + author.add("givenName", givenName); + } + if (familyName != null) { + author.add("familyName", familyName); + } + if (!StringUtil.isEmpty(affiliation)) { + author.add("affiliation", Json.createObjectBuilder().add("@type", "Organization").add("name", affiliation)); + } + //Currently all possible identifier URLs are for people not Organizations + if(identifierAsUrl != null) { + author.add("sameAs", identifierAsUrl); + //Legacy - not sure if these are still useful + author.add("@id", identifierAsUrl); + author.add("identifier", identifierAsUrl); + + } + } else { + // Organization + author.add("@type", "Organization"); + if (!StringUtil.isEmpty(affiliation)) { + author.add("parentOrganization", Json.createObjectBuilder().add("@type", "Organization").add("name", affiliation)); + } } + // Both cases + author.add("name", entity.getString("fullName")); + //And add to the array authors.add(author); } JsonArray authorsArray = authors.build(); @@ -1859,16 +1893,8 @@ public String getJsonLd() { job.add("dateModified", this.getPublicationDateAsString()); job.add("version", this.getVersionNumber().toString()); - JsonArrayBuilder descriptionsArray = Json.createArrayBuilder(); - List descriptions = this.getDescriptionsPlainText(); - for (String description : descriptions) { - descriptionsArray.add(description); - } - /** - * In Dataverse 4.8.4 "description" was a single string but now it's an - * array. - */ - job.add("description", descriptionsArray); + String description = this.getDescriptionsPlainTextTruncated(); + job.add("description", description); /** * "keywords" - contains subject(s), datasetkeyword(s) and topicclassification(s) @@ -1892,11 +1918,16 @@ public String getJsonLd() { job.add("keywords", keywords); /** - * citation: (multiple) related publication citation and URLs, if - * present. + * citation: (multiple) related publication citation and URLs, if present. * - * In Dataverse 4.8.4 "citation" was an array of strings but now it's an - * array of objects. + * Schema.org allows text or a CreativeWork object. Google recommends text with + * either the full citation or the PID URL. This code adds an object if we have + * the citation text for the work and/or an entry in the URL field (i.e. + * https://doi.org/...) The URL is reported as the 'url' field while the + * citation text (which would normally include the name) is reported as 'name' + * since there doesn't appear to be a better field ('text', which was used + * previously, is the actual text of the creative work). 
+ * */ List relatedPublications = getRelatedPublications(); if (!relatedPublications.isEmpty()) { @@ -1911,11 +1942,12 @@ public String getJsonLd() { JsonObjectBuilder citationEntry = Json.createObjectBuilder(); citationEntry.add("@type", "CreativeWork"); if (pubCitation != null) { - citationEntry.add("text", pubCitation); + citationEntry.add("name", pubCitation); } if (pubUrl != null) { citationEntry.add("@id", pubUrl); citationEntry.add("identifier", pubUrl); + citationEntry.add("url", pubUrl); } if (addToArray) { jsonArrayBuilder.add(citationEntry); @@ -1957,13 +1989,14 @@ public String getJsonLd() { job.add("license",DatasetUtil.getLicenseURI(this)); } + String installationBrandName = BrandingUtil.getInstallationBrandName(); + job.add("includedInDataCatalog", Json.createObjectBuilder() .add("@type", "DataCatalog") - .add("name", BrandingUtil.getRootDataverseCollectionName()) + .add("name", installationBrandName) .add("url", SystemConfig.getDataverseSiteUrlStatic()) ); - - String installationBrandName = BrandingUtil.getInstallationBrandName(); + /** * Both "publisher" and "provider" are included but they have the same * values. Some services seem to prefer one over the other. @@ -2012,7 +2045,7 @@ public String getJsonLd() { } fileObject.add("@type", "DataDownload"); fileObject.add("name", fileMetadata.getLabel()); - fileObject.add("fileFormat", fileMetadata.getDataFile().getContentType()); + fileObject.add("encodingFormat", fileMetadata.getDataFile().getContentType()); fileObject.add("contentSize", fileMetadata.getDataFile().getFilesize()); fileObject.add("description", fileMetadata.getDescription()); fileObject.add("@id", filePidUrlAsString); @@ -2021,10 +2054,8 @@ public String getJsonLd() { if (hideFilesBoolean != null && hideFilesBoolean.equals("true")) { // no-op } else { - if (FileUtil.isPubliclyDownloadable(fileMetadata)) { - String nullDownloadType = null; - fileObject.add("contentUrl", dataverseSiteUrl + FileUtil.getFileDownloadUrlPath(nullDownloadType, fileMetadata.getDataFile().getId(), false, fileMetadata.getId())); - } + String nullDownloadType = null; + fileObject.add("contentUrl", dataverseSiteUrl + FileUtil.getFileDownloadUrlPath(nullDownloadType, fileMetadata.getDataFile().getId(), false, fileMetadata.getId())); } fileArray.add(fileObject); } diff --git a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java index fc8df8681af..1c033b37872 100644 --- a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java @@ -5,7 +5,9 @@ import edu.harvard.iq.dataverse.api.AbstractApiBean; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.Permission; +import edu.harvard.iq.dataverse.authorization.users.ApiToken; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; +import edu.harvard.iq.dataverse.authorization.users.User; import edu.harvard.iq.dataverse.branding.BrandingUtil; import edu.harvard.iq.dataverse.datasetutility.AddReplaceFileHelper; import edu.harvard.iq.dataverse.datasetutility.FileSizeChecker; @@ -31,11 +33,14 @@ import edu.harvard.iq.dataverse.ingest.IngestUtil; import edu.harvard.iq.dataverse.license.LicenseServiceBean; import edu.harvard.iq.dataverse.search.IndexServiceBean; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.Setting; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; 
import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.JsfHelper; import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.URLTokenUtil; +import edu.harvard.iq.dataverse.util.WebloaderUtil; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.EjbUtil; import edu.harvard.iq.dataverse.util.FileMetadataUtil; @@ -586,8 +591,7 @@ public String init() { datafileService, permissionService, commandEngine, - systemConfig, - licenseServiceBean); + systemConfig); fileReplacePageHelper = new FileReplacePageHelper(addReplaceFileHelper, dataset, @@ -2425,10 +2429,8 @@ public boolean isTemporaryPreviewAvailable(String fileSystemId, String mimeType) return false; } - String filesRootDirectory = System.getProperty("dataverse.files.directory"); - if (filesRootDirectory == null || filesRootDirectory.isEmpty()) { - filesRootDirectory = "/tmp/files"; - } + // Retrieve via MPCONFIG. Has sane default /tmp/dataverse from META-INF/microprofile-config.properties + String filesRootDirectory = JvmSettings.FILES_DIRECTORY.lookup(); String fileSystemName = filesRootDirectory + "/temp/" + fileSystemId; @@ -3067,6 +3069,10 @@ public boolean globusUploadSupported() { return settingsWrapper.isGlobusUpload() && settingsWrapper.isGlobusEnabledStorageDriver(dataset.getEffectiveStorageDriverId()); } + + public boolean webloaderUploadSupported() { + return settingsWrapper.isWebloaderUpload() && StorageIO.isDirectUploadEnabled(dataset.getEffectiveStorageDriverId()); + } private void populateFileMetadatas() { fileMetadatas = new ArrayList<>(); @@ -3106,4 +3112,18 @@ public void setFileAccessRequest(boolean fileAccessRequest) { public boolean isHasPublicStore() { return settingsWrapper.isTrueForKey(SettingsServiceBean.Key.PublicInstall, StorageIO.isPublicStore(dataset.getEffectiveStorageDriverId())); } + + public String getWebloaderUrlForDataset(Dataset d) { + String localeCode = session.getLocaleCode(); + User user = session.getUser(); + if (user instanceof AuthenticatedUser) { + ApiToken apiToken = authService.getValidApiTokenForUser((AuthenticatedUser) user); + return WebloaderUtil.getWebloaderUrl(d, apiToken, localeCode, + settingsService.getValueForKey(SettingsServiceBean.Key.WebloaderUrl)); + } else { + // Shouldn't normally happen (seesion timeout? bug?) 
+ logger.warning("getWebloaderUrlForDataset called for non-Authenticated user"); + return null; + } + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/FileMetadata.java b/src/main/java/edu/harvard/iq/dataverse/FileMetadata.java index 6262b6204f4..fc31d0867ed 100644 --- a/src/main/java/edu/harvard/iq/dataverse/FileMetadata.java +++ b/src/main/java/edu/harvard/iq/dataverse/FileMetadata.java @@ -237,7 +237,6 @@ public List getCategoriesByName() { return ret; } - public JsonArrayBuilder getCategoryNamesAsJsonArrayBuilder() { JsonArrayBuilder builder = Json.createArrayBuilder(); diff --git a/src/main/java/edu/harvard/iq/dataverse/FilePage.java b/src/main/java/edu/harvard/iq/dataverse/FilePage.java index 85eb79d2ddc..228db0a7584 100644 --- a/src/main/java/edu/harvard/iq/dataverse/FilePage.java +++ b/src/main/java/edu/harvard/iq/dataverse/FilePage.java @@ -39,6 +39,7 @@ import edu.harvard.iq.dataverse.util.JsfHelper; import static edu.harvard.iq.dataverse.util.JsfHelper.JH; import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.json.JsonUtil; import java.io.IOException; import java.time.format.DateTimeFormatter; import java.util.ArrayList; @@ -57,6 +58,9 @@ import javax.faces.view.ViewScoped; import javax.inject.Inject; import javax.inject.Named; +import javax.json.JsonArray; +import javax.json.JsonObject; +import javax.json.JsonValue; import javax.validation.ConstraintViolation; import org.primefaces.PrimeFaces; @@ -125,6 +129,8 @@ public class FilePage implements java.io.Serializable { ExternalToolServiceBean externalToolService; @EJB PrivateUrlServiceBean privateUrlService; + @EJB + AuxiliaryFileServiceBean auxiliaryFileService; @Inject DataverseRequestServiceBean dvRequestService; @@ -285,8 +291,15 @@ public void setDatasetVersionId(Long datasetVersionId) { this.datasetVersionId = datasetVersionId; } + // findPreviewTools would be a better name private List sortExternalTools(){ - List retList = externalToolService.findFileToolsByTypeAndContentType(ExternalTool.Type.PREVIEW, file.getContentType()); + List retList = new ArrayList<>(); + List previewTools = externalToolService.findFileToolsByTypeAndContentType(ExternalTool.Type.PREVIEW, file.getContentType()); + for (ExternalTool previewTool : previewTools) { + if (externalToolService.meetsRequirements(previewTool, file)) { + retList.add(previewTool); + } + } Collections.sort(retList, CompareExternalToolName); return retList; } diff --git a/src/main/java/edu/harvard/iq/dataverse/HandlenetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/HandlenetServiceBean.java index 1a8ee8a85e8..df16991b51e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/HandlenetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/HandlenetServiceBean.java @@ -24,8 +24,6 @@ import java.io.File; import java.io.FileInputStream; -import java.net.InetAddress; -import java.net.UnknownHostException; import java.util.*; import java.util.logging.Level; import java.util.logging.Logger; @@ -34,6 +32,7 @@ import java.security.PrivateKey; /* Handlenet imports: */ +import edu.harvard.iq.dataverse.util.SystemConfig; import net.handle.hdllib.AbstractMessage; import net.handle.hdllib.AbstractResponse; import net.handle.hdllib.AdminRecord; @@ -247,21 +246,7 @@ private String getRegistrationUrl(DvObject dvObject) { } public String getSiteUrl() { - logger.log(Level.FINE,"getSiteUrl"); - String hostUrl = System.getProperty("dataverse.siteUrl"); - if (hostUrl != null && !"".equals(hostUrl)) { - return hostUrl; - } - String hostName = 
System.getProperty("dataverse.fqdn"); - if (hostName == null) { - try { - hostName = InetAddress.getLocalHost().getCanonicalHostName(); - } catch (UnknownHostException e) { - return null; - } - } - hostUrl = "https://" + hostName; - return hostUrl; + return SystemConfig.getDataverseSiteUrlStatic(); } private byte[] readKey(final String file) { diff --git a/src/main/java/edu/harvard/iq/dataverse/HarvestingClientsPage.java b/src/main/java/edu/harvard/iq/dataverse/HarvestingClientsPage.java index bc83c15dcd7..5be7578f7f8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/HarvestingClientsPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/HarvestingClientsPage.java @@ -9,7 +9,6 @@ import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.impl.CreateHarvestingClientCommand; -import edu.harvard.iq.dataverse.engine.command.impl.DeleteHarvestingClientCommand; import edu.harvard.iq.dataverse.engine.command.impl.UpdateHarvestingClientCommand; import edu.harvard.iq.dataverse.harvest.client.HarvesterServiceBean; import edu.harvard.iq.dataverse.harvest.client.HarvestingClient; @@ -24,7 +23,6 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; -import java.util.Locale; import java.util.Collections; import java.util.logging.Level; import java.util.logging.Logger; @@ -79,7 +77,7 @@ public class HarvestingClientsPage implements java.io.Serializable { private Dataverse dataverse; private Long dataverseId = null; private HarvestingClient selectedClient; - private boolean setListTruncated = false; + private boolean setListTruncated = false; //private static final String solrDocIdentifierDataset = "dataset_"; @@ -245,6 +243,7 @@ public void editClient(HarvestingClient harvestingClient) { this.newNickname = harvestingClient.getName(); this.newHarvestingUrl = harvestingClient.getHarvestingUrl(); + this.customHeader = harvestingClient.getCustomHttpHeaders(); this.initialSettingsValidated = false; // TODO: do we want to try and contact the server, again, to make @@ -340,6 +339,7 @@ public void createClient(ActionEvent ae) { getSelectedDestinationDataverse().getHarvestingClientConfigs().add(newHarvestingClient); newHarvestingClient.setHarvestingUrl(newHarvestingUrl); + newHarvestingClient.setCustomHttpHeaders(customHeader); if (!StringUtils.isEmpty(newOaiSet)) { newHarvestingClient.setHarvestingSet(newOaiSet); } @@ -426,6 +426,7 @@ public void saveClient(ActionEvent ae) { // nickname is not editable for existing clients: //harvestingClient.setName(newNickname); harvestingClient.setHarvestingUrl(newHarvestingUrl); + harvestingClient.setCustomHttpHeaders(customHeader); harvestingClient.setHarvestingSet(newOaiSet); harvestingClient.setMetadataPrefix(newMetadataFormat); harvestingClient.setHarvestStyle(newHarvestingStyle); @@ -554,6 +555,9 @@ public boolean validateServerUrlOAI() { if (!StringUtils.isEmpty(getNewHarvestingUrl())) { OaiHandler oaiHandler = new OaiHandler(getNewHarvestingUrl()); + if (getNewCustomHeader() != null) { + oaiHandler.setCustomHeaders(oaiHandler.makeCustomHeaders(getNewCustomHeader())); + } boolean success = true; String message = null; @@ -635,6 +639,23 @@ public boolean validateServerUrlOAI() { return false; } + public boolean validateCustomHeader() { + if (!StringUtils.isEmpty(getNewCustomHeader())) { + // TODO: put this method somewhere else as a static utility + + // check that it's looking like "{header-name}: {header value}" at 
least + if (!Pattern.matches("^[a-zA-Z0-9\\_\\-]+:.*",getNewCustomHeader())) { + FacesContext.getCurrentInstance().addMessage(getNewClientCustomHeaderInputField().getClientId(), + new FacesMessage(FacesMessage.SEVERITY_ERROR, "", BundleUtil.getStringFromBundle("harvestclients.newClientDialog.customHeader.invalid"))); + + return false; + } + } + + // this setting is optional + return true; + } + public void validateInitialSettings() { if (isHarvestTypeOAI()) { boolean nicknameValidated = true; @@ -644,9 +665,10 @@ public void validateInitialSettings() { destinationDataverseValidated = validateSelectedDataverse(); } boolean urlValidated = validateServerUrlOAI(); + boolean customHeaderValidated = validateCustomHeader(); - if (nicknameValidated && destinationDataverseValidated && urlValidated) { - // In Create mode we want to run all 3 validation tests; this is why + if (nicknameValidated && destinationDataverseValidated && urlValidated && customHeaderValidated) { + // In Create mode we want to run all 4 validation tests; this is why // we are not doing "if ((validateNickname() && validateServerUrlOAI())" // in the line above. -- L.A. 4.4 May 2016. @@ -688,6 +710,7 @@ public void backToStepThree() { UIInput newClientNicknameInputField; UIInput newClientUrlInputField; + UIInput newClientCustomHeaderInputField; UIInput hiddenInputField; /*UISelectOne*/ UIInput metadataFormatMenu; UIInput remoteArchiveStyleMenu; @@ -695,6 +718,7 @@ public void backToStepThree() { private String newNickname = ""; private String newHarvestingUrl = ""; + private String customHeader = null; private boolean initialSettingsValidated = false; private String newOaiSet = ""; private String newMetadataFormat = ""; @@ -718,6 +742,7 @@ public void initNewClient(ActionEvent ae) { //this.selectedClient = new HarvestingClient(); this.newNickname = ""; this.newHarvestingUrl = ""; + this.customHeader = null; this.initialSettingsValidated = false; this.newOaiSet = ""; this.newMetadataFormat = ""; @@ -762,6 +787,14 @@ public void setNewHarvestingUrl(String newHarvestingUrl) { this.newHarvestingUrl = newHarvestingUrl; } + public String getNewCustomHeader() { + return customHeader; + } + + public void setNewCustomHeader(String customHeader) { + this.customHeader = customHeader; + } + public int getHarvestTypeRadio() { return this.harvestTypeRadio; } @@ -871,6 +904,14 @@ public void setNewClientUrlInputField(UIInput newClientInputField) { this.newClientUrlInputField = newClientInputField; } + public UIInput getNewClientCustomHeaderInputField() { + return newClientCustomHeaderInputField; + } + + public void setNewClientCustomHeaderInputField(UIInput newClientInputField) { + this.newClientCustomHeaderInputField = newClientInputField; + } + public UIInput getHiddenInputField() { return hiddenInputField; } diff --git a/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java b/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java index aa40423000d..bf36f265743 100644 --- a/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java +++ b/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java @@ -107,6 +107,8 @@ public class SettingsWrapper implements java.io.Serializable { private Boolean rsyncOnly = null; + private Boolean webloaderUpload = null; + private String metricsUrl = null; private Boolean dataFilePIDSequentialDependent = null; @@ -338,6 +340,13 @@ public String getGlobusAppUrl() { } + public boolean isWebloaderUpload() { + if (webloaderUpload == null) { + webloaderUpload = systemConfig.isWebloaderUpload(); + } + 
return webloaderUpload; + } + public boolean isRsyncOnly() { if (rsyncOnly == null) { String downloadMethods = getValueForKey(SettingsServiceBean.Key.DownloadMethods); diff --git a/src/main/java/edu/harvard/iq/dataverse/Template.java b/src/main/java/edu/harvard/iq/dataverse/Template.java index 61f0a78656f..7798367b4d9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/Template.java +++ b/src/main/java/edu/harvard/iq/dataverse/Template.java @@ -9,6 +9,7 @@ import java.util.LinkedList; import java.util.List; import java.util.Map; +import java.util.TreeMap; import java.util.stream.Collectors; import javax.json.Json; @@ -139,9 +140,9 @@ public List getDatasetFields() { private Map instructionsMap = null; @Transient - private Map> metadataBlocksForView = new HashMap<>(); + private TreeMap> metadataBlocksForView = new TreeMap<>(); @Transient - private Map> metadataBlocksForEdit = new HashMap<>(); + private TreeMap> metadataBlocksForEdit = new TreeMap<>(); @Transient private boolean isDefaultForDataverse; @@ -166,19 +167,19 @@ public void setDataversesHasAsDefault(List dataversesHasAsDefault) { } - public Map> getMetadataBlocksForView() { + public TreeMap> getMetadataBlocksForView() { return metadataBlocksForView; } - public void setMetadataBlocksForView(Map> metadataBlocksForView) { + public void setMetadataBlocksForView(TreeMap> metadataBlocksForView) { this.metadataBlocksForView = metadataBlocksForView; } - public Map> getMetadataBlocksForEdit() { + public TreeMap> getMetadataBlocksForEdit() { return metadataBlocksForEdit; } - public void setMetadataBlocksForEdit(Map> metadataBlocksForEdit) { + public void setMetadataBlocksForEdit(TreeMap> metadataBlocksForEdit) { this.metadataBlocksForEdit = metadataBlocksForEdit; } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java b/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java index e919ecf786d..51f6f05f326 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java @@ -505,6 +505,7 @@ protected Dataset findDatasetOrDie(String id) throws WrappedResponse { } protected DataFile findDataFileOrDie(String id) throws WrappedResponse { + DataFile datafile; if (id.equals(PERSISTENT_ID_KEY)) { String persistentId = getRequestParameter(PERSISTENT_ID_KEY.substring(1)); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Access.java b/src/main/java/edu/harvard/iq/dataverse/api/Access.java index 75aa57a0d2b..3bd0a19672b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Access.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Access.java @@ -187,9 +187,6 @@ public class Access extends AbstractApiBean { @Inject MakeDataCountLoggingServiceBean mdcLogService; - - private static final String API_KEY_HEADER = "X-Dataverse-key"; - //@EJB // TODO: @@ -197,26 +194,23 @@ public class Access extends AbstractApiBean { @Path("datafile/bundle/{fileId}") @GET @Produces({"application/zip"}) - public BundleDownloadInstance datafileBundle(@PathParam("fileId") String fileId, @QueryParam("fileMetadataId") Long fileMetadataId,@QueryParam("gbrecs") boolean gbrecs, @QueryParam("key") String apiToken, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) /*throws NotFoundException, ServiceUnavailableException, PermissionDeniedException, AuthorizationRequiredException*/ { + public BundleDownloadInstance datafileBundle(@PathParam("fileId") String fileId, @QueryParam("fileMetadataId") Long 
fileMetadataId,@QueryParam("gbrecs") boolean gbrecs, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) /*throws NotFoundException, ServiceUnavailableException, PermissionDeniedException, AuthorizationRequiredException*/ { GuestbookResponse gbr = null; DataFile df = findDataFileOrDieWrapper(fileId); - if (apiToken == null || apiToken.equals("")) { - apiToken = headers.getHeaderString(API_KEY_HEADER); - } - // This will throw a ForbiddenException if access isn't authorized: - checkAuthorization(df, apiToken); + checkAuthorization(df); if (gbrecs != true && df.isReleased()){ // Write Guestbook record if not done previously and file is released - User apiTokenUser = findAPITokenUser(apiToken); + //This calls findUserOrDie which will retrieve the key param or api token header, or the workflow token header. + User apiTokenUser = findAPITokenUser(); gbr = guestbookResponseService.initAPIGuestbookResponse(df.getOwner(), df, session, apiTokenUser); guestbookResponseService.save(gbr); - MakeDataCountEntry entry = new MakeDataCountEntry(uriInfo, headers, dvRequestService, df); + MakeDataCountEntry entry = new MakeDataCountEntry(uriInfo, headers, dvRequestService, df); mdcLogService.logEntry(entry); } @@ -278,7 +272,7 @@ private DataFile findDataFileOrDieWrapper(String fileId){ @Path("datafile/{fileId:.+}") @GET @Produces({"application/xml"}) - public Response datafile(@PathParam("fileId") String fileId, @QueryParam("gbrecs") boolean gbrecs, @QueryParam("key") String apiToken, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) /*throws NotFoundException, ServiceUnavailableException, PermissionDeniedException, AuthorizationRequiredException*/ { + public Response datafile(@PathParam("fileId") String fileId, @QueryParam("gbrecs") boolean gbrecs, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) /*throws NotFoundException, ServiceUnavailableException, PermissionDeniedException, AuthorizationRequiredException*/ { // check first if there's a trailing slash, and chop it: while (fileId.lastIndexOf('/') == fileId.length() - 1) { @@ -303,20 +297,16 @@ public Response datafile(@PathParam("fileId") String fileId, @QueryParam("gbrecs throw new NotFoundException(errorMessage); // (nobody should ever be using this API on a harvested DataFile)! 
} - - if (apiToken == null || apiToken.equals("")) { - apiToken = headers.getHeaderString(API_KEY_HEADER); - } - + + // This will throw a ForbiddenException if access isn't authorized: + checkAuthorization(df); + if (gbrecs != true && df.isReleased()){ // Write Guestbook record if not done previously and file is released - User apiTokenUser = findAPITokenUser(apiToken); + User apiTokenUser = findAPITokenUser(); gbr = guestbookResponseService.initAPIGuestbookResponse(df.getOwner(), df, session, apiTokenUser); } - - // This will throw a ForbiddenException if access isn't authorized: - checkAuthorization(df, apiToken); - + DownloadInfo dInfo = new DownloadInfo(df); logger.fine("checking if thumbnails are supported on this file."); @@ -532,11 +522,10 @@ public String tabularDatafileMetadataDDI(@PathParam("fileId") String fileId, @Q @Path("datafile/{fileId}/auxiliary") @GET public Response listDatafileMetadataAux(@PathParam("fileId") String fileId, - @QueryParam("key") String apiToken, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws ServiceUnavailableException { - return listAuxiliaryFiles(fileId, null, apiToken, uriInfo, headers, response); + return listAuxiliaryFiles(fileId, null, uriInfo, headers, response); } /* * GET method for retrieving a list auxiliary files associated with @@ -547,26 +536,21 @@ public Response listDatafileMetadataAux(@PathParam("fileId") String fileId, @GET public Response listDatafileMetadataAuxByOrigin(@PathParam("fileId") String fileId, @PathParam("origin") String origin, - @QueryParam("key") String apiToken, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws ServiceUnavailableException { - return listAuxiliaryFiles(fileId, origin, apiToken, uriInfo, headers, response); + return listAuxiliaryFiles(fileId, origin, uriInfo, headers, response); } - private Response listAuxiliaryFiles(String fileId, String origin, String apiToken, UriInfo uriInfo, HttpHeaders headers, HttpServletResponse response) { + private Response listAuxiliaryFiles(String fileId, String origin, UriInfo uriInfo, HttpHeaders headers, HttpServletResponse response) { DataFile df = findDataFileOrDieWrapper(fileId); - if (apiToken == null || apiToken.equals("")) { - apiToken = headers.getHeaderString(API_KEY_HEADER); - } - List auxFileList = auxiliaryFileService.findAuxiliaryFiles(df, origin); if (auxFileList == null || auxFileList.isEmpty()) { throw new NotFoundException("No Auxiliary files exist for datafile " + fileId + (origin==null ? 
"": " and the specified origin")); } - boolean isAccessAllowed = isAccessAuthorized(df, apiToken); + boolean isAccessAllowed = isAccessAuthorized(df); JsonArrayBuilder jab = Json.createArrayBuilder(); auxFileList.forEach(auxFile -> { if (isAccessAllowed || auxFile.getIsPublic()) { @@ -594,17 +578,12 @@ private Response listAuxiliaryFiles(String fileId, String origin, String apiToke public DownloadInstance downloadAuxiliaryFile(@PathParam("fileId") String fileId, @PathParam("formatTag") String formatTag, @PathParam("formatVersion") String formatVersion, - @QueryParam("key") String apiToken, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws ServiceUnavailableException { DataFile df = findDataFileOrDieWrapper(fileId); - if (apiToken == null || apiToken.equals("")) { - apiToken = headers.getHeaderString(API_KEY_HEADER); - } - DownloadInfo dInfo = new DownloadInfo(df); boolean publiclyAvailable = false; @@ -654,7 +633,7 @@ public DownloadInstance downloadAuxiliaryFile(@PathParam("fileId") String fileId // as defined for the DataFile itself), and will throw a ForbiddenException // if access is denied: if (!publiclyAvailable) { - checkAuthorization(df, apiToken); + checkAuthorization(df); } return downloadInstance; @@ -670,16 +649,16 @@ public DownloadInstance downloadAuxiliaryFile(@PathParam("fileId") String fileId @POST @Consumes("text/plain") @Produces({ "application/zip" }) - public Response postDownloadDatafiles(String fileIds, @QueryParam("gbrecs") boolean gbrecs, @QueryParam("key") String apiTokenParam, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws WebApplicationException { + public Response postDownloadDatafiles(String fileIds, @QueryParam("gbrecs") boolean gbrecs, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws WebApplicationException { - return downloadDatafiles(fileIds, gbrecs, apiTokenParam, uriInfo, headers, response); + return downloadDatafiles(fileIds, gbrecs, uriInfo, headers, response); } @Path("dataset/{id}") @GET @Produces({"application/zip"}) - public Response downloadAllFromLatest(@PathParam("id") String datasetIdOrPersistentId, @QueryParam("gbrecs") boolean gbrecs, @QueryParam("key") String apiTokenParam, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws WebApplicationException { + public Response downloadAllFromLatest(@PathParam("id") String datasetIdOrPersistentId, @QueryParam("gbrecs") boolean gbrecs, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws WebApplicationException { try { User user = findUserOrDie(); DataverseRequest req = createDataverseRequest(user); @@ -693,7 +672,7 @@ public Response downloadAllFromLatest(@PathParam("id") String datasetIdOrPersist // We don't want downloads from Draft versions to be counted, // so we are setting the gbrecs (aka "do not write guestbook response") // variable accordingly: - return downloadDatafiles(fileIds, true, apiTokenParam, uriInfo, headers, response); + return downloadDatafiles(fileIds, true, uriInfo, headers, response); } } @@ -714,7 +693,7 @@ public Response downloadAllFromLatest(@PathParam("id") String datasetIdOrPersist } String fileIds = getFileIdsAsCommaSeparated(latest.getFileMetadatas()); - return downloadDatafiles(fileIds, gbrecs, apiTokenParam, uriInfo, headers, response); + return downloadDatafiles(fileIds, gbrecs, uriInfo, headers, response); } 
catch (WrappedResponse wr) { return wr.getResponse(); } @@ -763,7 +742,7 @@ public Command handleLatestPublished() { if (dsv.isDraft()) { gbrecs = true; } - return downloadDatafiles(fileIds, gbrecs, apiTokenParam, uriInfo, headers, response); + return downloadDatafiles(fileIds, gbrecs, uriInfo, headers, response); } catch (WrappedResponse wr) { return wr.getResponse(); } @@ -784,11 +763,11 @@ private static String getFileIdsAsCommaSeparated(List fileMetadata @Path("datafiles/{fileIds}") @GET @Produces({"application/zip"}) - public Response datafiles(@PathParam("fileIds") String fileIds, @QueryParam("gbrecs") boolean gbrecs, @QueryParam("key") String apiTokenParam, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws WebApplicationException { - return downloadDatafiles(fileIds, gbrecs, apiTokenParam, uriInfo, headers, response); + public Response datafiles(@PathParam("fileIds") String fileIds, @QueryParam("gbrecs") boolean gbrecs, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws WebApplicationException { + return downloadDatafiles(fileIds, gbrecs, uriInfo, headers, response); } - private Response downloadDatafiles(String rawFileIds, boolean donotwriteGBResponse, String apiTokenParam, UriInfo uriInfo, HttpHeaders headers, HttpServletResponse response) throws WebApplicationException /* throws NotFoundException, ServiceUnavailableException, PermissionDeniedException, AuthorizationRequiredException*/ { + private Response downloadDatafiles(String rawFileIds, boolean donotwriteGBResponse, UriInfo uriInfo, HttpHeaders headers, HttpServletResponse response) throws WebApplicationException /* throws NotFoundException, ServiceUnavailableException, PermissionDeniedException, AuthorizationRequiredException*/ { final long zipDownloadSizeLimit = systemConfig.getZipDownloadLimit(); logger.fine("setting zip download size limit to " + zipDownloadSizeLimit + " bytes."); @@ -810,11 +789,7 @@ private Response downloadDatafiles(String rawFileIds, boolean donotwriteGBRespon String customZipServiceUrl = settingsService.getValueForKey(SettingsServiceBean.Key.CustomZipDownloadServiceUrl); boolean useCustomZipService = customZipServiceUrl != null; - String apiToken = (apiTokenParam == null || apiTokenParam.equals("")) - ? 
headers.getHeaderString(API_KEY_HEADER) - : apiTokenParam; - - User apiTokenUser = findAPITokenUser(apiToken); //for use in adding gb records if necessary + User apiTokenUser = findAPITokenUser(); //for use in adding gb records if necessary Boolean getOrig = false; for (String key : uriInfo.getQueryParameters().keySet()) { @@ -827,7 +802,7 @@ private Response downloadDatafiles(String rawFileIds, boolean donotwriteGBRespon if (useCustomZipService) { URI redirect_uri = null; try { - redirect_uri = handleCustomZipDownload(customZipServiceUrl, fileIds, apiToken, apiTokenUser, uriInfo, headers, donotwriteGBResponse, true); + redirect_uri = handleCustomZipDownload(customZipServiceUrl, fileIds, apiTokenUser, uriInfo, headers, donotwriteGBResponse, true); } catch (WebApplicationException wae) { throw wae; } @@ -859,7 +834,7 @@ public void write(OutputStream os) throws IOException, logger.fine("token: " + fileIdParams[i]); Long fileId = null; try { - fileId = new Long(fileIdParams[i]); + fileId = Long.parseLong(fileIdParams[i]); } catch (NumberFormatException nfe) { fileId = null; } @@ -867,7 +842,7 @@ public void write(OutputStream os) throws IOException, logger.fine("attempting to look up file id " + fileId); DataFile file = dataFileService.find(fileId); if (file != null) { - if (isAccessAuthorized(file, apiToken)) { + if (isAccessAuthorized(file)) { logger.fine("adding datafile (id=" + file.getId() + ") to the download list of the ZippedDownloadInstance."); //downloadInstance.addDataFile(file); @@ -1436,8 +1411,8 @@ public Response requestFileAccess(@PathParam("id") String fileToRequestAccessId, List args = Arrays.asList(wr.getLocalizedMessage()); return error(BAD_REQUEST, BundleUtil.getStringFromBundle("access.api.fileAccess.failure.noUser", args)); } - - if (isAccessAuthorized(dataFile, getRequestApiKey())) { + //Already have access + if (isAccessAuthorized(dataFile)) { return error(BAD_REQUEST, BundleUtil.getStringFromBundle("access.api.requestAccess.failure.invalidRequest")); } @@ -1708,15 +1683,15 @@ public Response rejectFileAccess(@PathParam("id") String fileToRequestAccessId, // checkAuthorization is a convenience method; it calls the boolean method // isAccessAuthorized(), the actual workhorse, tand throws a 403 exception if not. - private void checkAuthorization(DataFile df, String apiToken) throws WebApplicationException { + private void checkAuthorization(DataFile df) throws WebApplicationException { - if (!isAccessAuthorized(df, apiToken)) { + if (!isAccessAuthorized(df)) { throw new ForbiddenException(); } } - private boolean isAccessAuthorized(DataFile df, String apiToken) { + private boolean isAccessAuthorized(DataFile df) { // First, check if the file belongs to a released Dataset version: boolean published = false; @@ -1787,37 +1762,50 @@ private boolean isAccessAuthorized(DataFile df, String apiToken) { } } - if (!restricted && !embargoed) { - // And if they are not published, they can still be downloaded, if the user + + + //The one case where we don't need to check permissions + if (!restricted && !embargoed && published) { + // If they are not published, they can still be downloaded, if the user // has the permission to view unpublished versions! 
(this case will // be handled below) - if (published) { - return true; - } + return true; } + //For permissions check decide if we have a session user, or an API user User user = null; /** * Authentication/authorization: + */ + + User apiTokenUser = null; + + /* + * The logic looks for an apitoken authenticated user and uses it if it exists. + * If not, and a session user exists, we use that. If the apitoken method + * indicates a GuestUser, we will use that if there's no session. * - * note that the fragment below - that retrieves the session object - * and tries to find the user associated with the session - is really - * for logging/debugging purposes only; for practical purposes, it - * would be enough to just call "permissionService.on(df).has(Permission.DownloadFile)" - * and the method does just that, tries to authorize for the user in - * the current session (or guest user, if no session user is available): + * This is currently the only API call that supports sessions. If the rest of + * the API is opened up, the custom logic here wouldn't be needed. */ + + try { + logger.fine("calling apiTokenUser = findUserOrDie()..."); + apiTokenUser = findUserOrDie(); + } catch (WrappedResponse wr) { + logger.log(Level.FINE, "Message from findUserOrDie(): {0}", wr.getMessage()); + } - if (session != null) { + if ((apiTokenUser instanceof GuestUser) && session != null) { if (session.getUser() != null) { - if (session.getUser().isAuthenticated()) { - user = session.getUser(); - } else { + user = session.getUser(); + apiTokenUser=null; + //Fine logging + if (!session.getUser().isAuthenticated()) { logger.fine("User associated with the session is not an authenticated user."); if (session.getUser() instanceof PrivateUrlUser) { logger.fine("User associated with the session is a PrivateUrlUser user."); - user = session.getUser(); } if (session.getUser() instanceof GuestUser) { logger.fine("User associated with the session is indeed a guest user."); @@ -1829,154 +1817,45 @@ private boolean isAccessAuthorized(DataFile df, String apiToken) { } else { logger.fine("Session is null."); } - - User apiTokenUser = null; - - if ((apiToken != null)&&(apiToken.length()!=64)) { - // We'll also try to obtain the user information from the API token, - // if supplied: - - try { - logger.fine("calling apiTokenUser = findUserOrDie()..."); - apiTokenUser = findUserOrDie(); - } catch (WrappedResponse wr) { - logger.log(Level.FINE, "Message from findUserOrDie(): {0}", wr.getMessage()); - } - - if (apiTokenUser == null) { - logger.warning("API token-based auth: Unable to find a user with the API token provided."); - } + //If we don't have a user, nothing more to do. (Note session could have returned GuestUser) + if (user == null && apiTokenUser == null) { + logger.warning("Unable to find a user via session or with a token."); + return false; } - - // OK, let's revisit the case of non-restricted files, this time in - // an unpublished version: - // (if (published) was already addressed above) - - if (!restricted && !embargoed) { - // If the file is not published, they can still download the file, if the user - // has the permission to view unpublished versions: - - if ( user != null ) { - // used in JSF context - if (permissionService.requestOn(dvRequestService.getDataverseRequest(), df.getOwner()).has(Permission.ViewUnpublishedDataset)) { - // it's not unthinkable, that a null user (i.e., guest user) could be given - // the ViewUnpublished permission! 
- logger.log(Level.FINE, "Session-based auth: user {0} has access rights on the non-restricted, unpublished datafile.", user.getIdentifier()); - return true; - } - } - if (apiTokenUser != null) { - // used in an API context - if (permissionService.requestOn( createDataverseRequest(apiTokenUser), df.getOwner()).has(Permission.ViewUnpublishedDataset)) { - logger.log(Level.FINE, "Token-based auth: user {0} has access rights on the non-restricted, unpublished datafile.", apiTokenUser.getIdentifier()); - return true; - } - } + /* + * Since published and not restricted/embargoed is handled above, the main split + * now is whether it is published or not. If it's published, the only case left + * is with restricted/embargoed. With unpublished, both the restricted/embargoed + * and not restricted/embargoed both get handled the same way. + */ - // last option - guest user in either contexts - // Guset user is impled by the code above. - if ( permissionService.requestOn(dvRequestService.getDataverseRequest(), df.getOwner()).has(Permission.ViewUnpublishedDataset) ) { - return true; - } - + DataverseRequest dvr = null; + if (apiTokenUser != null) { + dvr = createDataverseRequest(apiTokenUser); } else { - - // OK, this is a restricted and/or embargoed file. - - boolean hasAccessToRestrictedBySession = false; - boolean hasAccessToRestrictedByToken = false; - - if (permissionService.on(df).has(Permission.DownloadFile)) { - // Note: PermissionServiceBean.on(Datafile df) will obtain the - // User from the Session object, just like in the code fragment - // above. That's why it's not passed along as an argument. - hasAccessToRestrictedBySession = true; - } else if (apiTokenUser != null && permissionService.requestOn(createDataverseRequest(apiTokenUser), df).has(Permission.DownloadFile)) { - hasAccessToRestrictedByToken = true; - } - - if (hasAccessToRestrictedBySession || hasAccessToRestrictedByToken) { - if (published) { - if (hasAccessToRestrictedBySession) { - if (user != null) { - logger.log(Level.FINE, "Session-based auth: user {0} is granted access to the restricted, published datafile.", user.getIdentifier()); - } else { - logger.fine("Session-based auth: guest user is granted access to the restricted, published datafile."); - } - } else { - logger.log(Level.FINE, "Token-based auth: user {0} is granted access to the restricted, published datafile.", apiTokenUser.getIdentifier()); - } - return true; - } else { - // if the file is NOT published, we will let them download the - // file ONLY if they also have the permission to view - // unpublished versions: - // Note that the code below does not allow a case where it is the - // session user that has the permission on the file, and the API token - // user with the ViewUnpublished permission, or vice versa! 
- if (hasAccessToRestrictedBySession) { - if (permissionService.on(df.getOwner()).has(Permission.ViewUnpublishedDataset)) { - if (user != null) { - logger.log(Level.FINE, "Session-based auth: user {0} is granted access to the restricted, unpublished datafile.", user.getIdentifier()); - } else { - logger.fine("Session-based auth: guest user is granted access to the restricted, unpublished datafile."); - } - return true; - } - } else { - if (apiTokenUser != null && permissionService.requestOn(createDataverseRequest(apiTokenUser), df.getOwner()).has(Permission.ViewUnpublishedDataset)) { - logger.log(Level.FINE, "Token-based auth: user {0} is granted access to the restricted, unpublished datafile.", apiTokenUser.getIdentifier()); - return true; - } - } - } - } - } + // used in JSF context, user may be Guest + dvr = dvRequestService.getDataverseRequest(); + } + if (!published) { // and restricted or embargoed (implied by earlier processing) + // If the file is not published, they can still download the file, if the user + // has the permission to view unpublished versions: - - if ((apiToken != null)) { - // Will try to obtain the user information from the API token, - // if supplied: - - try { - logger.fine("calling user = findUserOrDie()..."); - user = findUserOrDie(); - } catch (WrappedResponse wr) { - logger.log(Level.FINE, "Message from findUserOrDie(): {0}", wr.getMessage()); + // This line handles all three authenticated session user, token user, and guest cases. + if (permissionService.requestOn(dvr, df.getOwner()).has(Permission.ViewUnpublishedDataset)) { + // it's not unthinkable, that a GuestUser could be given + // the ViewUnpublished permission! + logger.log(Level.FINE, + "Session-based auth: user {0} has access rights on the non-restricted, unpublished datafile.", + dvr.getUser().getIdentifier()); + return true; } - - if (user == null) { - logger.warning("API token-based auth: Unable to find a user with the API token provided."); - return false; - } - - - //Doesn't this ~duplicate logic above - if so, if there's a way to get here, I think it still works for embargoed files (you only get access if you have download permissions, and, if not published, also view unpublished) - if (permissionService.requestOn(createDataverseRequest(user), df).has(Permission.DownloadFile)) { - if (published) { - logger.log(Level.FINE, "API token-based auth: User {0} has rights to access the datafile.", user.getIdentifier()); - //Same case as line 1809 (and part of 1708 though when published you don't need the DownloadFile permission) - return true; - } else { - // if the file is NOT published, we will let them download the - // file ONLY if they also have the permission to view - // unpublished versions: - if (permissionService.requestOn(createDataverseRequest(user), df.getOwner()).has(Permission.ViewUnpublishedDataset)) { - logger.log(Level.FINE, "API token-based auth: User {0} has rights to access the (unpublished) datafile.", user.getIdentifier()); - //Same case as line 1843? - return true; - } else { - logger.log(Level.FINE, "API token-based auth: User {0} is not authorized to access the (unpublished) datafile.", user.getIdentifier()); - } - } - } else { - logger.log(Level.FINE, "API token-based auth: User {0} is not authorized to access the datafile.", user.getIdentifier()); + } else { // published and restricted and/or embargoed + // This line also handles all three authenticated session user, token user, and guest cases. 
+ if (permissionService.requestOn(dvr, df).has(Permission.DownloadFile)) { + return true; } - - return false; - } - + } if (user != null) { logger.log(Level.FINE, "Session-based auth: user {0} has NO access rights on the requested datafile.", user.getIdentifier()); } @@ -1984,37 +1863,35 @@ private boolean isAccessAuthorized(DataFile df, String apiToken) { if (apiTokenUser != null) { logger.log(Level.FINE, "Token-based auth: user {0} has NO access rights on the requested datafile.", apiTokenUser.getIdentifier()); } - - if (user == null && apiTokenUser == null) { - logger.fine("Unauthenticated access: No guest access to the datafile."); - } - return false; } - private User findAPITokenUser(String apiToken) { + private User findAPITokenUser() { User apiTokenUser = null; - - if ((apiToken != null) && (apiToken.length() != 64)) { - // We'll also try to obtain the user information from the API token, - // if supplied: - - try { - logger.fine("calling apiTokenUser = findUserOrDie()..."); - apiTokenUser = findUserOrDie(); - return apiTokenUser; - } catch (WrappedResponse wr) { - logger.log(Level.FINE, "Message from findUserOrDie(): {0}", wr.getMessage()); - return null; + try { + logger.fine("calling apiTokenUser = findUserOrDie()..."); + apiTokenUser = findUserOrDie(); + /* + * The idea here is to not let a guest user returned from findUserOrDie (which + * happens when there is no key/token, and which we want if there's no session) + * override an authenticated session user. + */ + if(apiTokenUser instanceof GuestUser) { + if(session!=null && session.getUser()!=null) { + //The apiTokenUser, if set, will override the sessionUser in permissions calcs, so set it to null if we have a session user + apiTokenUser=null; + } } - + return apiTokenUser; + } catch (WrappedResponse wr) { + logger.log(Level.FINE, "Message from findUserOrDie(): {0}", wr.getMessage()); + return null; } - return apiTokenUser; } - private URI handleCustomZipDownload(String customZipServiceUrl, String fileIds, String apiToken, User apiTokenUser, UriInfo uriInfo, HttpHeaders headers, boolean donotwriteGBResponse, boolean orig) throws WebApplicationException { + private URI handleCustomZipDownload(String customZipServiceUrl, String fileIds, User apiTokenUser, UriInfo uriInfo, HttpHeaders headers, boolean donotwriteGBResponse, boolean orig) throws WebApplicationException { String zipServiceKey = null; Timestamp timestamp = null; @@ -2031,7 +1908,7 @@ private URI handleCustomZipDownload(String customZipServiceUrl, String fileIds, for (int i = 0; i < fileIdParams.length; i++) { Long fileId = null; try { - fileId = new Long(fileIdParams[i]); + fileId = Long.parseLong(fileIdParams[i]); validIdCount++; } catch (NumberFormatException nfe) { fileId = null; @@ -2040,7 +1917,7 @@ private URI handleCustomZipDownload(String customZipServiceUrl, String fileIds, DataFile file = dataFileService.find(fileId); if (file != null) { validFileCount++; - if (isAccessAuthorized(file, apiToken)) { + if (isAccessAuthorized(file)) { logger.fine("adding datafile (id=" + file.getId() + ") to the download list of the ZippedDownloadInstance."); if (donotwriteGBResponse != true && file.isReleased()) { GuestbookResponse gbr = guestbookResponseService.initAPIGuestbookResponse(file.getOwner(), file, session, apiTokenUser); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 7695a00833e..0bb6eebb80b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++
b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -67,6 +67,7 @@ import edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.dataaccess.ImageThumbConverter; import edu.harvard.iq.dataverse.dataaccess.S3AccessIO; +import edu.harvard.iq.dataverse.dataaccess.StorageIO; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.UnforcedCommandException; import edu.harvard.iq.dataverse.engine.command.impl.GetDatasetStorageSizeCommand; @@ -114,11 +115,13 @@ import java.time.LocalDateTime; import java.util.*; import java.util.concurrent.*; +import java.util.function.Predicate; import java.time.ZoneId; import java.time.format.DateTimeFormatter; import java.util.Map.Entry; import java.util.logging.Level; import java.util.logging.Logger; +import java.util.regex.Pattern; import java.util.stream.Collectors; import javax.ejb.EJB; @@ -155,6 +158,7 @@ public class Datasets extends AbstractApiBean { private static final Logger logger = Logger.getLogger(Datasets.class.getCanonicalName()); + private static final Pattern dataFilePattern = Pattern.compile("^[0-9a-f]{11}-[0-9a-f]{12}\\.?.*"); @Inject DataverseSession session; @@ -2452,8 +2456,7 @@ public Response addFileToDataset(@PathParam("id") String idSupplied, fileService, permissionSvc, commandEngine, - systemConfig, - licenseSvc); + systemConfig); //------------------- @@ -2503,6 +2506,76 @@ public Response addFileToDataset(@PathParam("id") String idSupplied, } // end: addFileToDataset + /** + * Clean storage of a Dataset + * + * @param idSupplied + * @return + */ + @GET + @Path("{id}/cleanStorage") + public Response cleanStorage(@PathParam("id") String idSupplied, @QueryParam("dryrun") Boolean dryrun) { + // get user and dataset + User authUser; + try { + authUser = findUserOrDie(); + } catch (WrappedResponse ex) { + return error(Response.Status.FORBIDDEN, + BundleUtil.getStringFromBundle("file.addreplace.error.auth") + ); + } + + Dataset dataset; + try { + dataset = findDatasetOrDie(idSupplied); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + + // check permissions + if (!permissionSvc.permissionsFor(createDataverseRequest(authUser), dataset).contains(Permission.EditDataset)) { + return error(Response.Status.INTERNAL_SERVER_ERROR, "Access denied!"); + } + + boolean doDryRun = dryrun != null && dryrun.booleanValue(); + + // check if no legacy files are present + Set datasetFilenames = getDatasetFilenames(dataset); + if (datasetFilenames.stream().anyMatch(x -> !dataFilePattern.matcher(x).matches())) { + logger.log(Level.WARNING, "Dataset contains legacy files not matching the naming pattern!"); + } + + Predicate filter = getToDeleteFilesFilter(datasetFilenames); + List deleted; + try { + StorageIO datasetIO = DataAccess.getStorageIO(dataset); + deleted = datasetIO.cleanUp(filter, doDryRun); + } catch (IOException ex) { + logger.log(Level.SEVERE, null, ex); + return error(Response.Status.INTERNAL_SERVER_ERROR, "IOException! Serious Error! 
See administrator!"); + } + + return ok("Found: " + datasetFilenames.stream().collect(Collectors.joining(", ")) + "\n" + "Deleted: " + deleted.stream().collect(Collectors.joining(", "))); + + } + + private static Set getDatasetFilenames(Dataset dataset) { + Set files = new HashSet<>(); + for (DataFile dataFile: dataset.getFiles()) { + String storageIdentifier = dataFile.getStorageIdentifier(); + String location = storageIdentifier.substring(storageIdentifier.indexOf("://") + 3); + String[] locationParts = location.split(":");//separate bucket, swift container, etc. from fileName + files.add(locationParts[locationParts.length-1]); + } + return files; + } + + public static Predicate getToDeleteFilesFilter(Set datasetFilenames) { + return f -> { + return dataFilePattern.matcher(f).matches() && datasetFilenames.stream().noneMatch(x -> f.startsWith(x)); + }; + } + private void msg(String m) { //System.out.println(m); logger.fine(m); @@ -3388,14 +3461,84 @@ public Response addFilesToDataset(@PathParam("id") String idSupplied, this.fileService, this.permissionSvc, this.commandEngine, - this.systemConfig, - this.licenseSvc + this.systemConfig ); return addFileHelper.addFiles(jsonData, dataset, authUser); } + /** + * Replace multiple Files to an existing Dataset + * + * @param idSupplied + * @param jsonData + * @return + */ + @POST + @Path("{id}/replaceFiles") + @Consumes(MediaType.MULTIPART_FORM_DATA) + public Response replaceFilesInDataset(@PathParam("id") String idSupplied, + @FormDataParam("jsonData") String jsonData) { + + if (!systemConfig.isHTTPUpload()) { + return error(Response.Status.SERVICE_UNAVAILABLE, BundleUtil.getStringFromBundle("file.api.httpDisabled")); + } + + // ------------------------------------- + // (1) Get the user from the API key + // ------------------------------------- + User authUser; + try { + authUser = findUserOrDie(); + } catch (WrappedResponse ex) { + return error(Response.Status.FORBIDDEN, BundleUtil.getStringFromBundle("file.addreplace.error.auth") + ); + } + + // ------------------------------------- + // (2) Get the Dataset Id + // ------------------------------------- + Dataset dataset; + + try { + dataset = findDatasetOrDie(idSupplied); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + + dataset.getLocks().forEach(dl -> { + logger.info(dl.toString()); + }); + + //------------------------------------ + // (2a) Make sure dataset does not have package file + // -------------------------------------- + + for (DatasetVersion dv : dataset.getVersions()) { + if (dv.isHasPackageFile()) { + return error(Response.Status.FORBIDDEN, + BundleUtil.getStringFromBundle("file.api.alreadyHasPackageFile") + ); + } + } + + DataverseRequest dvRequest = createDataverseRequest(authUser); + + AddReplaceFileHelper addFileHelper = new AddReplaceFileHelper( + dvRequest, + this.ingestService, + this.datasetService, + this.fileService, + this.permissionSvc, + this.commandEngine, + this.systemConfig + ); + + return addFileHelper.replaceFiles(jsonData, dataset, authUser); + + } + /** * API to find curation assignments and statuses * diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Files.java b/src/main/java/edu/harvard/iq/dataverse/api/Files.java index af0f6be6d32..965d56d355e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Files.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Files.java @@ -43,10 +43,10 @@ import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.StringUtil; import 
edu.harvard.iq.dataverse.util.SystemConfig; +import static edu.harvard.iq.dataverse.util.json.JsonPrinter.json; import edu.harvard.iq.dataverse.util.json.JsonUtil; import edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder; import java.io.InputStream; -import java.io.StringReader; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -55,10 +55,8 @@ import javax.ejb.EJB; import javax.inject.Inject; import javax.json.Json; -import javax.json.JsonReader; import javax.servlet.http.HttpServletResponse; import javax.ws.rs.Consumes; -import javax.ws.rs.DELETE; import javax.ws.rs.GET; import javax.ws.rs.POST; import javax.ws.rs.PUT; @@ -235,7 +233,6 @@ public Response replaceFileInDataset( if (null == contentDispositionHeader) { if (optionalFileParams.hasStorageIdentifier()) { newStorageIdentifier = optionalFileParams.getStorageIdentifier(); - // ToDo - check that storageIdentifier is valid if (optionalFileParams.hasFileName()) { newFilename = optionalFileParams.getFileName(); if (optionalFileParams.hasMimetype()) { @@ -261,39 +258,34 @@ public Response replaceFileInDataset( this.fileService, this.permissionSvc, this.commandEngine, - this.systemConfig, - this.licenseSvc); + this.systemConfig); // (5) Run "runReplaceFileByDatasetId" long fileToReplaceId = 0; try { DataFile dataFile = findDataFileOrDie(fileIdOrPersistentId); fileToReplaceId = dataFile.getId(); - - if (dataFile.isFilePackage()) { - return error(Response.Status.SERVICE_UNAVAILABLE, BundleUtil.getStringFromBundle("file.api.alreadyHasPackageFile")); + + if (dataFile.isFilePackage()) { + return error(Response.Status.SERVICE_UNAVAILABLE, + BundleUtil.getStringFromBundle("file.api.alreadyHasPackageFile")); + } + + if (forceReplace) { + addFileHelper.runForceReplaceFile(fileToReplaceId, newFilename, newFileContentType, + newStorageIdentifier, testFileInputStream, dataFile.getOwner(), optionalFileParams); + } else { + addFileHelper.runReplaceFile(fileToReplaceId, newFilename, newFileContentType, newStorageIdentifier, + testFileInputStream, dataFile.getOwner(), optionalFileParams); } } catch (WrappedResponse ex) { - String error = BundleUtil.getStringFromBundle("file.addreplace.error.existing_file_to_replace_not_found_by_id", Arrays.asList(fileIdOrPersistentId)); - // TODO: Some day, return ex.getResponse() instead. Also run FilesIT and updated expected status code and message. + String error = BundleUtil.getStringFromBundle( + "file.addreplace.error.existing_file_to_replace_not_found_by_id", + Arrays.asList(fileIdOrPersistentId)); + // TODO: Some day, return ex.getResponse() instead. Also run FilesIT and updated + // expected status code and message. return error(BAD_REQUEST, error); } - if (forceReplace){ - addFileHelper.runForceReplaceFile(fileToReplaceId, - newFilename, - newFileContentType, - newStorageIdentifier, - testFileInputStream, - optionalFileParams); - }else{ - addFileHelper.runReplaceFile(fileToReplaceId, - newFilename, - newFileContentType, - newStorageIdentifier, - testFileInputStream, - optionalFileParams); - } - msg("we're back....."); if (addFileHelper.hasError()){ msg("yes, has error"); @@ -411,8 +403,7 @@ public Response updateFileMetadata(@FormDataParam("jsonData") String jsonData, return error(Response.Status.BAD_REQUEST, "An error has occurred attempting to update the requested DataFile. 
It is not part of the current version of the Dataset."); } - JsonReader jsonReader = Json.createReader(new StringReader(jsonData)); - javax.json.JsonObject jsonObject = jsonReader.readObject(); + javax.json.JsonObject jsonObject = JsonUtil.getJsonObject(jsonData); String incomingLabel = jsonObject.getString("label", null); String incomingDirectoryLabel = jsonObject.getString("directoryLabel", null); String existingLabel = df.getFileMetadata().getLabel(); @@ -452,6 +443,76 @@ public Response updateFileMetadata(@FormDataParam("jsonData") String jsonData, .build(); } + @GET + @Path("{id}/draft") + public Response getFileDataDraft(@PathParam("id") String fileIdOrPersistentId, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws WrappedResponse, Exception { + return getFileDataResponse(fileIdOrPersistentId, uriInfo, headers, response, true); + } + + @GET + @Path("{id}") + public Response getFileData(@PathParam("id") String fileIdOrPersistentId, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws WrappedResponse, Exception { + return getFileDataResponse(fileIdOrPersistentId, uriInfo, headers, response, false); + } + + private Response getFileDataResponse(String fileIdOrPersistentId, UriInfo uriInfo, HttpHeaders headers, HttpServletResponse response, boolean draft ){ + + DataverseRequest req; + try { + req = createDataverseRequest(findUserOrDie()); + } catch (Exception e) { + return error(BAD_REQUEST, "Error attempting to request information. Maybe a bad API token?"); + } + final DataFile df; + try { + df = execCommand(new GetDataFileCommand(req, findDataFileOrDie(fileIdOrPersistentId))); + } catch (Exception e) { + return error(BAD_REQUEST, "Error attempting get the requested data file."); + } + + FileMetadata fm; + + if (draft) { + try { + fm = execCommand(new GetDraftFileMetadataIfAvailableCommand(req, df)); + } catch (WrappedResponse w) { + return error(BAD_REQUEST, "An error occurred getting a draft version, you may not have permission to access unpublished data on this dataset."); + } + if (null == fm) { + return error(BAD_REQUEST, BundleUtil.getStringFromBundle("files.api.no.draft")); + } + } else { + //first get latest published + //if not available get draft if permissible + + try { + fm = df.getLatestPublishedFileMetadata(); + + } catch (UnsupportedOperationException e) { + try { + fm = execCommand(new GetDraftFileMetadataIfAvailableCommand(req, df)); + } catch (WrappedResponse w) { + return error(BAD_REQUEST, "An error occurred getting a draft version, you may not have permission to access unpublished data on this dataset."); + } + if (null == fm) { + return error(BAD_REQUEST, BundleUtil.getStringFromBundle("files.api.no.draft")); + } + } + + } + + if (fm.getDatasetVersion().isReleased()) { + MakeDataCountLoggingServiceBean.MakeDataCountEntry entry = new MakeDataCountLoggingServiceBean.MakeDataCountEntry(uriInfo, headers, dvRequestService, df); + mdcLogService.logEntry(entry); + } + + return Response.ok(Json.createObjectBuilder() + .add("status", STATUS_OK) + .add("data", json(fm)).build()) + .type(MediaType.APPLICATION_JSON) + .build(); + } + @GET @Path("{id}/metadata") public Response getFileMetadata(@PathParam("id") String fileIdOrPersistentId, @PathParam("versionId") String versionId, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response, Boolean getDraft) throws WrappedResponse, Exception { @@ -477,7 +538,7 @@ public Response 
getFileMetadata(@PathParam("id") String fileIdOrPersistentId, @P return error(BAD_REQUEST, "An error occurred getting a draft version, you may not have permission to access unpublished data on this dataset." ); } if(null == fm) { - return error(BAD_REQUEST, "No draft availabile for this dataset"); + return error(BAD_REQUEST, BundleUtil.getStringFromBundle("files.api.no.draft")); } } else { fm = df.getLatestPublishedFileMetadata(); @@ -493,6 +554,7 @@ public Response getFileMetadata(@PathParam("id") String fileIdOrPersistentId, @P .type(MediaType.TEXT_PLAIN) //Our plain text string is already json .build(); } + @GET @Path("{id}/metadata/draft") public Response getFileMetadataDraft(@PathParam("id") String fileIdOrPersistentId, @PathParam("versionId") String versionId, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response, Boolean getDraft) throws WrappedResponse, Exception { @@ -625,6 +687,27 @@ public Response redetectDatafile(@PathParam("id") String id, @QueryParam("dryRun } } + @Path("{id}/extractNcml") + @POST + public Response extractNcml(@PathParam("id") String id) { + try { + AuthenticatedUser au = findAuthenticatedUserOrDie(); + if (!au.isSuperuser()) { + // We can always make a command in the future if there's a need + // for non-superusers to call this API. + return error(Response.Status.FORBIDDEN, "This API call can be used by superusers only"); + } + DataFile dataFileIn = findDataFileOrDie(id); + java.nio.file.Path tempLocationPath = null; + boolean successOrFail = ingestService.extractMetadataNcml(dataFileIn, tempLocationPath); + NullSafeJsonBuilder result = NullSafeJsonBuilder.jsonObjectBuilder() + .add("result", successOrFail); + return ok(result); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + } + /** * Attempting to run metadata export, for all the formats for which we have * metadata Exporters. 
diff --git a/src/main/java/edu/harvard/iq/dataverse/api/HarvestingClients.java b/src/main/java/edu/harvard/iq/dataverse/api/HarvestingClients.java index 42534514b68..9aea3adab8b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/HarvestingClients.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/HarvestingClients.java @@ -15,6 +15,7 @@ import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.StringUtil; import edu.harvard.iq.dataverse.util.json.JsonParseException; +import edu.harvard.iq.dataverse.util.json.JsonPrinter; import javax.json.JsonObjectBuilder; import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder; import java.io.IOException; @@ -88,7 +89,7 @@ public Response harvestingClients(@QueryParam("key") String apiKey) throws IOExc } if (retrievedHarvestingClient != null) { - hcArr.add(harvestingConfigAsJson(retrievedHarvestingClient)); + hcArr.add(JsonPrinter.json(retrievedHarvestingClient)); } } @@ -136,7 +137,7 @@ public Response harvestingClient(@PathParam("nickName") String nickName, @QueryP } try { - return ok(harvestingConfigAsJson(retrievedHarvestingClient)); + return ok(JsonPrinter.json(retrievedHarvestingClient)); } catch (Exception ex) { logger.warning("Unknown exception caught while trying to format harvesting client config as json: "+ex.getMessage()); return error( Response.Status.BAD_REQUEST, @@ -216,7 +217,7 @@ public Response createHarvestingClient(String jsonBody, @PathParam("nickName") S DataverseRequest req = createDataverseRequest(findUserOrDie()); harvestingClient = execCommand(new CreateHarvestingClientCommand(req, harvestingClient)); - return created( "/harvest/clients/" + nickName, harvestingConfigAsJson(harvestingClient)); + return created( "/harvest/clients/" + nickName, JsonPrinter.json(harvestingClient)); } catch (JsonParseException ex) { return error( Response.Status.BAD_REQUEST, "Error parsing harvesting client: " + ex.getMessage() ); @@ -268,6 +269,8 @@ public Response modifyHarvestingClient(String jsonBody, @PathParam("nickName") S } // Go through the supported editable fields and update the client accordingly: + // TODO: We may want to reevaluate whether we really want/need *all* + // of these fields to be editable. if (newHarvestingClient.getHarvestingUrl() != null) { harvestingClient.setHarvestingUrl(newHarvestingClient.getHarvestingUrl()); @@ -287,10 +290,13 @@ public Response modifyHarvestingClient(String jsonBody, @PathParam("nickName") S if (newHarvestingClient.getHarvestStyle() != null) { harvestingClient.setHarvestStyle(newHarvestingClient.getHarvestStyle()); } + if (newHarvestingClient.getCustomHttpHeaders() != null) { + harvestingClient.setCustomHttpHeaders(newHarvestingClient.getCustomHttpHeaders()); + } // TODO: Make schedule configurable via this API too. 
harvestingClient = execCommand( new UpdateHarvestingClientCommand(req, harvestingClient)); - return ok( "/harvest/clients/" + nickName, harvestingConfigAsJson(harvestingClient)); + return ok( "/harvest/clients/" + nickName, JsonPrinter.json(harvestingClient)); // harvestingConfigAsJson(harvestingClient)); } catch (JsonParseException ex) { return error( Response.Status.BAD_REQUEST, "Error parsing harvesting client: " + ex.getMessage() ); @@ -373,13 +379,13 @@ public Response startHarvestingJob(@PathParam("nickName") String clientNickname, } if (authenticatedUser == null || !authenticatedUser.isSuperuser()) { - return error(Response.Status.FORBIDDEN, "Only the Dataverse Admin user can run harvesting jobs"); + return error(Response.Status.FORBIDDEN, "Only admin users can run harvesting jobs"); } HarvestingClient harvestingClient = harvestingClientService.findByNickname(clientNickname); if (harvestingClient == null) { - return error(Response.Status.NOT_FOUND, "No such dataverse: "+clientNickname); + return error(Response.Status.NOT_FOUND, "No such client: "+clientNickname); } DataverseRequest dataverseRequest = createDataverseRequest(authenticatedUser); @@ -390,59 +396,4 @@ public Response startHarvestingJob(@PathParam("nickName") String clientNickname, } return this.accepted(); } - - // This GET shows the status of the harvesting run in progress for this - // client, if present: - // @GET - // @Path("{nickName}/run") - // TODO: - - // This DELETE kills the harvesting run in progress for this client, - // if present: - // @DELETE - // @Path("{nickName}/run") - // TODO: - - - - - - /* Auxiliary, helper methods: */ - - /* - @Deprecated - public static JsonArrayBuilder harvestingConfigsAsJsonArray(List harvestingDataverses) { - JsonArrayBuilder hdArr = Json.createArrayBuilder(); - - for (Dataverse hd : harvestingDataverses) { - hdArr.add(harvestingConfigAsJson(hd.getHarvestingClientConfig())); - } - return hdArr; - }*/ - - public static JsonObjectBuilder harvestingConfigAsJson(HarvestingClient harvestingConfig) { - if (harvestingConfig == null) { - return null; - } - - - return jsonObjectBuilder().add("nickName", harvestingConfig.getName()). - add("dataverseAlias", harvestingConfig.getDataverse().getAlias()). - add("type", harvestingConfig.getHarvestType()). - add("style", harvestingConfig.getHarvestStyle()). - add("harvestUrl", harvestingConfig.getHarvestingUrl()). - add("archiveUrl", harvestingConfig.getArchiveUrl()). - add("archiveDescription",harvestingConfig.getArchiveDescription()). - add("metadataFormat", harvestingConfig.getMetadataPrefix()). - add("set", harvestingConfig.getHarvestingSet() == null ? "N/A" : harvestingConfig.getHarvestingSet()). - add("schedule", harvestingConfig.isScheduled() ? harvestingConfig.getScheduleDescription() : "none"). - add("status", harvestingConfig.isHarvestingNow() ? "inProgress" : "inActive"). - add("lastHarvest", harvestingConfig.getLastHarvestTime() == null ? "N/A" : harvestingConfig.getLastHarvestTime().toString()). - add("lastResult", harvestingConfig.getLastResult()). - add("lastSuccessful", harvestingConfig.getLastSuccessfulHarvestTime() == null ? "N/A" : harvestingConfig.getLastSuccessfulHarvestTime().toString()). - add("lastNonEmpty", harvestingConfig.getLastNonEmptyHarvestTime() == null ? "N/A" : harvestingConfig.getLastNonEmptyHarvestTime().toString()). - add("lastDatasetsHarvested", harvestingConfig.getLastHarvestedDatasetCount() == null ? "N/A" : harvestingConfig.getLastHarvestedDatasetCount().toString()). 
- add("lastDatasetsDeleted", harvestingConfig.getLastDeletedDatasetCount() == null ? "N/A" : harvestingConfig.getLastDeletedDatasetCount().toString()). - add("lastDatasetsFailed", harvestingConfig.getLastFailedDatasetCount() == null ? "N/A" : harvestingConfig.getLastFailedDatasetCount().toString()); - } } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Info.java b/src/main/java/edu/harvard/iq/dataverse/api/Info.java index 4fe5cba5b9f..fd7824c15cf 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Info.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Info.java @@ -1,5 +1,6 @@ package edu.harvard.iq.dataverse.api; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.SystemConfig; import javax.ejb.EJB; @@ -44,7 +45,7 @@ public Response getInfo() { @GET @Path("server") public Response getServer() { - return response( req -> ok(systemConfig.getDataverseServer())); + return response( req -> ok(JvmSettings.FQDN.lookup())); } @GET diff --git a/src/main/java/edu/harvard/iq/dataverse/api/TestApi.java b/src/main/java/edu/harvard/iq/dataverse/api/TestApi.java index b532fbd4154..42caa95b9f5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/TestApi.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/TestApi.java @@ -63,7 +63,9 @@ public Response getExternalToolsForFile(@PathParam("id") String idSupplied, @Que ApiToken apiToken = externalToolService.getApiToken(getRequestApiKey()); ExternalToolHandler externalToolHandler = new ExternalToolHandler(tool, dataFile, apiToken, dataFile.getFileMetadata(), null); JsonObjectBuilder toolToJson = externalToolService.getToolAsJsonWithQueryParameters(externalToolHandler); - tools.add(toolToJson); + if (externalToolService.meetsRequirements(tool, dataFile)) { + tools.add(toolToJson); + } } return ok(tools); } catch (WrappedResponse wr) { diff --git a/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/SwordConfigurationImpl.java b/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/SwordConfigurationImpl.java index ce5f9415fcc..1e506c6a0b1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/SwordConfigurationImpl.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/SwordConfigurationImpl.java @@ -1,5 +1,6 @@ package edu.harvard.iq.dataverse.api.datadeposit; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.util.SystemConfig; import java.io.File; import java.util.Arrays; @@ -86,37 +87,32 @@ public boolean storeAndCheckBinary() { @Override public String getTempDirectory() { - String tmpFileDir = System.getProperty(SystemConfig.FILES_DIRECTORY); - if (tmpFileDir != null) { - String swordDirString = tmpFileDir + File.separator + "sword"; - File swordDirFile = new File(swordDirString); - /** - * @todo Do we really need this check? It seems like we do because - * if you create a dataset via the native API and then later try to - * upload a file via SWORD, the directory defined by - * dataverse.files.directory may not exist and we get errors deep in - * the SWORD library code. Could maybe use a try catch in the doPost - * method of our SWORDv2MediaResourceServlet. - */ - if (swordDirFile.exists()) { + // will throw a runtime exception when not found + String tmpFileDir = JvmSettings.FILES_DIRECTORY.lookup(); + + String swordDirString = tmpFileDir + File.separator + "sword"; + File swordDirFile = new File(swordDirString); + /** + * @todo Do we really need this check? 
It seems like we do because + * if you create a dataset via the native API and then later try to + * upload a file via SWORD, the directory defined by + * dataverse.files.directory may not exist and we get errors deep in + * the SWORD library code. Could maybe use a try catch in the doPost + * method of our SWORDv2MediaResourceServlet. + */ + if (swordDirFile.exists()) { + return swordDirString; + } else { + boolean mkdirSuccess = swordDirFile.mkdirs(); + if (mkdirSuccess) { + logger.info("Created directory " + swordDirString); return swordDirString; } else { - boolean mkdirSuccess = swordDirFile.mkdirs(); - if (mkdirSuccess) { - logger.info("Created directory " + swordDirString); - return swordDirString; - } else { - String msgForSwordUsers = ("Could not determine or create SWORD temp directory. Check logs for details."); - logger.severe(msgForSwordUsers + " Failed to create " + swordDirString); - // sadly, must throw RunTimeException to communicate with SWORD user - throw new RuntimeException(msgForSwordUsers); - } + String msgForSwordUsers = ("Could not determine or create SWORD temp directory. Check logs for details."); + logger.severe(msgForSwordUsers + " Failed to create " + swordDirString); + // sadly, must throw RunTimeException to communicate with SWORD user + throw new RuntimeException(msgForSwordUsers); } - } else { - String msgForSwordUsers = ("JVM option \"" + SystemConfig.FILES_DIRECTORY + "\" not defined. Check logs for details."); - logger.severe(msgForSwordUsers); - // sadly, must throw RunTimeException to communicate with SWORD user - throw new RuntimeException(msgForSwordUsers); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportDDIServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportDDIServiceBean.java index a4e78b33a3c..d9433832309 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportDDIServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportDDIServiceBean.java @@ -1352,7 +1352,9 @@ private void processProdStmt(XMLStreamReader xmlr, MetadataBlockDTO citation) th } else if (xmlr.getLocalName().equals("prodDate")) { citation.getFields().add(FieldDTO.createPrimitiveFieldDTO("productionDate", parseDate(xmlr, "prodDate"))); } else if (xmlr.getLocalName().equals("prodPlac")) { - citation.getFields().add(FieldDTO.createPrimitiveFieldDTO("productionPlace", parseDate(xmlr, "prodPlac"))); + List prodPlac = new ArrayList<>(); + prodPlac.add(parseText(xmlr, "prodPlac")); + citation.getFields().add(FieldDTO.createMultiplePrimitiveFieldDTO(DatasetFieldConstant.productionPlace, prodPlac)); } else if (xmlr.getLocalName().equals("software")) { HashSet set = new HashSet<>(); addToSet(set,"softwareVersion", xmlr.getAttributeValue(null, "version")); diff --git a/src/main/java/edu/harvard/iq/dataverse/authorization/AuthenticationServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/authorization/AuthenticationServiceBean.java index d92ed78681b..9bf53116efa 100644 --- a/src/main/java/edu/harvard/iq/dataverse/authorization/AuthenticationServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/authorization/AuthenticationServiceBean.java @@ -940,4 +940,14 @@ public List getWorkflowCommentsByAuthenticatedUser(Authenticat return query.getResultList(); } + public ApiToken getValidApiTokenForUser(AuthenticatedUser user) { + ApiToken apiToken = null; + apiToken = findApiTokenByUser(user); + if ((apiToken == null) || (apiToken.getExpireTime().before(new Date()))) { + logger.fine("Created apiToken for 
user: " + user.getIdentifier()); + apiToken = generateApiTokenForUser(user); + } + return apiToken; + } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordJobListener.java b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordJobListener.java index 3ae8ce9b883..a5ba9a00bd2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordJobListener.java +++ b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordJobListener.java @@ -57,8 +57,10 @@ import javax.inject.Named; import javax.servlet.http.HttpServletRequest; +import edu.harvard.iq.dataverse.settings.JvmSettings; import org.apache.commons.io.IOUtils; +import java.io.File; import java.io.FileReader; import java.io.IOException; import java.sql.Timestamp; @@ -79,7 +81,7 @@ @Dependent public class FileRecordJobListener implements ItemReadListener, StepListener, JobListener { - public static final String SEP = System.getProperty("file.separator"); + public static final String SEP = File.separator; private static final UserNotification.Type notifyType = UserNotification.Type.FILESYSTEMIMPORT; @@ -433,8 +435,10 @@ private void loadChecksumManifest() { manifest = checksumManifest; getJobLogger().log(Level.INFO, "Checksum manifest = " + manifest + " (FileSystemImportJob.xml property)"); } - // construct full path - String manifestAbsolutePath = System.getProperty("dataverse.files.directory") + + // Construct full path - retrieve base dir via MPCONFIG. + // (Has sane default /tmp/dataverse from META-INF/microprofile-config.properties) + String manifestAbsolutePath = JvmSettings.FILES_DIRECTORY.lookup() + SEP + dataset.getAuthority() + SEP + dataset.getIdentifier() + SEP + uploadFolder diff --git a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordReader.java b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordReader.java index b3d3a7107a6..a4f8ffd2378 100644 --- a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordReader.java +++ b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordReader.java @@ -24,6 +24,7 @@ import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.batch.jobs.importer.ImportMode; +import edu.harvard.iq.dataverse.settings.JvmSettings; import org.apache.commons.io.filefilter.NotFileFilter; import org.apache.commons.io.filefilter.WildcardFileFilter; @@ -54,7 +55,7 @@ @Dependent public class FileRecordReader extends AbstractItemReader { - public static final String SEP = System.getProperty("file.separator"); + public static final String SEP = File.separator; @Inject JobContext jobContext; @@ -96,9 +97,11 @@ public void init() { @Override public void open(Serializable checkpoint) throws Exception { - - directory = new File(System.getProperty("dataverse.files.directory") - + SEP + dataset.getAuthority() + SEP + dataset.getIdentifier() + SEP + uploadFolder); + + // Retrieve via MPCONFIG. 
Has sane default /tmp/dataverse from META-INF/microprofile-config.properties + String baseDir = JvmSettings.FILES_DIRECTORY.lookup(); + + directory = new File(baseDir + SEP + dataset.getAuthority() + SEP + dataset.getIdentifier() + SEP + uploadFolder); // TODO: // The above goes directly to the filesystem directory configured by the // old "dataverse.files.directory" JVM option (otherwise used for temp diff --git a/src/main/java/edu/harvard/iq/dataverse/batch/util/LoggingUtil.java b/src/main/java/edu/harvard/iq/dataverse/batch/util/LoggingUtil.java index 4a778dc7abb..a2f76ca953d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/batch/util/LoggingUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/batch/util/LoggingUtil.java @@ -154,8 +154,8 @@ public static Logger getJobLogger(String jobId) { try { Logger jobLogger = Logger.getLogger("job-"+jobId); FileHandler fh; - String logDir = System.getProperty("com.sun.aas.instanceRoot") + System.getProperty("file.separator") - + "logs" + System.getProperty("file.separator") + "batch-jobs" + System.getProperty("file.separator"); + String logDir = System.getProperty("com.sun.aas.instanceRoot") + File.separator + + "logs" + File.separator + "batch-jobs" + File.separator; checkCreateLogDirectory( logDir ); fh = new FileHandler(logDir + "job-" + jobId + ".log"); logger.log(Level.INFO, "JOB LOG: " + logDir + "job-" + jobId + ".log"); diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java index d5f00b9868f..8ee3f0cf53c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java @@ -33,9 +33,11 @@ import java.nio.file.Path; import java.nio.file.Paths; import java.util.List; +import java.util.function.Predicate; import java.util.logging.Logger; import java.util.regex.Matcher; import java.util.regex.Pattern; +import java.util.stream.Collectors; // Dataverse imports: import edu.harvard.iq.dataverse.DataFile; @@ -683,4 +685,56 @@ protected static boolean isValidIdentifier(String driverId, String storageId) { } return true; } + + private List listAllFiles() throws IOException { + Dataset dataset = this.getDataset(); + if (dataset == null) { + throw new IOException("This FileAccessIO object hasn't been properly initialized."); + } + + Path datasetDirectoryPath = Paths.get(dataset.getAuthorityForFileStorage(), dataset.getIdentifierForFileStorage()); + if (datasetDirectoryPath == null) { + throw new IOException("Could not determine the filesystem directory of the dataset."); + } + + DirectoryStream dirStream = Files.newDirectoryStream(Paths.get(this.getFilesRootDirectory(), datasetDirectoryPath.toString())); + + List res = new ArrayList<>(); + if (dirStream != null) { + for (Path filePath : dirStream) { + res.add(filePath.getFileName().toString()); + } + dirStream.close(); + } + + return res; + } + + private void deleteFile(String fileName) throws IOException { + Dataset dataset = this.getDataset(); + if (dataset == null) { + throw new IOException("This FileAccessIO object hasn't been properly initialized."); + } + + Path datasetDirectoryPath = Paths.get(dataset.getAuthorityForFileStorage(), dataset.getIdentifierForFileStorage()); + if (datasetDirectoryPath == null) { + throw new IOException("Could not determine the filesystem directory of the dataset."); + } + + Path p = Paths.get(this.getFilesRootDirectory(), datasetDirectoryPath.toString(), fileName); + 
Files.delete(p); + } + + @Override + public List cleanUp(Predicate filter, boolean dryRun) throws IOException { + List toDelete = this.listAllFiles().stream().filter(filter).collect(Collectors.toList()); + if (dryRun) { + return toDelete; + } + for (String f : toDelete) { + this.deleteFile(f); + } + return toDelete; + } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java index c9796d24b27..be6f9df0254 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java @@ -14,6 +14,7 @@ import java.nio.channels.WritableByteChannel; import java.nio.file.Path; import java.util.List; +import java.util.function.Predicate; import java.util.logging.Logger; /** @@ -159,5 +160,9 @@ public void revertBackupAsAux(String auxItemTag) throws IOException { throw new UnsupportedDataAccessOperationException("InputStreamIO: this method is not supported in this DataAccess driver."); } - + @Override + public List cleanUp(Predicate filter, boolean dryRun) throws IOException { + throw new UnsupportedDataAccessOperationException("InputStreamIO: this method is not supported in this DataAccess driver."); + } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java index c8e42349318..66c6a4cc2ee 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java @@ -24,6 +24,7 @@ import java.security.KeyStoreException; import java.security.NoSuchAlgorithmException; import java.util.List; +import java.util.function.Predicate; import java.util.logging.Logger; import org.apache.http.Header; @@ -630,5 +631,9 @@ protected static boolean isValidIdentifier(String driverId, String storageId) { public static String getBaseStoreIdFor(String driverId) { return System.getProperty("dataverse.files." 
+ driverId + ".base-store"); } - + + @Override + public List cleanUp(Predicate filter, boolean dryRun) throws IOException { + return baseStore.cleanUp(filter, dryRun); + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index 3c9cef04980..f396b07d788 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -60,7 +60,10 @@ import java.util.HashMap; import java.util.List; import java.util.Random; +import java.util.function.Predicate; import java.util.logging.Logger; +import java.util.stream.Collectors; + import org.apache.commons.io.IOUtils; import org.eclipse.microprofile.config.Config; import org.eclipse.microprofile.config.ConfigProvider; @@ -1306,5 +1309,75 @@ protected static boolean isValidIdentifier(String driverId, String storageId) { return true; } + private List listAllFiles() throws IOException { + if (!this.canWrite()) { + open(); + } + Dataset dataset = this.getDataset(); + if (dataset == null) { + throw new IOException("This S3AccessIO object hasn't been properly initialized."); + } + String prefix = dataset.getAuthorityForFileStorage() + "/" + dataset.getIdentifierForFileStorage() + "/"; + + List ret = new ArrayList<>(); + ListObjectsRequest req = new ListObjectsRequest().withBucketName(bucketName).withPrefix(prefix); + ObjectListing storedFilesList = null; + try { + storedFilesList = s3.listObjects(req); + } catch (SdkClientException sce) { + throw new IOException ("S3 listObjects: failed to get a listing for " + prefix); + } + if (storedFilesList == null) { + return ret; + } + List storedFilesSummary = storedFilesList.getObjectSummaries(); + try { + while (storedFilesList.isTruncated()) { + logger.fine("S3 listObjects: going to next page of list"); + storedFilesList = s3.listNextBatchOfObjects(storedFilesList); + if (storedFilesList != null) { + storedFilesSummary.addAll(storedFilesList.getObjectSummaries()); + } + } + } catch (AmazonClientException ase) { + //logger.warning("Caught an AmazonServiceException in S3AccessIO.listObjects(): " + ase.getMessage()); + throw new IOException("S3AccessIO: Failed to get objects for listing."); + } -} + for (S3ObjectSummary item : storedFilesSummary) { + String fileName = item.getKey().substring(prefix.length()); + ret.add(fileName); + } + return ret; + } + + private void deleteFile(String fileName) throws IOException { + if (!this.canWrite()) { + open(); + } + Dataset dataset = this.getDataset(); + if (dataset == null) { + throw new IOException("This S3AccessIO object hasn't been properly initialized."); + } + String prefix = dataset.getAuthorityForFileStorage() + "/" + dataset.getIdentifierForFileStorage() + "/"; + + try { + DeleteObjectRequest dor = new DeleteObjectRequest(bucketName, prefix + fileName); + s3.deleteObject(dor); + } catch (AmazonClientException ase) { + logger.warning("S3AccessIO: Unable to delete object " + ase.getMessage()); + } + } + + @Override + public List cleanUp(Predicate filter, boolean dryRun) throws IOException { + List toDelete = this.listAllFiles().stream().filter(filter).collect(Collectors.toList()); + if (dryRun) { + return toDelete; + } + for (String f : toDelete) { + this.deleteFile(f); + } + return toDelete; + } +} \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java index 
90e4a54dbe8..bfd5c5f0d8f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java @@ -39,6 +39,7 @@ import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.function.Predicate; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -622,4 +623,6 @@ protected static boolean usesStandardNamePattern(String identifier) { return m.find(); } + public abstract List cleanUp(Predicate filter, boolean dryRun) throws IOException; + } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java index b1725b040a3..6c84009de3e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java @@ -22,7 +22,10 @@ import java.util.Formatter; import java.util.List; import java.util.Properties; +import java.util.function.Predicate; import java.util.logging.Logger; +import java.util.stream.Collectors; + import javax.crypto.Mac; import javax.crypto.spec.SecretKeySpec; import org.javaswift.joss.client.factory.AccountFactory; @@ -864,13 +867,16 @@ public InputStream getAuxFileAsInputStream(String auxItemTag) throws IOException } } + private String getSwiftContainerName(Dataset dataset) { + String authorityNoSlashes = dataset.getAuthorityForFileStorage().replace("/", swiftFolderPathSeparator); + return dataset.getProtocolForFileStorage() + swiftFolderPathSeparator + authorityNoSlashes.replace(".", swiftFolderPathSeparator) + + swiftFolderPathSeparator + dataset.getIdentifierForFileStorage(); + } + @Override public String getSwiftContainerName() { if (dvObject instanceof DataFile) { - String authorityNoSlashes = this.getDataFile().getOwner().getAuthorityForFileStorage().replace("/", swiftFolderPathSeparator); - return this.getDataFile().getOwner().getProtocolForFileStorage() + swiftFolderPathSeparator - + authorityNoSlashes.replace(".", swiftFolderPathSeparator) + - swiftFolderPathSeparator + this.getDataFile().getOwner().getIdentifierForFileStorage(); + return getSwiftContainerName(this.getDataFile().getOwner()); } return null; } @@ -893,5 +899,59 @@ public static String calculateRFC2104HMAC(String data, String key) mac.init(signingKey); return toHexString(mac.doFinal(data.getBytes())); } - + + private List listAllFiles() throws IOException { + if (!this.canWrite()) { + open(DataAccessOption.WRITE_ACCESS); + } + Dataset dataset = this.getDataset(); + if (dataset == null) { + throw new IOException("This SwiftAccessIO object hasn't been properly initialized."); + } + String prefix = getSwiftContainerName(dataset) + swiftFolderPathSeparator; + + Collection items; + String lastItemName = null; + List ret = new ArrayList<>(); + + while ((items = this.swiftContainer.list(prefix, lastItemName, LIST_PAGE_LIMIT)) != null && items.size() > 0) { + for (StoredObject item : items) { + lastItemName = item.getName().substring(prefix.length()); + ret.add(lastItemName); + } + } + + return ret; + } + + private void deleteFile(String fileName) throws IOException { + if (!this.canWrite()) { + open(DataAccessOption.WRITE_ACCESS); + } + Dataset dataset = this.getDataset(); + if (dataset == null) { + throw new IOException("This SwiftAccessIO object hasn't been properly initialized."); + } + String prefix = getSwiftContainerName(dataset) + swiftFolderPathSeparator; + + StoredObject fileObject = 
this.swiftContainer.getObject(prefix + fileName); + + if (!fileObject.exists()) { + throw new FileNotFoundException("SwiftAccessIO/Direct Access: " + fileName + " does not exist"); + } + + fileObject.delete(); + } + + @Override + public List cleanUp(Predicate filter, boolean dryRun) throws IOException { + List toDelete = this.listAllFiles().stream().filter(filter).collect(Collectors.toList()); + if (dryRun) { + return toDelete; + } + for (String f : toDelete) { + this.deleteFile(f); + } + return toDelete; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/TabularSubsetGenerator.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/TabularSubsetGenerator.java index 0b6b37af9f0..782f7f3a52d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/TabularSubsetGenerator.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/TabularSubsetGenerator.java @@ -365,8 +365,8 @@ public void subsetFile(String infile, String outfile, List columns, Lon public void subsetFile(String infile, String outfile, List columns, Long numCases, String delimiter) { - try { - subsetFile(new FileInputStream(new File(infile)), outfile, columns, numCases, delimiter); + try (FileInputStream fis = new FileInputStream(new File(infile))){ + subsetFile(fis, outfile, columns, numCases, delimiter); } catch (IOException ex) { throw new RuntimeException("Could not open file "+infile); } @@ -375,33 +375,28 @@ public void subsetFile(String infile, String outfile, List columns, Lon public void subsetFile(InputStream in, String outfile, List columns, Long numCases, String delimiter) { - try { - Scanner scanner = new Scanner(in); - scanner.useDelimiter("\\n"); - - BufferedWriter out = new BufferedWriter(new FileWriter(outfile)); - for (long caseIndex = 0; caseIndex < numCases; caseIndex++) { - if (scanner.hasNext()) { - String[] line = (scanner.next()).split(delimiter,-1); - List ln = new ArrayList(); - for (Integer i : columns) { - ln.add(line[i]); + try (Scanner scanner = new Scanner(in); BufferedWriter out = new BufferedWriter(new FileWriter(outfile))) { + scanner.useDelimiter("\\n"); + + for (long caseIndex = 0; caseIndex < numCases; caseIndex++) { + if (scanner.hasNext()) { + String[] line = (scanner.next()).split(delimiter,-1); + List ln = new ArrayList(); + for (Integer i : columns) { + ln.add(line[i]); + } + out.write(StringUtils.join(ln,"\t")+"\n"); + } else { + throw new RuntimeException("Tab file has fewer rows than the determined number of cases."); } - out.write(StringUtils.join(ln,"\t")+"\n"); - } else { - throw new RuntimeException("Tab file has fewer rows than the determined number of cases."); } - } - while (scanner.hasNext()) { - if (!"".equals(scanner.next()) ) { - throw new RuntimeException("Tab file has extra nonempty rows than the determined number of cases."); + while (scanner.hasNext()) { + if (!"".equals(scanner.next()) ) { + throw new RuntimeException("Tab file has extra nonempty rows than the determined number of cases."); + } } - } - - scanner.close(); - out.close(); } catch (FileNotFoundException e) { e.printStackTrace(); @@ -418,50 +413,48 @@ public void subsetFile(InputStream in, String outfile, List columns, Lo public static Double[] subsetDoubleVector(InputStream in, int column, int numCases) { Double[] retVector = new Double[numCases]; - Scanner scanner = new Scanner(in); - scanner.useDelimiter("\\n"); - - for (int caseIndex = 0; caseIndex < numCases; caseIndex++) { - if (scanner.hasNext()) { - String[] line = (scanner.next()).split("\t", -1); - - // Verified: new 
Double("nan") works correctly, - // resulting in Double.NaN; - // Double("[+-]Inf") doesn't work however; - // (the constructor appears to be expecting it - // to be spelled as "Infinity", "-Infinity", etc. - if ("inf".equalsIgnoreCase(line[column]) || "+inf".equalsIgnoreCase(line[column])) { - retVector[caseIndex] = java.lang.Double.POSITIVE_INFINITY; - } else if ("-inf".equalsIgnoreCase(line[column])) { - retVector[caseIndex] = java.lang.Double.NEGATIVE_INFINITY; - } else if (line[column] == null || line[column].equals("")) { - // missing value: - retVector[caseIndex] = null; - } else { - try { - retVector[caseIndex] = new Double(line[column]); - } catch (NumberFormatException ex) { - retVector[caseIndex] = null; // missing value + try (Scanner scanner = new Scanner(in)) { + scanner.useDelimiter("\\n"); + + for (int caseIndex = 0; caseIndex < numCases; caseIndex++) { + if (scanner.hasNext()) { + String[] line = (scanner.next()).split("\t", -1); + + // Verified: new Double("nan") works correctly, + // resulting in Double.NaN; + // Double("[+-]Inf") doesn't work however; + // (the constructor appears to be expecting it + // to be spelled as "Infinity", "-Infinity", etc. + if ("inf".equalsIgnoreCase(line[column]) || "+inf".equalsIgnoreCase(line[column])) { + retVector[caseIndex] = java.lang.Double.POSITIVE_INFINITY; + } else if ("-inf".equalsIgnoreCase(line[column])) { + retVector[caseIndex] = java.lang.Double.NEGATIVE_INFINITY; + } else if (line[column] == null || line[column].equals("")) { + // missing value: + retVector[caseIndex] = null; + } else { + try { + retVector[caseIndex] = new Double(line[column]); + } catch (NumberFormatException ex) { + retVector[caseIndex] = null; // missing value + } } - } - } else { - scanner.close(); - throw new RuntimeException("Tab file has fewer rows than the stored number of cases!"); + } else { + throw new RuntimeException("Tab file has fewer rows than the stored number of cases!"); + } } - } - int tailIndex = numCases; - while (scanner.hasNext()) { - String nextLine = scanner.next(); - if (!"".equals(nextLine)) { - scanner.close(); - throw new RuntimeException("Column " + column + ": tab file has more nonempty rows than the stored number of cases (" + numCases + ")! current index: " + tailIndex + ", line: " + nextLine); + int tailIndex = numCases; + while (scanner.hasNext()) { + String nextLine = scanner.next(); + if (!"".equals(nextLine)) { + throw new RuntimeException("Column " + column + ": tab file has more nonempty rows than the stored number of cases (" + numCases + ")! current index: " + tailIndex + ", line: " + nextLine); + } + tailIndex++; } - tailIndex++; - } - scanner.close(); + } return retVector; } @@ -472,48 +465,46 @@ public static Double[] subsetDoubleVector(InputStream in, int column, int numCas */ public static Float[] subsetFloatVector(InputStream in, int column, int numCases) { Float[] retVector = new Float[numCases]; - Scanner scanner = new Scanner(in); - scanner.useDelimiter("\\n"); - - for (int caseIndex = 0; caseIndex < numCases; caseIndex++) { - if (scanner.hasNext()) { - String[] line = (scanner.next()).split("\t", -1); - // Verified: new Float("nan") works correctly, - // resulting in Float.NaN; - // Float("[+-]Inf") doesn't work however; - // (the constructor appears to be expecting it - // to be spelled as "Infinity", "-Infinity", etc. 
- if ("inf".equalsIgnoreCase(line[column]) || "+inf".equalsIgnoreCase(line[column])) { - retVector[caseIndex] = java.lang.Float.POSITIVE_INFINITY; - } else if ("-inf".equalsIgnoreCase(line[column])) { - retVector[caseIndex] = java.lang.Float.NEGATIVE_INFINITY; - } else if (line[column] == null || line[column].equals("")) { - // missing value: - retVector[caseIndex] = null; - } else { - try { - retVector[caseIndex] = new Float(line[column]); - } catch (NumberFormatException ex) { - retVector[caseIndex] = null; // missing value + try (Scanner scanner = new Scanner(in)) { + scanner.useDelimiter("\\n"); + + for (int caseIndex = 0; caseIndex < numCases; caseIndex++) { + if (scanner.hasNext()) { + String[] line = (scanner.next()).split("\t", -1); + // Verified: new Float("nan") works correctly, + // resulting in Float.NaN; + // Float("[+-]Inf") doesn't work however; + // (the constructor appears to be expecting it + // to be spelled as "Infinity", "-Infinity", etc. + if ("inf".equalsIgnoreCase(line[column]) || "+inf".equalsIgnoreCase(line[column])) { + retVector[caseIndex] = java.lang.Float.POSITIVE_INFINITY; + } else if ("-inf".equalsIgnoreCase(line[column])) { + retVector[caseIndex] = java.lang.Float.NEGATIVE_INFINITY; + } else if (line[column] == null || line[column].equals("")) { + // missing value: + retVector[caseIndex] = null; + } else { + try { + retVector[caseIndex] = new Float(line[column]); + } catch (NumberFormatException ex) { + retVector[caseIndex] = null; // missing value + } } + } else { + throw new RuntimeException("Tab file has fewer rows than the stored number of cases!"); } - } else { - scanner.close(); - throw new RuntimeException("Tab file has fewer rows than the stored number of cases!"); } - } - int tailIndex = numCases; - while (scanner.hasNext()) { - String nextLine = scanner.next(); - if (!"".equals(nextLine)) { - scanner.close(); - throw new RuntimeException("Column "+column+": tab file has more nonempty rows than the stored number of cases ("+numCases+")! current index: "+tailIndex+", line: "+nextLine); + int tailIndex = numCases; + while (scanner.hasNext()) { + String nextLine = scanner.next(); + if (!"".equals(nextLine)) { + throw new RuntimeException("Column "+column+": tab file has more nonempty rows than the stored number of cases ("+numCases+")! 
current index: "+tailIndex+", line: "+nextLine); + } + tailIndex++; } - tailIndex++; - } - scanner.close(); + } return retVector; } @@ -524,34 +515,32 @@ public static Float[] subsetFloatVector(InputStream in, int column, int numCases */ public static Long[] subsetLongVector(InputStream in, int column, int numCases) { Long[] retVector = new Long[numCases]; - Scanner scanner = new Scanner(in); - scanner.useDelimiter("\\n"); + try (Scanner scanner = new Scanner(in)) { + scanner.useDelimiter("\\n"); - for (int caseIndex = 0; caseIndex < numCases; caseIndex++) { - if (scanner.hasNext()) { - String[] line = (scanner.next()).split("\t", -1); - try { - retVector[caseIndex] = new Long(line[column]); - } catch (NumberFormatException ex) { - retVector[caseIndex] = null; // assume missing value + for (int caseIndex = 0; caseIndex < numCases; caseIndex++) { + if (scanner.hasNext()) { + String[] line = (scanner.next()).split("\t", -1); + try { + retVector[caseIndex] = new Long(line[column]); + } catch (NumberFormatException ex) { + retVector[caseIndex] = null; // assume missing value + } + } else { + throw new RuntimeException("Tab file has fewer rows than the stored number of cases!"); } - } else { - scanner.close(); - throw new RuntimeException("Tab file has fewer rows than the stored number of cases!"); } - } - int tailIndex = numCases; - while (scanner.hasNext()) { - String nextLine = scanner.next(); - if (!"".equals(nextLine)) { - scanner.close(); - throw new RuntimeException("Column "+column+": tab file has more nonempty rows than the stored number of cases ("+numCases+")! current index: "+tailIndex+", line: "+nextLine); + int tailIndex = numCases; + while (scanner.hasNext()) { + String nextLine = scanner.next(); + if (!"".equals(nextLine)) { + throw new RuntimeException("Column "+column+": tab file has more nonempty rows than the stored number of cases ("+numCases+")! current index: "+tailIndex+", line: "+nextLine); + } + tailIndex++; } - tailIndex++; - } - scanner.close(); + } return retVector; } @@ -562,75 +551,72 @@ public static Long[] subsetLongVector(InputStream in, int column, int numCases) */ public static String[] subsetStringVector(InputStream in, int column, int numCases) { String[] retVector = new String[numCases]; - Scanner scanner = new Scanner(in); - scanner.useDelimiter("\\n"); + try (Scanner scanner = new Scanner(in)) { + scanner.useDelimiter("\\n"); - for (int caseIndex = 0; caseIndex < numCases; caseIndex++) { - if (scanner.hasNext()) { - String[] line = (scanner.next()).split("\t", -1); - retVector[caseIndex] = line[column]; - - - if ("".equals(line[column])) { - // An empty string is a string missing value! - // An empty string in quotes is an empty string! - retVector[caseIndex] = null; - } else { - // Strip the outer quotes: - line[column] = line[column].replaceFirst("^\\\"", ""); - line[column] = line[column].replaceFirst("\\\"$", ""); - - // We need to restore the special characters that - // are stored in tab files escaped - quotes, new lines - // and tabs. Before we do that however, we need to - // take care of any escaped backslashes stored in - // the tab file. I.e., "foo\t" should be transformed - // to "foo"; but "foo\\t" should be transformed - // to "foo\t". This way new lines and tabs that were - // already escaped in the original data are not - // going to be transformed to unescaped tab and - // new line characters! 
- String[] splitTokens = line[column].split(Matcher.quoteReplacement("\\\\"), -2); - - // (note that it's important to use the 2-argument version - // of String.split(), and set the limit argument to a - // negative value; otherwise any trailing backslashes - // are lost.) - for (int i = 0; i < splitTokens.length; i++) { - splitTokens[i] = splitTokens[i].replaceAll(Matcher.quoteReplacement("\\\""), "\""); - splitTokens[i] = splitTokens[i].replaceAll(Matcher.quoteReplacement("\\t"), "\t"); - splitTokens[i] = splitTokens[i].replaceAll(Matcher.quoteReplacement("\\n"), "\n"); - splitTokens[i] = splitTokens[i].replaceAll(Matcher.quoteReplacement("\\r"), "\r"); - } - // TODO: - // Make (some of?) the above optional; for ex., we - // do need to restore the newlines when calculating UNFs; - // But if we are subsetting these vectors in order to - // create a new tab-delimited file, they will - // actually break things! -- L.A. Jul. 28 2014 + for (int caseIndex = 0; caseIndex < numCases; caseIndex++) { + if (scanner.hasNext()) { + String[] line = (scanner.next()).split("\t", -1); + retVector[caseIndex] = line[column]; - line[column] = StringUtils.join(splitTokens, '\\'); + if ("".equals(line[column])) { + // An empty string is a string missing value! + // An empty string in quotes is an empty string! + retVector[caseIndex] = null; + } else { + // Strip the outer quotes: + line[column] = line[column].replaceFirst("^\\\"", ""); + line[column] = line[column].replaceFirst("\\\"$", ""); + + // We need to restore the special characters that + // are stored in tab files escaped - quotes, new lines + // and tabs. Before we do that however, we need to + // take care of any escaped backslashes stored in + // the tab file. I.e., "foo\t" should be transformed + // to "foo"; but "foo\\t" should be transformed + // to "foo\t". This way new lines and tabs that were + // already escaped in the original data are not + // going to be transformed to unescaped tab and + // new line characters! + String[] splitTokens = line[column].split(Matcher.quoteReplacement("\\\\"), -2); + + // (note that it's important to use the 2-argument version + // of String.split(), and set the limit argument to a + // negative value; otherwise any trailing backslashes + // are lost.) + for (int i = 0; i < splitTokens.length; i++) { + splitTokens[i] = splitTokens[i].replaceAll(Matcher.quoteReplacement("\\\""), "\""); + splitTokens[i] = splitTokens[i].replaceAll(Matcher.quoteReplacement("\\t"), "\t"); + splitTokens[i] = splitTokens[i].replaceAll(Matcher.quoteReplacement("\\n"), "\n"); + splitTokens[i] = splitTokens[i].replaceAll(Matcher.quoteReplacement("\\r"), "\r"); + } + // TODO: + // Make (some of?) the above optional; for ex., we + // do need to restore the newlines when calculating UNFs; + // But if we are subsetting these vectors in order to + // create a new tab-delimited file, they will + // actually break things! -- L.A. Jul. 
28 2014 - retVector[caseIndex] = line[column]; - } + line[column] = StringUtils.join(splitTokens, '\\'); - } else { - scanner.close(); - throw new RuntimeException("Tab file has fewer rows than the stored number of cases!"); + retVector[caseIndex] = line[column]; + } + + } else { + throw new RuntimeException("Tab file has fewer rows than the stored number of cases!"); + } } - } - int tailIndex = numCases; - while (scanner.hasNext()) { - String nextLine = scanner.next(); - if (!"".equals(nextLine)) { - scanner.close(); - throw new RuntimeException("Column "+column+": tab file has more nonempty rows than the stored number of cases ("+numCases+")! current index: "+tailIndex+", line: "+nextLine); + int tailIndex = numCases; + while (scanner.hasNext()) { + String nextLine = scanner.next(); + if (!"".equals(nextLine)) { + throw new RuntimeException("Column "+column+": tab file has more nonempty rows than the stored number of cases ("+numCases+")! current index: "+tailIndex+", line: "+nextLine); + } + tailIndex++; } - tailIndex++; - } - scanner.close(); + } return retVector; } @@ -643,42 +629,40 @@ public static String[] subsetStringVector(InputStream in, int column, int numCas */ public static Double[][] subsetDoubleVectors(InputStream in, Set columns, int numCases) throws IOException { Double[][] retVector = new Double[columns.size()][numCases]; - Scanner scanner = new Scanner(in); - scanner.useDelimiter("\\n"); - - for (int caseIndex = 0; caseIndex < numCases; caseIndex++) { - if (scanner.hasNext()) { - String[] line = (scanner.next()).split("\t", -1); - int j = 0; - for (Integer i : columns) { - try { - // TODO: verify that NaN and +-Inf are going to be - // handled correctly here! -- L.A. - // NO, "+-Inf" is not handled correctly; see the - // comment further down below. - retVector[j][caseIndex] = new Double(line[i]); - } catch (NumberFormatException ex) { - retVector[j][caseIndex] = null; // missing value + try (Scanner scanner = new Scanner(in)) { + scanner.useDelimiter("\\n"); + + for (int caseIndex = 0; caseIndex < numCases; caseIndex++) { + if (scanner.hasNext()) { + String[] line = (scanner.next()).split("\t", -1); + int j = 0; + for (Integer i : columns) { + try { + // TODO: verify that NaN and +-Inf are going to be + // handled correctly here! -- L.A. + // NO, "+-Inf" is not handled correctly; see the + // comment further down below. + retVector[j][caseIndex] = new Double(line[i]); + } catch (NumberFormatException ex) { + retVector[j][caseIndex] = null; // missing value + } + j++; } - j++; + } else { + throw new IOException("Tab file has fewer rows than the stored number of cases!"); } - } else { - scanner.close(); - throw new IOException("Tab file has fewer rows than the stored number of cases!"); } - } - int tailIndex = numCases; - while (scanner.hasNext()) { - String nextLine = scanner.next(); - if (!"".equals(nextLine)) { - scanner.close(); - throw new IOException("Tab file has more nonempty rows than the stored number of cases ("+numCases+")! current index: "+tailIndex+", line: "+nextLine); + int tailIndex = numCases; + while (scanner.hasNext()) { + String nextLine = scanner.next(); + if (!"".equals(nextLine)) { + throw new IOException("Tab file has more nonempty rows than the stored number of cases ("+numCases+")! 
current index: "+tailIndex+", line: "+nextLine); + } + tailIndex++; } - tailIndex++; - } - scanner.close(); + } return retVector; } @@ -839,237 +823,238 @@ public Object[] subsetObjectVector(File tabfile, int column, int varcount, int c columnOffset = varcount * 8; columnLength = columnEndOffsets[0] - varcount * 8; } + int caseindex = 0; - FileChannel fc = (FileChannel.open(Paths.get(rotatedImageFile.getAbsolutePath()), StandardOpenOption.READ)); - fc.position(columnOffset); - int MAX_COLUMN_BUFFER = 8192; - - ByteBuffer in = ByteBuffer.allocate(MAX_COLUMN_BUFFER); - - if (columnLength < MAX_COLUMN_BUFFER) { - in.limit((int)(columnLength)); - } - - long bytesRead = 0; - long bytesReadTotal = 0; - int caseindex = 0; - int byteoffset = 0; - byte[] leftover = null; - - while (bytesReadTotal < columnLength) { - bytesRead = fc.read(in); - byte[] columnBytes = in.array(); - int bytecount = 0; + try (FileChannel fc = (FileChannel.open(Paths.get(rotatedImageFile.getAbsolutePath()), + StandardOpenOption.READ))) { + fc.position(columnOffset); + int MAX_COLUMN_BUFFER = 8192; - - while (bytecount < bytesRead) { - if (columnBytes[bytecount] == '\n') { - /* - String token = new String(columnBytes, byteoffset, bytecount-byteoffset, "UTF8"); - - if (leftover != null) { - String leftoverString = new String (leftover, "UTF8"); - token = leftoverString + token; - leftover = null; - } - */ - /* - * Note that the way I was doing it at first - above - - * was not quite the correct way - because I was creating UTF8 - * strings from the leftover bytes, and the bytes in the - * current buffer *separately*; which means, if a multi-byte - * UTF8 character got split in the middle between one buffer - * and the next, both chunks of it would become junk - * characters, on each side! - * The correct way of doing it, of course, is to create a - * merged byte buffer, and then turn it into a UTF8 string. - * -- L.A. 4.0 - */ - String token = null; - - if (leftover == null) { - token = new String(columnBytes, byteoffset, bytecount-byteoffset, "UTF8"); - } else { - byte[] merged = new byte[leftover.length + bytecount-byteoffset]; - - System.arraycopy(leftover, 0, merged, 0, leftover.length); - System.arraycopy(columnBytes, byteoffset, merged, leftover.length, bytecount-byteoffset); - token = new String (merged, "UTF8"); - leftover = null; - merged = null; - } - - if (isString) { - if ("".equals(token)) { - // An empty string is a string missing value! - // An empty string in quotes is an empty string! 
- retVector[caseindex] = null; + ByteBuffer in = ByteBuffer.allocate(MAX_COLUMN_BUFFER); + + if (columnLength < MAX_COLUMN_BUFFER) { + in.limit((int) (columnLength)); + } + + long bytesRead = 0; + long bytesReadTotal = 0; + + int byteoffset = 0; + byte[] leftover = null; + + while (bytesReadTotal < columnLength) { + bytesRead = fc.read(in); + byte[] columnBytes = in.array(); + int bytecount = 0; + + while (bytecount < bytesRead) { + if (columnBytes[bytecount] == '\n') { + /* + String token = new String(columnBytes, byteoffset, bytecount-byteoffset, "UTF8"); + + if (leftover != null) { + String leftoverString = new String (leftover, "UTF8"); + token = leftoverString + token; + leftover = null; + } + */ + /* + * Note that the way I was doing it at first - above - + * was not quite the correct way - because I was creating UTF8 + * strings from the leftover bytes, and the bytes in the + * current buffer *separately*; which means, if a multi-byte + * UTF8 character got split in the middle between one buffer + * and the next, both chunks of it would become junk + * characters, on each side! + * The correct way of doing it, of course, is to create a + * merged byte buffer, and then turn it into a UTF8 string. + * -- L.A. 4.0 + */ + String token = null; + + if (leftover == null) { + token = new String(columnBytes, byteoffset, bytecount - byteoffset, "UTF8"); } else { - // Strip the outer quotes: - token = token.replaceFirst("^\\\"", ""); - token = token.replaceFirst("\\\"$", ""); - - // We need to restore the special characters that - // are stored in tab files escaped - quotes, new lines - // and tabs. Before we do that however, we need to - // take care of any escaped backslashes stored in - // the tab file. I.e., "foo\t" should be transformed - // to "foo"; but "foo\\t" should be transformed - // to "foo\t". This way new lines and tabs that were - // already escaped in the original data are not - // going to be transformed to unescaped tab and - // new line characters! - - String[] splitTokens = token.split(Matcher.quoteReplacement("\\\\"), -2); - - // (note that it's important to use the 2-argument version - // of String.split(), and set the limit argument to a - // negative value; otherwise any trailing backslashes - // are lost.) - - for (int i = 0; i < splitTokens.length; i++) { - splitTokens[i] = splitTokens[i].replaceAll(Matcher.quoteReplacement("\\\""), "\""); - splitTokens[i] = splitTokens[i].replaceAll(Matcher.quoteReplacement("\\t"), "\t"); - splitTokens[i] = splitTokens[i].replaceAll(Matcher.quoteReplacement("\\n"), "\n"); - splitTokens[i] = splitTokens[i].replaceAll(Matcher.quoteReplacement("\\r"), "\r"); - } - // TODO: - // Make (some of?) the above optional; for ex., we - // do need to restore the newlines when calculating UNFs; - // But if we are subsetting these vectors in order to - // create a new tab-delimited file, they will - // actually break things! -- L.A. Jul. 28 2014 - - token = StringUtils.join(splitTokens, '\\'); - - // "compatibility mode" - a hack, to be able to produce - // unfs identical to those produced by the "early" - // unf5 jar; will be removed in production 4.0. - // -- L.A. (TODO: ...) - if (compatmode && !"".equals(token)) { - if (token.length() > 128) { - if ("".equals(token.trim())) { - // don't ask... 
- token = token.substring(0, 129); + byte[] merged = new byte[leftover.length + bytecount - byteoffset]; + + System.arraycopy(leftover, 0, merged, 0, leftover.length); + System.arraycopy(columnBytes, byteoffset, merged, leftover.length, bytecount - byteoffset); + token = new String(merged, "UTF8"); + leftover = null; + merged = null; + } + + if (isString) { + if ("".equals(token)) { + // An empty string is a string missing value! + // An empty string in quotes is an empty string! + retVector[caseindex] = null; + } else { + // Strip the outer quotes: + token = token.replaceFirst("^\\\"", ""); + token = token.replaceFirst("\\\"$", ""); + + // We need to restore the special characters that + // are stored in tab files escaped - quotes, new lines + // and tabs. Before we do that however, we need to + // take care of any escaped backslashes stored in + // the tab file. I.e., "foo\t" should be transformed + // to "foo"; but "foo\\t" should be transformed + // to "foo\t". This way new lines and tabs that were + // already escaped in the original data are not + // going to be transformed to unescaped tab and + // new line characters! + + String[] splitTokens = token.split(Matcher.quoteReplacement("\\\\"), -2); + + // (note that it's important to use the 2-argument version + // of String.split(), and set the limit argument to a + // negative value; otherwise any trailing backslashes + // are lost.) + + for (int i = 0; i < splitTokens.length; i++) { + splitTokens[i] = splitTokens[i].replaceAll(Matcher.quoteReplacement("\\\""), "\""); + splitTokens[i] = splitTokens[i].replaceAll(Matcher.quoteReplacement("\\t"), "\t"); + splitTokens[i] = splitTokens[i].replaceAll(Matcher.quoteReplacement("\\n"), "\n"); + splitTokens[i] = splitTokens[i].replaceAll(Matcher.quoteReplacement("\\r"), "\r"); + } + // TODO: + // Make (some of?) the above optional; for ex., we + // do need to restore the newlines when calculating UNFs; + // But if we are subsetting these vectors in order to + // create a new tab-delimited file, they will + // actually break things! -- L.A. Jul. 28 2014 + + token = StringUtils.join(splitTokens, '\\'); + + // "compatibility mode" - a hack, to be able to produce + // unfs identical to those produced by the "early" + // unf5 jar; will be removed in production 4.0. + // -- L.A. (TODO: ...) + if (compatmode && !"".equals(token)) { + if (token.length() > 128) { + if ("".equals(token.trim())) { + // don't ask... + token = token.substring(0, 129); + } else { + token = token.substring(0, 128); + // token = String.format(loc, "%.128s", token); + token = token.trim(); + // dbgLog.info("formatted and trimmed: "+token); + } } else { - token = token.substring(0, 128); - //token = String.format(loc, "%.128s", token); - token = token.trim(); - //dbgLog.info("formatted and trimmed: "+token); + if ("".equals(token.trim())) { + // again, don't ask; + // - this replicates some bugginness + // that happens inside unf5; + token = "null"; + } else { + token = token.trim(); + } } + } + + retVector[caseindex] = token; + } + } else if (isDouble) { + try { + // TODO: verify that NaN and +-Inf are + // handled correctly here! -- L.A. + // Verified: new Double("nan") works correctly, + // resulting in Double.NaN; + // Double("[+-]Inf") doesn't work however; + // (the constructor appears to be expecting it + // to be spelled as "Infinity", "-Infinity", etc. 
+ if ("inf".equalsIgnoreCase(token) || "+inf".equalsIgnoreCase(token)) { + retVector[caseindex] = java.lang.Double.POSITIVE_INFINITY; + } else if ("-inf".equalsIgnoreCase(token)) { + retVector[caseindex] = java.lang.Double.NEGATIVE_INFINITY; + } else if (token == null || token.equals("")) { + // missing value: + retVector[caseindex] = null; } else { - if ("".equals(token.trim())) { - // again, don't ask; - // - this replicates some bugginness - // that happens inside unf5; - token = "null"; - } else { - token = token.trim(); - } + retVector[caseindex] = new Double(token); } + } catch (NumberFormatException ex) { + dbgLog.warning("NumberFormatException thrown for " + token + " as Double"); + + retVector[caseindex] = null; // missing value + // TODO: ? } - - retVector[caseindex] = token; - } - } else if (isDouble) { - try { - // TODO: verify that NaN and +-Inf are - // handled correctly here! -- L.A. - // Verified: new Double("nan") works correctly, - // resulting in Double.NaN; - // Double("[+-]Inf") doesn't work however; - // (the constructor appears to be expecting it - // to be spelled as "Infinity", "-Infinity", etc. - if ("inf".equalsIgnoreCase(token) || "+inf".equalsIgnoreCase(token)) { - retVector[caseindex] = java.lang.Double.POSITIVE_INFINITY; - } else if ("-inf".equalsIgnoreCase(token)) { - retVector[caseindex] = java.lang.Double.NEGATIVE_INFINITY; - } else if (token == null || token.equals("")) { - // missing value: - retVector[caseindex] = null; - } else { - retVector[caseindex] = new Double(token); + } else if (isLong) { + try { + retVector[caseindex] = new Long(token); + } catch (NumberFormatException ex) { + retVector[caseindex] = null; // assume missing value } - } catch (NumberFormatException ex) { - dbgLog.warning("NumberFormatException thrown for "+token+" as Double"); - - retVector[caseindex] = null; // missing value - // TODO: ? - } - } else if (isLong) { - try { - retVector[caseindex] = new Long(token); - } catch (NumberFormatException ex) { - retVector[caseindex] = null; // assume missing value - } - } else if (isFloat) { - try { - if ("inf".equalsIgnoreCase(token) || "+inf".equalsIgnoreCase(token)) { - retVector[caseindex] = java.lang.Float.POSITIVE_INFINITY; - } else if ("-inf".equalsIgnoreCase(token)) { - retVector[caseindex] = java.lang.Float.NEGATIVE_INFINITY; - } else if (token == null || token.equals("")) { - // missing value: - retVector[caseindex] = null; - } else { - retVector[caseindex] = new Float(token); + } else if (isFloat) { + try { + if ("inf".equalsIgnoreCase(token) || "+inf".equalsIgnoreCase(token)) { + retVector[caseindex] = java.lang.Float.POSITIVE_INFINITY; + } else if ("-inf".equalsIgnoreCase(token)) { + retVector[caseindex] = java.lang.Float.NEGATIVE_INFINITY; + } else if (token == null || token.equals("")) { + // missing value: + retVector[caseindex] = null; + } else { + retVector[caseindex] = new Float(token); + } + } catch (NumberFormatException ex) { + dbgLog.warning("NumberFormatException thrown for " + token + " as Float"); + retVector[caseindex] = null; // assume missing value (TODO: ?) } - } catch (NumberFormatException ex) { - dbgLog.warning("NumberFormatException thrown for "+token+" as Float"); - retVector[caseindex] = null; // assume missing value (TODO: ?) 
} - } - caseindex++; - - if (bytecount == bytesRead - 1) { - byteoffset = 0; - } else { - byteoffset = bytecount + 1; - } - } else { - if (bytecount == bytesRead - 1) { - // We've reached the end of the buffer; - // This means we'll save whatever unused bytes left in - // it - i.e., the bytes between the last new line - // encountered and the end - in the leftover buffer. - - // *EXCEPT*, there may be a case of a very long String - // that is actually longer than MAX_COLUMN_BUFFER, in - // which case it is possible that we've read through - // an entire buffer of bytes without finding any - // new lines... in this case we may need to add this - // entire byte buffer to an already existing leftover - // buffer! - if (leftover == null) { - leftover = new byte[(int)bytesRead - byteoffset]; - System.arraycopy(columnBytes, byteoffset, leftover, 0, (int)bytesRead - byteoffset); + caseindex++; + + if (bytecount == bytesRead - 1) { + byteoffset = 0; } else { - if (byteoffset != 0) { + byteoffset = bytecount + 1; + } + } else { + if (bytecount == bytesRead - 1) { + // We've reached the end of the buffer; + // This means we'll save whatever unused bytes left in + // it - i.e., the bytes between the last new line + // encountered and the end - in the leftover buffer. + + // *EXCEPT*, there may be a case of a very long String + // that is actually longer than MAX_COLUMN_BUFFER, in + // which case it is possible that we've read through + // an entire buffer of bytes without finding any + // new lines... in this case we may need to add this + // entire byte buffer to an already existing leftover + // buffer! + if (leftover == null) { + leftover = new byte[(int) bytesRead - byteoffset]; + System.arraycopy(columnBytes, byteoffset, leftover, 0, (int) bytesRead - byteoffset); + } else { + if (byteoffset != 0) { throw new IOException("Reached the end of the byte buffer, with some leftover left from the last read; yet the offset is not zero!"); + } + byte[] merged = new byte[leftover.length + (int) bytesRead]; + + System.arraycopy(leftover, 0, merged, 0, leftover.length); + System.arraycopy(columnBytes, byteoffset, merged, leftover.length, (int) bytesRead); + // leftover = null; + leftover = merged; + merged = null; } - byte[] merged = new byte[leftover.length + (int)bytesRead]; + byteoffset = 0; - System.arraycopy(leftover, 0, merged, 0, leftover.length); - System.arraycopy(columnBytes, byteoffset, merged, leftover.length, (int)bytesRead); - //leftover = null; - leftover = merged; - merged = null; } - byteoffset = 0; - } + bytecount++; + } + + bytesReadTotal += bytesRead; + in.clear(); + if (columnLength - bytesReadTotal < MAX_COLUMN_BUFFER) { + in.limit((int) (columnLength - bytesReadTotal)); } - bytecount++; - } - - bytesReadTotal += bytesRead; - in.clear(); - if (columnLength - bytesReadTotal < MAX_COLUMN_BUFFER) { - in.limit((int)(columnLength - bytesReadTotal)); } + } - - fc.close(); if (caseindex != casecount) { throw new IOException("Faile to read "+casecount+" tokens for column "+column); @@ -1080,31 +1065,31 @@ public Object[] subsetObjectVector(File tabfile, int column, int varcount, int c } private long[] extractColumnOffsets (File rotatedImageFile, int varcount, int casecount) throws IOException { - BufferedInputStream rotfileStream = new BufferedInputStream(new FileInputStream(rotatedImageFile)); - - byte[] offsetHeader = new byte[varcount * 8]; long[] byteOffsets = new long[varcount]; - - int readlen = rotfileStream.read(offsetHeader); - - if (readlen != varcount * 8) { - throw new IOException 
("Could not read "+varcount*8+" header bytes from the rotated file."); - } - - for (int varindex = 0; varindex < varcount; varindex++) { - byte[] offsetBytes = new byte[8]; - System.arraycopy(offsetHeader, varindex*8, offsetBytes, 0, 8); - - ByteBuffer offsetByteBuffer = ByteBuffer.wrap(offsetBytes); - byteOffsets[varindex] = offsetByteBuffer.getLong(); - - //System.out.println(byteOffsets[varindex]); + try (BufferedInputStream rotfileStream = new BufferedInputStream(new FileInputStream(rotatedImageFile))) { + + byte[] offsetHeader = new byte[varcount * 8]; + + int readlen = rotfileStream.read(offsetHeader); + + if (readlen != varcount * 8) { + throw new IOException("Could not read " + varcount * 8 + " header bytes from the rotated file."); + } + + for (int varindex = 0; varindex < varcount; varindex++) { + byte[] offsetBytes = new byte[8]; + System.arraycopy(offsetHeader, varindex * 8, offsetBytes, 0, 8); + + ByteBuffer offsetByteBuffer = ByteBuffer.wrap(offsetBytes); + byteOffsets[varindex] = offsetByteBuffer.getLong(); + + // System.out.println(byteOffsets[varindex]); + } + } - - rotfileStream.close(); - - return byteOffsets; + + return byteOffsets; } private File getRotatedImage(File tabfile, int varcount, int casecount) throws IOException { @@ -1149,85 +1134,84 @@ private File generateRotatedImage (File tabfile, int varcount, int casecount) th // read the tab-delimited file: - FileInputStream tabfileStream = new FileInputStream(tabfile); - - Scanner scanner = new Scanner(tabfileStream); - scanner.useDelimiter("\\n"); - - for (int caseindex = 0; caseindex < casecount; caseindex++) { - if (scanner.hasNext()) { - String[] line = (scanner.next()).split("\t", -1); - // TODO: throw an exception if there are fewer tab-delimited - // tokens than the number of variables specified. - String token = ""; - int tokensize = 0; - for (int varindex = 0; varindex < varcount; varindex++) { - // TODO: figure out the safest way to convert strings to - // bytes here. Is it going to be safer to use getBytes("UTF8")? - // we are already making the assumption that the values - // in the tab file are in UTF8. -- L.A. - token = line[varindex] + "\n"; - tokensize = token.getBytes().length; - if (bufferedSizes[varindex]+tokensize > MAX_COLUMN_BUFFER) { - // fill the buffer and dump its contents into the temp file: - // (do note that there may be *several* MAX_COLUMN_BUFFERs - // worth of bytes in the token!) 
- - int tokenoffset = 0; - - if (bufferedSizes[varindex] != MAX_COLUMN_BUFFER) { - tokenoffset = MAX_COLUMN_BUFFER-bufferedSizes[varindex]; - System.arraycopy(token.getBytes(), 0, bufferedColumns[varindex], bufferedSizes[varindex], tokenoffset); - } // (otherwise the buffer is already full, and we should - // simply dump it into the temp file, without adding any - // extra bytes to it) - - File bufferTempFile = columnTempFiles[varindex]; - if (bufferTempFile == null) { - bufferTempFile = File.createTempFile("columnBufferFile", "bytes"); - columnTempFiles[varindex] = bufferTempFile; - } - - // *append* the contents of the buffer to the end of the - // temp file, if already exists: - BufferedOutputStream outputStream = new BufferedOutputStream(new FileOutputStream (bufferTempFile, true)); - outputStream.write(bufferedColumns[varindex], 0, MAX_COLUMN_BUFFER); - cachedfileSizes[varindex] += MAX_COLUMN_BUFFER; - - // keep writing MAX_COLUMN_BUFFER-size chunks of bytes into - // the temp file, for as long as there's more than MAX_COLUMN_BUFFER - // bytes left in the token: - - while (tokensize - tokenoffset > MAX_COLUMN_BUFFER) { - outputStream.write(token.getBytes(), tokenoffset, MAX_COLUMN_BUFFER); - cachedfileSizes[varindex] += MAX_COLUMN_BUFFER; - tokenoffset += MAX_COLUMN_BUFFER; + try (FileInputStream tabfileStream = new FileInputStream(tabfile); + Scanner scanner = new Scanner(tabfileStream)) { + scanner.useDelimiter("\\n"); + + for (int caseindex = 0; caseindex < casecount; caseindex++) { + if (scanner.hasNext()) { + String[] line = (scanner.next()).split("\t", -1); + // TODO: throw an exception if there are fewer tab-delimited + // tokens than the number of variables specified. + String token = ""; + int tokensize = 0; + for (int varindex = 0; varindex < varcount; varindex++) { + // TODO: figure out the safest way to convert strings to + // bytes here. Is it going to be safer to use getBytes("UTF8")? + // we are already making the assumption that the values + // in the tab file are in UTF8. -- L.A. + token = line[varindex] + "\n"; + tokensize = token.getBytes().length; + if (bufferedSizes[varindex] + tokensize > MAX_COLUMN_BUFFER) { + // fill the buffer and dump its contents into the temp file: + // (do note that there may be *several* MAX_COLUMN_BUFFERs + // worth of bytes in the token!) 
+ + int tokenoffset = 0; + + if (bufferedSizes[varindex] != MAX_COLUMN_BUFFER) { + tokenoffset = MAX_COLUMN_BUFFER - bufferedSizes[varindex]; + System.arraycopy(token.getBytes(), 0, bufferedColumns[varindex], bufferedSizes[varindex], tokenoffset); + } // (otherwise the buffer is already full, and we should + // simply dump it into the temp file, without adding any + // extra bytes to it) + + File bufferTempFile = columnTempFiles[varindex]; + if (bufferTempFile == null) { + bufferTempFile = File.createTempFile("columnBufferFile", "bytes"); + columnTempFiles[varindex] = bufferTempFile; + } + + // *append* the contents of the buffer to the end of the + // temp file, if already exists: + try (BufferedOutputStream outputStream = new BufferedOutputStream( + new FileOutputStream(bufferTempFile, true))) { + outputStream.write(bufferedColumns[varindex], 0, MAX_COLUMN_BUFFER); + cachedfileSizes[varindex] += MAX_COLUMN_BUFFER; + + // keep writing MAX_COLUMN_BUFFER-size chunks of bytes into + // the temp file, for as long as there's more than MAX_COLUMN_BUFFER + // bytes left in the token: + + while (tokensize - tokenoffset > MAX_COLUMN_BUFFER) { + outputStream.write(token.getBytes(), tokenoffset, MAX_COLUMN_BUFFER); + cachedfileSizes[varindex] += MAX_COLUMN_BUFFER; + tokenoffset += MAX_COLUMN_BUFFER; + } + + } + + // buffer the remaining bytes and reset the buffered + // byte counter: + + System.arraycopy(token.getBytes(), + tokenoffset, + bufferedColumns[varindex], + 0, + tokensize - tokenoffset); + + bufferedSizes[varindex] = tokensize - tokenoffset; + + } else { + // continue buffering + System.arraycopy(token.getBytes(), 0, bufferedColumns[varindex], bufferedSizes[varindex], tokensize); + bufferedSizes[varindex] += tokensize; } - - outputStream.close(); - - // buffer the remaining bytes and reset the buffered - // byte counter: - - System.arraycopy(token.getBytes(), - tokenoffset, - bufferedColumns[varindex], - 0, - tokensize - tokenoffset); - - bufferedSizes[varindex] = tokensize - tokenoffset; - - } else { - // continue buffering - System.arraycopy(token.getBytes(), 0, bufferedColumns[varindex], bufferedSizes[varindex], tokensize); - bufferedSizes[varindex] += tokensize; } + } else { + throw new IOException("Tab file has fewer rows than the stored number of cases!"); } - } else { - scanner.close(); - throw new IOException("Tab file has fewer rows than the stored number of cases!"); } - } // OK, we've created the individual byte vectors of the tab file columns; @@ -1235,60 +1219,61 @@ private File generateRotatedImage (File tabfile, int varcount, int casecount) th // We now need to go through all these buffers and create the final // rotated image file. - BufferedOutputStream finalOut = new BufferedOutputStream(new FileOutputStream (new File(rotatedImageFileName))); - - // but first we should create the offset header and write it out into - // the final file; because it should be at the head, doh! - - long columnOffset = varcount * 8; - // (this is the offset of the first column vector; it is equal to the - // size of the offset header, i.e. 
varcount * 8 bytes) - - for (int varindex = 0; varindex < varcount; varindex++) { - long totalColumnBytes = cachedfileSizes[varindex] + bufferedSizes[varindex]; - columnOffset+=totalColumnBytes; - //totalColumnBytes; - byte[] columnOffsetByteArray = ByteBuffer.allocate(8).putLong(columnOffset).array(); - System.arraycopy(columnOffsetByteArray, 0, offsetHeader, varindex * 8, 8); - } - - finalOut.write(offsetHeader, 0, varcount * 8); - - for (int varindex = 0; varindex < varcount; varindex++) { - long cachedBytesRead = 0; - - // check if there is a cached temp file: - - File cachedTempFile = columnTempFiles[varindex]; - if (cachedTempFile != null) { - byte[] cachedBytes = new byte[MAX_COLUMN_BUFFER]; - BufferedInputStream cachedIn = new BufferedInputStream(new FileInputStream(cachedTempFile)); - int readlen = 0; - while ((readlen = cachedIn.read(cachedBytes)) > -1) { - finalOut.write(cachedBytes, 0, readlen); - cachedBytesRead += readlen; - } - cachedIn.close(); - // delete the temp file: - cachedTempFile.delete(); - + try (BufferedOutputStream finalOut = new BufferedOutputStream( + new FileOutputStream(new File(rotatedImageFileName)))) { + + // but first we should create the offset header and write it out into + // the final file; because it should be at the head, doh! + + long columnOffset = varcount * 8; + // (this is the offset of the first column vector; it is equal to the + // size of the offset header, i.e. varcount * 8 bytes) + + for (int varindex = 0; varindex < varcount; varindex++) { + long totalColumnBytes = cachedfileSizes[varindex] + bufferedSizes[varindex]; + columnOffset += totalColumnBytes; + // totalColumnBytes; + byte[] columnOffsetByteArray = ByteBuffer.allocate(8).putLong(columnOffset).array(); + System.arraycopy(columnOffsetByteArray, 0, offsetHeader, varindex * 8, 8); } - - if (cachedBytesRead != cachedfileSizes[varindex]) { - finalOut.close(); - throw new IOException("Could not read the correct number of bytes cached for column "+varindex+"; "+ + + finalOut.write(offsetHeader, 0, varcount * 8); + + for (int varindex = 0; varindex < varcount; varindex++) { + long cachedBytesRead = 0; + + // check if there is a cached temp file: + + File cachedTempFile = columnTempFiles[varindex]; + if (cachedTempFile != null) { + byte[] cachedBytes = new byte[MAX_COLUMN_BUFFER]; + try (BufferedInputStream cachedIn = new BufferedInputStream(new FileInputStream(cachedTempFile))) { + int readlen = 0; + while ((readlen = cachedIn.read(cachedBytes)) > -1) { + finalOut.write(cachedBytes, 0, readlen); + cachedBytesRead += readlen; + } + } + + // delete the temp file: + cachedTempFile.delete(); + + } + + if (cachedBytesRead != cachedfileSizes[varindex]) { + throw new IOException("Could not read the correct number of bytes cached for column "+varindex+"; "+ cachedfileSizes[varindex] + " bytes expected, "+cachedBytesRead+" read."); + } + + // then check if there are any bytes buffered for this column: + + if (bufferedSizes[varindex] > 0) { + finalOut.write(bufferedColumns[varindex], 0, bufferedSizes[varindex]); + } + } - - // then check if there are any bytes buffered for this column: - - if (bufferedSizes[varindex] > 0) { - finalOut.write(bufferedColumns[varindex], 0, bufferedSizes[varindex]); - } - } - finalOut.close(); return new File(rotatedImageFileName); } @@ -1305,88 +1290,87 @@ private File generateRotatedImage (File tabfile, int varcount, int casecount) th */ private void reverseRotatedImage (File rotfile, int varcount, int casecount) throws IOException { // open the file, read in the 
offset header: - BufferedInputStream rotfileStream = new BufferedInputStream(new FileInputStream(rotfile)); - - byte[] offsetHeader = new byte[varcount * 8]; - long[] byteOffsets = new long[varcount]; - - int readlen = rotfileStream.read(offsetHeader); - - if (readlen != varcount * 8) { - throw new IOException ("Could not read "+varcount*8+" header bytes from the rotated file."); - } - - for (int varindex = 0; varindex < varcount; varindex++) { - byte[] offsetBytes = new byte[8]; - System.arraycopy(offsetHeader, varindex*8, offsetBytes, 0, 8); - - ByteBuffer offsetByteBuffer = ByteBuffer.wrap(offsetBytes); - byteOffsets[varindex] = offsetByteBuffer.getLong(); - - //System.out.println(byteOffsets[varindex]); - } - - String [][] reversedMatrix = new String[casecount][varcount]; - - long offset = varcount * 8; - byte[] columnBytes; - - for (int varindex = 0; varindex < varcount; varindex++) { - long columnLength = byteOffsets[varindex] - offset; + try (BufferedInputStream rotfileStream = new BufferedInputStream(new FileInputStream(rotfile))) { + byte[] offsetHeader = new byte[varcount * 8]; + long[] byteOffsets = new long[varcount]; + int readlen = rotfileStream.read(offsetHeader); - - columnBytes = new byte[(int)columnLength]; - readlen = rotfileStream.read(columnBytes); - - if (readlen != columnLength) { - throw new IOException ("Could not read "+columnBytes+" bytes for column "+varindex); + if (readlen != varcount * 8) { + throw new IOException ("Could not read "+varcount*8+" header bytes from the rotated file."); } - /* - String columnString = new String(columnBytes); - //System.out.print(columnString); - String[] values = columnString.split("\n", -1); - if (values.length < casecount) { - throw new IOException("count mismatch: "+values.length+" tokens found for column "+varindex); + for (int varindex = 0; varindex < varcount; varindex++) { + byte[] offsetBytes = new byte[8]; + System.arraycopy(offsetHeader, varindex*8, offsetBytes, 0, 8); + + ByteBuffer offsetByteBuffer = ByteBuffer.wrap(offsetBytes); + byteOffsets[varindex] = offsetByteBuffer.getLong(); + + //System.out.println(byteOffsets[varindex]); } - for (int caseindex = 0; caseindex < casecount; caseindex++) { - reversedMatrix[caseindex][varindex] = values[caseindex]; - }*/ + String [][] reversedMatrix = new String[casecount][varcount]; + + long offset = varcount * 8; + byte[] columnBytes; - int bytecount = 0; - int byteoffset = 0; - int caseindex = 0; - //System.out.println("generating value vector for column "+varindex); - while (bytecount < columnLength) { - if (columnBytes[bytecount] == '\n') { - String token = new String(columnBytes, byteoffset, bytecount-byteoffset); - reversedMatrix[caseindex++][varindex] = token; - byteoffset = bytecount + 1; + for (int varindex = 0; varindex < varcount; varindex++) { + long columnLength = byteOffsets[varindex] - offset; + + + + columnBytes = new byte[(int)columnLength]; + readlen = rotfileStream.read(columnBytes); + + if (readlen != columnLength) { + throw new IOException ("Could not read "+columnBytes+" bytes for column "+varindex); + } + /* + String columnString = new String(columnBytes); + //System.out.print(columnString); + String[] values = columnString.split("\n", -1); + + if (values.length < casecount) { + throw new IOException("count mismatch: "+values.length+" tokens found for column "+varindex); } - bytecount++; + + for (int caseindex = 0; caseindex < casecount; caseindex++) { + reversedMatrix[caseindex][varindex] = values[caseindex]; + }*/ + + int bytecount = 0; + int byteoffset 
= 0; + int caseindex = 0; + //System.out.println("generating value vector for column "+varindex); + while (bytecount < columnLength) { + if (columnBytes[bytecount] == '\n') { + String token = new String(columnBytes, byteoffset, bytecount-byteoffset); + reversedMatrix[caseindex++][varindex] = token; + byteoffset = bytecount + 1; + } + bytecount++; + } + + if (caseindex != casecount) { + throw new IOException("count mismatch: "+caseindex+" tokens found for column "+varindex); + } + offset = byteOffsets[varindex]; } - if (caseindex != casecount) { - throw new IOException("count mismatch: "+caseindex+" tokens found for column "+varindex); - } - offset = byteOffsets[varindex]; - } - - for (int caseindex = 0; caseindex < casecount; caseindex++) { - for (int varindex = 0; varindex < varcount; varindex++) { - System.out.print(reversedMatrix[caseindex][varindex]); - if (varindex < varcount-1) { - System.out.print("\t"); - } else { - System.out.print("\n"); + for (int caseindex = 0; caseindex < casecount; caseindex++) { + for (int varindex = 0; varindex < varcount; varindex++) { + System.out.print(reversedMatrix[caseindex][varindex]); + if (varindex < varcount-1) { + System.out.print("\t"); + } else { + System.out.print("\n"); + } } } + } - rotfileStream.close(); - } diff --git a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java index febbb249a91..1d0ec0f19d9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java +++ b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java @@ -26,20 +26,22 @@ import edu.harvard.iq.dataverse.engine.command.impl.RestrictFileCommand; import edu.harvard.iq.dataverse.engine.command.impl.UpdateDatasetVersionCommand; import edu.harvard.iq.dataverse.ingest.IngestServiceBean; -import edu.harvard.iq.dataverse.license.LicenseServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.SystemConfig; import edu.harvard.iq.dataverse.util.file.CreateDataFileResult; import edu.harvard.iq.dataverse.util.json.JsonPrinter; +import edu.harvard.iq.dataverse.util.json.JsonUtil; + import java.io.IOException; import java.io.InputStream; -import java.io.StringReader; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.HashMap; import java.util.Iterator; import java.util.List; +import java.util.Map; import java.util.Objects; import java.util.Set; import java.util.logging.Level; @@ -47,10 +49,10 @@ import javax.ejb.EJBException; import javax.json.Json; import javax.json.JsonArrayBuilder; +import javax.json.JsonNumber; import javax.json.JsonObject; import javax.json.JsonArray; import javax.json.JsonObjectBuilder; -import javax.json.JsonReader; import javax.validation.ConstraintViolation; import javax.ws.rs.core.MediaType; import javax.ws.rs.core.Response; @@ -114,10 +116,9 @@ public class AddReplaceFileHelper{ public static String FILE_ADD_OPERATION = "FILE_ADD_OPERATION"; public static String FILE_REPLACE_OPERATION = "FILE_REPLACE_OPERATION"; public static String FILE_REPLACE_FORCE_OPERATION = "FILE_REPLACE_FORCE_OPERATION"; - public static String MULTIPLEFILES_ADD_OPERATION = "MULTIPLEFILES_ADD_OPERATION"; - + private String currentOperation; - + boolean multifile = false; // ----------------------------------- // All the needed EJBs, passed to the constructor // 
----------------------------------- @@ -127,8 +128,6 @@ public class AddReplaceFileHelper{ private PermissionServiceBean permissionService; private EjbDataverseEngine commandEngine; private SystemConfig systemConfig; - private LicenseServiceBean licenseServiceBean; - // ----------------------------------- // Instance variables directly added // ----------------------------------- @@ -144,10 +143,6 @@ public class AddReplaceFileHelper{ // -- Optional private DataFile fileToReplace; // step 25 - // ----------------------------------- - // Instance variables derived from other input - // ----------------------------------- - private User user; private DatasetVersion workingVersion; private DatasetVersion clone; List initialFileList; @@ -256,13 +251,12 @@ public void resetFileHelper(){ * @param dvRequest */ public AddReplaceFileHelper(DataverseRequest dvRequest, - IngestServiceBean ingestService, + IngestServiceBean ingestService, DatasetServiceBean datasetService, DataFileServiceBean fileService, PermissionServiceBean permissionService, EjbDataverseEngine commandEngine, - SystemConfig systemConfig, - LicenseServiceBean licenseServiceBean){ + SystemConfig systemConfig){ // --------------------------------- // make sure DataverseRequest isn't null and has a user @@ -304,16 +298,12 @@ public AddReplaceFileHelper(DataverseRequest dvRequest, this.permissionService = permissionService; this.commandEngine = commandEngine; this.systemConfig = systemConfig; - this.licenseServiceBean = licenseServiceBean; - - - initErrorHandling(); // Initiate instance vars this.dataset = null; this.dvRequest = dvRequest; - this.user = dvRequest.getUser(); + dvRequest.getUser(); } @@ -336,7 +326,7 @@ public boolean runAddFileByDataset(Dataset chosenDataset, } - public boolean runAddFileByDataset(Dataset chosenDataset, + private boolean runAddFileByDataset(Dataset chosenDataset, String newFileName, String newFileContentType, String newStorageIdentifier, @@ -348,12 +338,8 @@ public boolean runAddFileByDataset(Dataset chosenDataset, initErrorHandling(); - if(multipleFiles) { - this.currentOperation = MULTIPLEFILES_ADD_OPERATION; - } - else { - this.currentOperation = FILE_ADD_OPERATION; - } + multifile=multipleFiles; + this.currentOperation = FILE_ADD_OPERATION; if (!this.step_001_loadDataset(chosenDataset)){ return false; @@ -393,6 +379,11 @@ public boolean runAddFile(Dataset dataset, }*/ + public boolean runForceReplaceFile(long fileToReplaceId, String newFilename, String newFileContentType, + String newStorageIdentifier, InputStream newFileInputStream, Dataset ds, OptionalFileParams optionalFileParams) { + return runForceReplaceFile(fileToReplaceId, newFilename, newFileContentType, + newStorageIdentifier, newFileInputStream, ds, optionalFileParams, false); + } /** * After the constructor, this method is called to replace a file * @@ -403,16 +394,19 @@ public boolean runAddFile(Dataset dataset, * @param newFileInputStream * @return */ - public boolean runForceReplaceFile(Long oldFileId, + private boolean runForceReplaceFile(Long oldFileId, String newFileName, String newFileContentType, String newStorageIdentifier, InputStream newFileInputStream, - OptionalFileParams optionalFileParams){ + Dataset ds, + OptionalFileParams optionalFileParams, + boolean multipleFiles){ msgt(">> runForceReplaceFile"); initErrorHandling(); + multifile=multipleFiles; this.currentOperation = FILE_REPLACE_FORCE_OPERATION; @@ -426,22 +420,35 @@ public boolean runForceReplaceFile(Long oldFileId, if 
(!this.step_005_loadFileToReplaceById(oldFileId)){ return false; } + if(!ds.getId().equals(fileToReplace.getOwner().getId())) { + this.addErrorSevere(getBundleErr("existing_file_to_replace_not_in_dataset")); + return false; + } + // ds may include changes not yet in the copy created when loading the file from the db, as in replaceFiles() + return this.runAddReplaceFile(ds, newFileName, newFileContentType, newStorageIdentifier, newFileInputStream, optionalFileParams); + } + + public boolean runReplaceFile(long fileToReplaceId, String newFilename, String newFileContentType, + String newStorageIdentifier, InputStream newFileInputStream, Dataset ds, OptionalFileParams optionalFileParams) { + return runReplaceFile(fileToReplaceId, newFilename, newFileContentType, + newStorageIdentifier, newFileInputStream, ds, optionalFileParams, false); - return this.runAddReplaceFile(fileToReplace.getOwner(), newFileName, newFileContentType, newStorageIdentifier, newFileInputStream, optionalFileParams); } - - public boolean runReplaceFile(Long oldFileId, + private boolean runReplaceFile(Long oldFileId, String newFileName, String newFileContentType, String newStorageIdentifier, InputStream newFileInputStream, - OptionalFileParams optionalFileParams){ + Dataset ds, + OptionalFileParams optionalFileParams, + boolean multipleFiles){ msgt(">> runReplaceFile"); initErrorHandling(); + multifile=multipleFiles; this.currentOperation = FILE_REPLACE_OPERATION; if (oldFileId==null){ @@ -455,7 +462,13 @@ public boolean runReplaceFile(Long oldFileId, if (!this.step_005_loadFileToReplaceById(oldFileId)){ return false; } - return this.runAddReplaceFile(fileToReplace.getOwner(), newFileName, newFileContentType, newStorageIdentifier, newFileInputStream, optionalFileParams); + + if(!ds.getId().equals(fileToReplace.getOwner().getId())) { + this.addErrorSevere(getBundleErr("existing_file_to_replace_not_in_dataset")); + return false; + } + // ds may include changes not yet in the copy created when loading the file from the db, as in replaceFiles() + return this.runAddReplaceFile(ds, newFileName, newFileContentType, newStorageIdentifier, newFileInputStream, optionalFileParams); } @@ -759,19 +772,15 @@ private boolean runAddReplacePhase2(boolean tabIngest){ return false; } - - if (this.isFileReplaceOperation()){ + if (this.isFileReplaceOperation()) { msgt("step_080_run_update_dataset_command_for_replace"); - if (!this.step_080_run_update_dataset_command_for_replace()){ - return false; + if (!this.step_080_run_update_dataset_command_for_replace()) { + return false; } - - }else{ + } else if (!multifile) { msgt("step_070_run_update_dataset_command"); - if (!this.isMultipleFilesAddOperation()) { - if (!this.step_070_run_update_dataset_command()) { - return false; - } + if (!this.step_070_run_update_dataset_command()) { + return false; } } @@ -834,16 +843,6 @@ public boolean isFileAddOperation(){ return this.currentOperation.equals(FILE_ADD_OPERATION); } - /** - * Is this a multiple files add operation ? 
- * @return - */ - - public boolean isMultipleFilesAddOperation(){ - - return this.currentOperation.equals(MULTIPLEFILES_ADD_OPERATION); - } - /** * Initialize error handling vars */ @@ -1201,7 +1200,10 @@ private boolean step_030_createNewFilesViaIngest(){ // Load the working version of the Dataset workingVersion = dataset.getOrCreateEditVersion(); - clone = workingVersion.cloneDatasetVersion(); + if(!multifile) { + //Don't repeatedly update the clone (losing changes) in multifile case + clone = workingVersion.cloneDatasetVersion(); + } try { CreateDataFileResult result = FileUtil.createDataFiles(workingVersion, this.newFileInputStream, @@ -1292,9 +1294,6 @@ private boolean step_040_auto_checkForDuplicates(){ // Initialize new file list this.finalFileList = new ArrayList<>(); - String warningMessage = null; - - if (isFileReplaceOperation() && this.fileToReplace == null){ // This error shouldn't happen if steps called correctly this.addErrorSevere(getBundleErr("existing_file_to_replace_is_null") + " (This error shouldn't happen if steps called in sequence....checkForFileReplaceDuplicate)"); @@ -1511,10 +1510,7 @@ private boolean step_050_checkForConstraintViolations(){ return true; } - // ----------------------------------------------------------- - // violations found: gather all error messages - // ----------------------------------------------------------- - List errMsgs = new ArrayList<>(); + new ArrayList<>(); for (ConstraintViolation violation : constraintViolations) { /* for 8859 return conflict response status if the validation fails @@ -1566,7 +1562,7 @@ private boolean step_055_loadOptionalFileParams(OptionalFileParams optionalFileP } } catch (DataFileTagException ex) { - Logger.getLogger(AddReplaceFileHelper.class.getName()).log(Level.SEVERE, null, ex); + logger.log(Level.SEVERE, null, ex); addError(ex.getMessage()); return false; } catch (CommandException ex) { @@ -1605,70 +1601,81 @@ private boolean step_060_addFilesViaIngestService(boolean tabIngest){ return true; } + List filesToDelete = new ArrayList(); + Map deleteFileStorageLocations = new HashMap<>(); /** * Create and run the update dataset command * * @return */ - private boolean step_070_run_update_dataset_command(){ - - if (this.hasError()){ + private boolean step_070_run_update_dataset_command() { + //Note -only single file operations and multifile replace call this, multifile add does not + if (this.hasError()) { return false; } - Command update_cmd; + Command update_cmd = null; String deleteStorageLocation = null; - long deleteFileId=-1; - if(isFileReplaceOperation()) { - List filesToDelete = new ArrayList(); + long deleteFileId = -1; + if (isFileReplaceOperation()) { + if (!multifile) { + filesToDelete.clear(); + deleteFileStorageLocations.clear(); + } filesToDelete.add(fileToReplace.getFileMetadata()); - - if(!fileToReplace.isReleased()) { - //If file is only in draft version, also need to delete the physical file - deleteStorageLocation = fileService.getPhysicalFileToDelete(fileToReplace); - deleteFileId=fileToReplace.getId(); + + if (!fileToReplace.isReleased()) { + // If file is only in draft version, also need to delete the physical file + deleteStorageLocation = fileService.getPhysicalFileToDelete(fileToReplace); + deleteFileId = fileToReplace.getId(); + deleteFileStorageLocations.put(deleteFileId, deleteStorageLocation); + } + if (!multifile) { + // Adding the file to the delete list for the command will delete this + // filemetadata and, if the file hasn't been released, the datafile itself. 
+ update_cmd = new UpdateDatasetVersionCommand(dataset, dvRequest, filesToDelete, clone); } - //Adding the file to the delete list for the command will delete this filemetadata and, if the file hasn't been released, the datafile itself. - update_cmd = new UpdateDatasetVersionCommand(dataset, dvRequest, filesToDelete, clone); } else { - update_cmd = new UpdateDatasetVersionCommand(dataset, dvRequest, clone); + update_cmd = new UpdateDatasetVersionCommand(dataset, dvRequest, clone); } - ((UpdateDatasetVersionCommand) update_cmd).setValidateLenient(true); - - try { - // Submit the update dataset command - // and update the local dataset object - // - dataset = commandEngine.submit(update_cmd); - } catch (CommandException ex) { - /** - * @todo Add a test to exercise this error. - */ - this.addErrorSevere(getBundleErr("add.add_file_error")); - logger.severe(ex.getMessage()); - return false; - }catch (EJBException ex) { - /** - * @todo Add a test to exercise this error. - */ - this.addErrorSevere("add.add_file_error (see logs)"); - logger.severe(ex.getMessage()); - return false; + if (!multifile) { + //Avoid NPE in multifile replace case + ((UpdateDatasetVersionCommand) update_cmd).setValidateLenient(true); } - //Sanity check - if(isFileReplaceOperation()) { - if (deleteStorageLocation != null) { - // Finalize the delete of the physical file - // (File service will double-check that the datafile no - // longer exists in the database, before proceeding to - // delete the physical file) - try { - fileService.finalizeFileDelete(deleteFileId, deleteStorageLocation); - } catch (IOException ioex) { - logger.warning("Failed to delete the physical file associated with the deleted datafile id=" - + deleteFileId + ", storage location: " + deleteStorageLocation); - } + if (!multifile) { + try { + // Submit the update dataset command + // and update the local dataset object + // + dataset = commandEngine.submit(update_cmd); + } catch (CommandException ex) { + /** + * @todo Add a test to exercise this error. + */ + this.addErrorSevere(getBundleErr("add.add_file_error")); + logger.severe(ex.getMessage()); + return false; + } catch (EJBException ex) { + /** + * @todo Add a test to exercise this error. 
+ */ + this.addErrorSevere("add.add_file_error (see logs)"); + logger.severe(ex.getMessage()); + return false; + } + } + + if (isFileReplaceOperation() && deleteFileId!=-1 && !multifile) { + // Finalize the delete of the physical file + // (File service will double-check that the datafile no + // longer exists in the database, before proceeding to + // delete the physical file) + try { + fileService.finalizeFileDelete(deleteFileId, deleteStorageLocation); + } catch (IOException ioex) { + logger.warning("Failed to delete the physical file associated with the deleted datafile id=" + + deleteFileId + ", storage location: " + deleteStorageLocation); } } return true; @@ -1766,7 +1773,7 @@ private boolean step_080_run_update_dataset_command_for_replace(){ } /* - * Go through the final file list, settting the rootFileId and previousFileId + * Go through the final file list, setting the rootFileId and previousFileId */ for (DataFile df : finalFileList) { df.setPreviousDataFileId(fileToReplace.getId()); @@ -1775,7 +1782,7 @@ private boolean step_080_run_update_dataset_command_for_replace(){ } } - // Call the update dataset command which will delete the replaced filemetadata and file in needed (if file is not released) + // Call the update dataset command which will delete the replaced filemetadata and file if needed (if file is not released) // return step_070_run_update_dataset_command(); @@ -1927,7 +1934,7 @@ private boolean step_100_startIngestJobs(){ //return true; //} - if (!this.isMultipleFilesAddOperation()) { + if (!multifile) { msg("pre ingest start"); // start the ingest! ingestService.startIngestJobsForDataset(dataset, dvRequest.getAuthenticatedUser()); @@ -2021,6 +2028,13 @@ public void setDuplicateFileWarning(String duplicateFileWarning) { this.duplicateFileWarning = duplicateFileWarning; } + /** Add multiple pre-positioned files listed in the jsonData. Works with direct upload, Globus, and other out-of-band methods. + * + * @param jsonData - an array of jsonData entries (one per file) using the single add file jsonData format + * @param dataset + * @param authUser + * @return + */ public Response addFiles(String jsonData, Dataset dataset, User authUser) { msgt("(addFilesToDataset) jsonData: " + jsonData.toString()); @@ -2033,15 +2047,14 @@ public Response addFiles(String jsonData, Dataset dataset, User authUser) { // ----------------------------------------------------------- // Read jsonData and Parse files information from jsondata : // ----------------------------------------------------------- - try (StringReader rdr = new StringReader(jsonData)) { - JsonReader dbJsonReader = Json.createReader(rdr); - filesJson = dbJsonReader.readArray(); - dbJsonReader.close(); + try { + filesJson = JsonUtil.getJsonArray(jsonData); if (filesJson != null) { totalNumberofFiles = filesJson.getValuesAs(JsonObject.class).size(); - + workingVersion = dataset.getOrCreateEditVersion(); + clone = workingVersion.cloneDatasetVersion(); for (JsonObject fileJson : filesJson.getValuesAs(JsonObject.class)) { OptionalFileParams optionalFileParams = null; @@ -2065,10 +2078,9 @@ public Response addFiles(String jsonData, Dataset dataset, User authUser) { } msgt("ADD! 
= " + newFilename); - if (!hasError()) { - runAddFileByDataset(dataset, newFilename, newFileContentType, newStorageIdentifier, - null, optionalFileParams, true); - } + + runAddFileByDataset(dataset, newFilename, newFileContentType, newStorageIdentifier, null, + optionalFileParams, true); if (hasError()) { JsonObjectBuilder fileoutput = Json.createObjectBuilder() .add("storageIdentifier", newStorageIdentifier) @@ -2103,7 +2115,7 @@ public Response addFiles(String jsonData, Dataset dataset, User authUser) { } } catch (DataFileTagException ex) { - Logger.getLogger(Files.class.getName()).log(Level.SEVERE, null, ex); + logger.log(Level.SEVERE, null, ex); JsonObjectBuilder fileoutput = Json.createObjectBuilder() .add("errorCode", Response.Status.BAD_REQUEST.getStatusCode()) .add("message", ex.getMessage()) @@ -2112,7 +2124,7 @@ public Response addFiles(String jsonData, Dataset dataset, User authUser) { } catch (NoFilesException ex) { - Logger.getLogger(Files.class.getName()).log(Level.SEVERE, null, ex); + logger.log(Level.SEVERE, null, ex); JsonObjectBuilder fileoutput = Json.createObjectBuilder() .add("errorCode", Response.Status.BAD_REQUEST.getStatusCode()) .add("message", BundleUtil.getStringFromBundle("NoFileException! Serious Error! See administrator!")) @@ -2131,7 +2143,7 @@ public Response addFiles(String jsonData, Dataset dataset, User authUser) { } try { - Command cmd = new UpdateDatasetVersionCommand(dataset, dvRequest); + Command cmd = new UpdateDatasetVersionCommand(dataset, dvRequest, clone); ((UpdateDatasetVersionCommand) cmd).setValidateLenient(true); commandEngine.submit(cmd); } catch (CommandException ex) { @@ -2140,9 +2152,6 @@ public Response addFiles(String jsonData, Dataset dataset, User authUser) { dataset = datasetService.find(dataset.getId()); - List s = dataset.getFiles(); - for (DataFile dataFile : s) { - } //ingest job ingestService.startIngestJobsForDataset(dataset, (AuthenticatedUser) authUser); @@ -2166,6 +2175,174 @@ public Response addFiles(String jsonData, Dataset dataset, User authUser) { .add("status", STATUS_OK) .add("data", Json.createObjectBuilder().add("Files", jarr).add("Result", result)).build() ).build(); } + + /** + * Replace multiple files with prepositioned replacements as listed in the + * jsonData. Works with direct upload, Globus, and other out-of-band methods. 
+ * + * @param jsonData - must include fileToReplaceId key with file ID and may include forceReplace key with true/false(default) + * @param dataset + * @param authUser + * @return + */ + + public Response replaceFiles(String jsonData, Dataset ds, User authUser) { + msgt("(replaceFilesInDataset) jsonData: " + jsonData.toString()); + + this.dataset = ds; + JsonArrayBuilder jarr = Json.createArrayBuilder(); + + JsonArray filesJson = null; + + int totalNumberofFiles = 0; + int successNumberofFiles = 0; + // ----------------------------------------------------------- + // Read jsonData and Parse files information from jsondata : + // ----------------------------------------------------------- + try { + filesJson = JsonUtil.getJsonArray(jsonData); + + + if (filesJson != null) { + totalNumberofFiles = filesJson.getValuesAs(JsonObject.class).size(); + workingVersion = dataset.getOrCreateEditVersion(); + clone = workingVersion.cloneDatasetVersion(); + for (JsonObject fileJson : filesJson.getValuesAs(JsonObject.class)) { + boolean forceReplace = false; + // (2a) Check for optional "forceReplace" + if ((fileJson.containsKey("forceReplace"))) { + forceReplace = fileJson.getBoolean("forceReplace", false); + } + long fileToReplaceId = -1; + JsonNumber ftri = fileJson.getJsonNumber("fileToReplaceId"); + if(ftri !=null) { + fileToReplaceId = ftri.longValueExact(); + } + + OptionalFileParams optionalFileParams = null; + try { + // (2b) Load up optional params via JSON + // - Will skip extra attributes which includes fileToReplaceId and forceReplace + optionalFileParams = new OptionalFileParams(fileJson.toString()); + + String newFilename = null; + String newFileContentType = null; + String newStorageIdentifier = null; + if ((fileToReplaceId !=-1) && optionalFileParams.hasStorageIdentifier()) { + newStorageIdentifier = optionalFileParams.getStorageIdentifier(); + newStorageIdentifier = DataAccess.expandStorageIdentifierIfNeeded(newStorageIdentifier); + if(!DataAccess.uploadToDatasetAllowed(dataset, newStorageIdentifier)) { + addErrorSevere("Dataset store configuration does not allow provided storageIdentifier."); + } + if (optionalFileParams.hasFileName()) { + newFilename = optionalFileParams.getFileName(); + if (optionalFileParams.hasMimetype()) { + newFileContentType = optionalFileParams.getMimeType(); + } + } + + msgt("REPLACE! 
= " + newFilename); + if (forceReplace) { + runForceReplaceFile(fileToReplaceId, newFilename, newFileContentType, + newStorageIdentifier, null, dataset, optionalFileParams, true); + } else { + runReplaceFile(fileToReplaceId, newFilename, newFileContentType, newStorageIdentifier, + null, dataset, optionalFileParams, true); + } + if (hasError()) { + JsonObjectBuilder fileoutput = Json.createObjectBuilder() + .add("storageIdentifier", newStorageIdentifier) + .add("errorMessage", getHttpErrorCode().toString() +":"+ getErrorMessagesAsString("\n")) + .add("fileDetails", fileJson); + jarr.add(fileoutput); + } else { + JsonObject successresult = getSuccessResultAsJsonObjectBuilder().build(); + String duplicateWarning = getDuplicateFileWarning(); + + if (duplicateWarning != null && !duplicateWarning.isEmpty()) { + JsonObjectBuilder fileoutput = Json.createObjectBuilder() + .add("storageIdentifier", newStorageIdentifier) + .add("warningMessage", getDuplicateFileWarning()) + .add("fileDetails", successresult.getJsonArray("files").getJsonObject(0)); + jarr.add(fileoutput); + } else { + JsonObjectBuilder fileoutput = Json.createObjectBuilder() + .add("storageIdentifier", newStorageIdentifier) + .add("successMessage", "Replaced successfully in the dataset") + .add("fileDetails", successresult.getJsonArray("files").getJsonObject(0)); + jarr.add(fileoutput); + } + successNumberofFiles = successNumberofFiles + 1; + } + } else { + JsonObjectBuilder fileoutput = Json.createObjectBuilder() + .add("errorMessage", "You must provide a fileToReplaceId, storageidentifier, filename, and mimetype.") + .add("fileDetails", fileJson); + + jarr.add(fileoutput); + } + + } catch (DataFileTagException ex) { + logger.log(Level.SEVERE, null, ex); + JsonObjectBuilder fileoutput = Json.createObjectBuilder() + .add("errorCode", Response.Status.BAD_REQUEST.getStatusCode()) + .add("message", ex.getMessage()) + .add("fileDetails", fileJson); + jarr.add(fileoutput); + + } + catch (NoFilesException ex) { + logger.log(Level.SEVERE, null, ex); + JsonObjectBuilder fileoutput = Json.createObjectBuilder() + .add("errorCode", Response.Status.BAD_REQUEST.getStatusCode()) + .add("message", BundleUtil.getStringFromBundle("NoFileException! Serious Error! 
See administrator!")) + .add("fileDetails", fileJson); + jarr.add(fileoutput); + } + }// End of adding files + + DatasetLock eipLock = dataset.getLockFor(DatasetLock.Reason.EditInProgress); + if (eipLock == null) { + logger.warning("Dataset not locked for EditInProgress "); + } else { + datasetService.removeDatasetLocks(dataset, DatasetLock.Reason.EditInProgress); + logger.info("Removed EditInProgress lock "); + } + + try { + Command cmd = new UpdateDatasetVersionCommand(dataset, dvRequest, filesToDelete, clone); + ((UpdateDatasetVersionCommand) cmd).setValidateLenient(true); + commandEngine.submit(cmd); + } catch (CommandException ex) { + return error(Response.Status.INTERNAL_SERVER_ERROR, "CommandException updating DatasetVersion from addFiles job: " + ex.getMessage()); + } + + fileService.finalizeFileDeletes(deleteFileStorageLocations); + + dataset = datasetService.find(dataset.getId()); + + //ingest job + ingestService.startIngestJobsForDataset(dataset, (AuthenticatedUser) authUser); + + } + } + catch ( javax.json.stream.JsonParsingException ex) { + ex.printStackTrace(); + return error(BAD_REQUEST, "Json Parsing Exception :" + ex.getMessage()); + } + catch (Exception e) { + e.printStackTrace(); + return error(BAD_REQUEST, e.getMessage()); + } + + JsonObjectBuilder result = Json.createObjectBuilder() + .add("Total number of files", totalNumberofFiles) + .add("Number of files successfully replaced", successNumberofFiles); + + return Response.ok().entity(Json.createObjectBuilder() + .add("status", STATUS_OK) + .add("data", Json.createObjectBuilder().add("Files", jarr).add("Result", result)).build() ).build(); + } protected static Response error(Response.Status sts, String msg ) { return Response.status(sts) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CuratePublishedDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CuratePublishedDatasetVersionCommand.java index 66ba00bcf55..ca5bf1d3f2c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CuratePublishedDatasetVersionCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CuratePublishedDatasetVersionCommand.java @@ -99,6 +99,10 @@ public Dataset execute(CommandContext ctxt) throws CommandException { logger.severe("Draft version of dataset: " + tempDataset.getId() + " has: " + newFileCount + " while last published version has " + pubFileCount); throw new IllegalCommandException(BundleUtil.getStringFromBundle("datasetversion.update.failure"), this); } + Long thumbId = null; + if(tempDataset.getThumbnailFile()!=null) { + thumbId = tempDataset.getThumbnailFile().getId(); + }; for (FileMetadata publishedFmd : pubFmds) { DataFile dataFile = publishedFmd.getDataFile(); FileMetadata draftFmd = dataFile.getLatestFileMetadata(); @@ -136,6 +140,10 @@ public Dataset execute(CommandContext ctxt) throws CommandException { for (DataFileCategory cat : tempDataset.getCategories()) { cat.getFileMetadatas().remove(draftFmd); } + //And any thumbnail reference + if(publishedFmd.getDataFile().getId()==thumbId) { + tempDataset.setThumbnailFile(publishedFmd.getDataFile()); + } } // Update modification time on the published version and the dataset diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java index 89666f02db2..f23033f09fa 100644 --- 
a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java @@ -305,7 +305,10 @@ public static String createJWTString(Algorithm algorithmRSA, String installation String canonicalBody = new JsonCanonicalizer(body).getEncodedString(); logger.fine("Canonical body: " + canonicalBody); String digest = DigestUtils.sha256Hex(canonicalBody); - return JWT.create().withIssuer(BrandingUtil.getInstallationBrandName()).withIssuedAt(Date.from(Instant.now())) + if(installationBrandName==null) { + installationBrandName = BrandingUtil.getInstallationBrandName(); + } + return JWT.create().withIssuer(installationBrandName).withIssuedAt(Date.from(Instant.now())) .withExpiresAt(Date.from(Instant.now().plusSeconds(60 * expirationInMinutes))) .withKeyId("defaultDataverse").withClaim("bodySHA256Hash", digest).sign(algorithmRSA); } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java index 5d017173685..da2701a41e7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java @@ -1,16 +1,27 @@ package edu.harvard.iq.dataverse.engine.command.impl; +import com.google.auth.oauth2.ServiceAccountCredentials; +import com.google.cloud.storage.Blob; +import com.google.cloud.storage.Bucket; +import com.google.cloud.storage.Storage; +import com.google.cloud.storage.StorageException; +import com.google.cloud.storage.StorageOptions; import edu.harvard.iq.dataverse.Dataset; -import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.DatasetLock.Reason; +import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.users.ApiToken; import edu.harvard.iq.dataverse.engine.command.Command; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.workflow.step.Failure; import edu.harvard.iq.dataverse.workflow.step.WorkflowStepResult; +import org.apache.commons.codec.binary.Hex; +import javax.json.Json; +import javax.json.JsonObjectBuilder; +import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.PipedInputStream; @@ -21,17 +32,6 @@ import java.util.Map; import java.util.logging.Logger; -import javax.json.Json; -import javax.json.JsonObjectBuilder; - -import org.apache.commons.codec.binary.Hex; -import com.google.auth.oauth2.ServiceAccountCredentials; -import com.google.cloud.storage.Blob; -import com.google.cloud.storage.Bucket; -import com.google.cloud.storage.Storage; -import com.google.cloud.storage.StorageException; -import com.google.cloud.storage.StorageOptions; - @RequiredPermissions(Permission.PublishDataset) public class GoogleCloudSubmitToArchiveCommand extends AbstractSubmitToArchiveCommand implements Command { @@ -56,10 +56,11 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t statusObject.add(DatasetVersion.ARCHIVAL_STATUS, DatasetVersion.ARCHIVAL_STATUS_FAILURE); statusObject.add(DatasetVersion.ARCHIVAL_STATUS_MESSAGE, "Bag not transferred"); - try 
{ - FileInputStream fis = new FileInputStream(System.getProperty("dataverse.files.directory") + System.getProperty("file.separator") + "googlecloudkey.json"); + String cloudKeyFile = JvmSettings.FILES_DIRECTORY.lookup() + File.separator + "googlecloudkey.json"; + + try (FileInputStream cloudKeyStream = new FileInputStream(cloudKeyFile)) { storage = StorageOptions.newBuilder() - .setCredentials(ServiceAccountCredentials.fromStream(fis)) + .setCredentials(ServiceAccountCredentials.fromStream(cloudKeyStream)) .setProjectId(projectName) .build() .getService(); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ImportFromFileSystemCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ImportFromFileSystemCommand.java index 64beba82450..5f31ea756eb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ImportFromFileSystemCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ImportFromFileSystemCommand.java @@ -12,17 +12,20 @@ import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; -import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder; -import java.io.File; -import java.util.Properties; -import java.util.logging.Level; -import java.util.logging.Logger; +import edu.harvard.iq.dataverse.settings.JvmSettings; + import javax.batch.operations.JobOperator; import javax.batch.operations.JobSecurityException; import javax.batch.operations.JobStartException; import javax.batch.runtime.BatchRuntime; import javax.json.JsonObject; import javax.json.JsonObjectBuilder; +import java.io.File; +import java.util.Properties; +import java.util.logging.Level; +import java.util.logging.Logger; + +import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder; @RequiredPermissions(Permission.EditDataset) public class ImportFromFileSystemCommand extends AbstractCommand { @@ -69,18 +72,20 @@ public JsonObject execute(CommandContext ctxt) throws CommandException { logger.info(error); throw new IllegalCommandException(error, this); } - File directory = new File(System.getProperty("dataverse.files.directory") - + File.separator + dataset.getAuthority() + File.separator + dataset.getIdentifier()); - // TODO: - // The above goes directly to the filesystem directory configured by the - // old "dataverse.files.directory" JVM option (otherwise used for temp - // files only, after the Multistore implementation (#6488). - // We probably want package files to be able to use specific stores instead. - // More importantly perhaps, the approach above does not take into account - // if the dataset may have an AlternativePersistentIdentifier, that may be - // designated isStorageLocationDesignator() - i.e., if a different identifer - // needs to be used to name the storage directory, instead of the main/current - // persistent identifier above. + + File directory = new File( + String.join(File.separator, JvmSettings.FILES_DIRECTORY.lookup(), + dataset.getAuthority(), dataset.getIdentifier())); + + // TODO: The above goes directly to the filesystem directory configured by the + // old "dataverse.files.directory" JVM option (otherwise used for temp + // files only, after the Multistore implementation (#6488). + // We probably want package files to be able to use specific stores instead. 
+ // More importantly perhaps, the approach above does not take into account + // if the dataset may have an AlternativePersistentIdentifier, that may be + // designated isStorageLocationDesignator() - i.e., if a different identifier + // needs to be used to name the storage directory, instead of the main/current + // persistent identifier above. if (!isValidDirectory(directory)) { String error = "Dataset directory is invalid. " + directory; logger.info(error); @@ -93,11 +98,10 @@ public JsonObject execute(CommandContext ctxt) throws CommandException { throw new IllegalCommandException(error, this); } - File uploadDirectory = new File(System.getProperty("dataverse.files.directory") - + File.separator + dataset.getAuthority() + File.separator + dataset.getIdentifier() - + File.separator + uploadFolder); - // TODO: - // see the comment above. + File uploadDirectory = new File(String.join(File.separator, JvmSettings.FILES_DIRECTORY.lookup(), + dataset.getAuthority(), dataset.getIdentifier(), uploadFolder)); + + // TODO: see the comment above. if (!isValidDirectory(uploadDirectory)) { String error = "Upload folder is not a valid directory."; logger.info(error); diff --git a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java index 4bbcd653ac3..eb7632dd03c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java @@ -32,18 +32,15 @@ import edu.harvard.iq.dataverse.export.DDIExporter; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; -import static edu.harvard.iq.dataverse.util.SystemConfig.FQDN; -import static edu.harvard.iq.dataverse.util.SystemConfig.SITE_URL; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.FileUtil; +import edu.harvard.iq.dataverse.util.SystemConfig; import edu.harvard.iq.dataverse.util.json.JsonUtil; import edu.harvard.iq.dataverse.util.xml.XmlPrinter; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.OutputStream; -import java.net.InetAddress; -import java.net.UnknownHostException; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; @@ -1292,7 +1289,7 @@ private static void writeNotesElement(XMLStreamWriter xmlw, DatasetVersionDTO da // harvesting *all* files are encoded as otherMats; even tabular ones.
private static void createOtherMats(XMLStreamWriter xmlw, List fileDtos) throws XMLStreamException { // The preferred URL for this dataverse, for cooking up the file access API links: - String dataverseUrl = getDataverseSiteUrl(); + String dataverseUrl = SystemConfig.getDataverseSiteUrlStatic(); for (FileDTO fileDTo : fileDtos) { // We'll continue using the scheme we've used before, in DVN2-3: non-tabular files are put into otherMat, @@ -1339,7 +1336,7 @@ private static void createOtherMats(XMLStreamWriter xmlw, List fileDtos private static void createOtherMatsFromFileMetadatas(XMLStreamWriter xmlw, List fileMetadatas) throws XMLStreamException { // The preferred URL for this dataverse, for cooking up the file access API links: - String dataverseUrl = getDataverseSiteUrl(); + String dataverseUrl = SystemConfig.getDataverseSiteUrlStatic(); for (FileMetadata fileMetadata : fileMetadatas) { // We'll continue using the scheme we've used before, in DVN2-3: non-tabular files are put into otherMat, @@ -1555,33 +1552,6 @@ private static void saveJsonToDisk(String datasetVersionAsJson) throws IOExcepti Files.write(Paths.get("/tmp/out.json"), datasetVersionAsJson.getBytes()); } - /** - * The "official", designated URL of the site; - * can be defined as a complete URL; or derived from the - * "official" hostname. If none of these options is set, - * defaults to the InetAddress.getLocalHOst() and https; - */ - private static String getDataverseSiteUrl() { - String hostUrl = System.getProperty(SITE_URL); - if (hostUrl != null && !"".equals(hostUrl)) { - return hostUrl; - } - String hostName = System.getProperty(FQDN); - if (hostName == null) { - try { - hostName = InetAddress.getLocalHost().getCanonicalHostName(); - } catch (UnknownHostException e) { - hostName = null; - } - } - - if (hostName != null) { - return "https://" + hostName; - } - - return "http://localhost:8080"; - } - @@ -1893,7 +1863,7 @@ private static void createVarDDI(XMLStreamWriter xmlw, DataVariable dv, FileMeta } private static void createFileDscr(XMLStreamWriter xmlw, DatasetVersion datasetVersion) throws XMLStreamException { - String dataverseUrl = getDataverseSiteUrl(); + String dataverseUrl = SystemConfig.getDataverseSiteUrlStatic(); for (FileMetadata fileMetadata : datasetVersion.getFileMetadatas()) { DataFile dataFile = fileMetadata.getDataFile(); diff --git a/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java index 49fe203b96d..bea3858a60e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java @@ -256,7 +256,10 @@ public static void writeCreatorsElement(XMLStreamWriter xmlw, DatasetVersionDTO creator_map.put("nameType", "Personal"); nameType_check = true; } - + // ToDo - the algorithm to determine if this is a Person or Organization here + // has been abstracted into a separate + // edu.harvard.iq.dataverse.util.PersonOrOrgUtil class that could be used here + // to avoid duplication/variants of the algorithm creatorName = Cleanup.normalize(creatorName); // Datacite algorithm, https://github.com/IQSS/dataverse/issues/2243#issuecomment-358615313 if (creatorName.contains(",")) { @@ -706,6 +709,11 @@ public static void writeContributorElement(XMLStreamWriter xmlw, String contribu boolean nameType_check = false; Map contributor_map = new HashMap(); + // ToDo - the algorithm to determine if this is a 
Person or Organization here + // has been abstracted into a separate + // edu.harvard.iq.dataverse.util.PersonOrOrgUtil class that could be used here + // to avoid duplication/variants of the algorithm + contributorName = Cleanup.normalize(contributorName); // Datacite algorithm, https://github.com/IQSS/dataverse/issues/2243#issuecomment-358615313 if (contributorName.contains(",")) { @@ -717,6 +725,9 @@ public static void writeContributorElement(XMLStreamWriter xmlw, String contribu // givenName ok contributor_map.put("nameType", "Personal"); nameType_check = true; + // re: the above toDo - the ("ContactPerson".equals(contributorType) && + // !isValidEmailAddress(contributorName)) clause in the next line could/should + // be sent as the OrgIfTied boolean parameter } else if (isOrganization || ("ContactPerson".equals(contributorType) && !isValidEmailAddress(contributorName))) { contributor_map.put("nameType", "Organizational"); } diff --git a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalTool.java b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalTool.java index 1789b7a90c3..0a238eb5198 100644 --- a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalTool.java +++ b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalTool.java @@ -39,6 +39,7 @@ public class ExternalTool implements Serializable { public static final String CONTENT_TYPE = "contentType"; public static final String TOOL_NAME = "toolName"; public static final String ALLOWED_API_CALLS = "allowedApiCalls"; + public static final String REQUIREMENTS = "requirements"; @Id @GeneratedValue(strategy = GenerationType.IDENTITY) @@ -103,6 +104,15 @@ public class ExternalTool implements Serializable { @Column(nullable = true, columnDefinition = "TEXT") private String allowedApiCalls; + /** + * When non-null, the tool has indicated that it has certain requirements + * that must be met before it should be shown to the user. This + * functionality was added for tools that operate on aux files rather than + * data files so "auxFilesExist" is one of the possible values. 
+ */ + @Column(nullable = true, columnDefinition = "TEXT") + private String requirements; + /** * This default constructor is only here to prevent this error at * deployment: @@ -118,10 +128,10 @@ public ExternalTool() { } public ExternalTool(String displayName, String toolName, String description, List externalToolTypes, Scope scope, String toolUrl, String toolParameters, String contentType) { - this(displayName, toolName, description, externalToolTypes, scope, toolUrl, toolParameters, contentType, null); + this(displayName, toolName, description, externalToolTypes, scope, toolUrl, toolParameters, contentType, null, null); } - public ExternalTool(String displayName, String toolName, String description, List externalToolTypes, Scope scope, String toolUrl, String toolParameters, String contentType, String allowedApiCalls) { + public ExternalTool(String displayName, String toolName, String description, List externalToolTypes, Scope scope, String toolUrl, String toolParameters, String contentType, String allowedApiCalls, String requirements) { this.displayName = displayName; this.toolName = toolName; this.description = description; @@ -131,6 +141,7 @@ public ExternalTool(String displayName, String toolName, String description, Lis this.toolParameters = toolParameters; this.contentType = contentType; this.allowedApiCalls = allowedApiCalls; + this.requirements = requirements; } public enum Type { @@ -326,5 +337,12 @@ public void setAllowedApiCalls(String allowedApiCalls) { this.allowedApiCalls = allowedApiCalls; } + public String getRequirements() { + return requirements; + } + + public void setRequirements(String requirements) { + this.requirements = requirements; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBean.java index a65ad2427ba..f38cd7301ee 100644 --- a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBean.java @@ -1,5 +1,7 @@ package edu.harvard.iq.dataverse.externaltools; +import edu.harvard.iq.dataverse.AuxiliaryFile; +import edu.harvard.iq.dataverse.AuxiliaryFileServiceBean; import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.DataFileServiceBean; import edu.harvard.iq.dataverse.authorization.users.ApiToken; @@ -30,6 +32,8 @@ import static edu.harvard.iq.dataverse.externaltools.ExternalTool.*; import java.util.stream.Collectors; import java.util.stream.Stream; +import javax.ejb.EJB; +import javax.json.JsonValue; @Stateless @Named @@ -40,6 +44,9 @@ public class ExternalToolServiceBean { @PersistenceContext(unitName = "VDCNet-ejbPU") private EntityManager em; + @EJB + AuxiliaryFileServiceBean auxiliaryFileService; + public List findAll() { TypedQuery typedQuery = em.createQuery("SELECT OBJECT(o) FROM ExternalTool AS o ORDER BY o.id", ExternalTool.class); return typedQuery.getResultList(); @@ -133,13 +140,13 @@ public ExternalTool save(ExternalTool externalTool) { * file supports The list of tools is passed in so it doesn't hit the * database each time */ - public static List findExternalToolsByFile(List allExternalTools, DataFile file) { + public List findExternalToolsByFile(List allExternalTools, DataFile file) { List externalTools = new ArrayList<>(); //Map tabular data to it's mimetype (the isTabularData() check assures that this code works the same as before, but it may need to change if tabular data is split into subtypes 
with differing mimetypes) final String contentType = file.isTabularData() ? DataFileServiceBean.MIME_TYPE_TSV_ALT : file.getContentType(); allExternalTools.forEach((externalTool) -> { - //Match tool and file type - if (contentType.equals(externalTool.getContentType())) { + //Match tool and file type, then check requirements + if (contentType.equals(externalTool.getContentType()) && meetsRequirements(externalTool, file)) { externalTools.add(externalTool); } }); @@ -147,6 +154,31 @@ public static List findExternalToolsByFile(List allE return externalTools; } + public boolean meetsRequirements(ExternalTool externalTool, DataFile dataFile) { + String requirements = externalTool.getRequirements(); + if (requirements == null) { + logger.fine("Data file id" + dataFile.getId() + ": no requirements for tool id " + externalTool.getId()); + return true; + } + boolean meetsRequirements = true; + JsonObject requirementsObj = JsonUtil.getJsonObject(requirements); + JsonArray auxFilesExist = requirementsObj.getJsonArray("auxFilesExist"); + for (JsonValue jsonValue : auxFilesExist) { + String formatTag = jsonValue.asJsonObject().getString("formatTag"); + String formatVersion = jsonValue.asJsonObject().getString("formatVersion"); + AuxiliaryFile auxFile = auxiliaryFileService.lookupAuxiliaryFile(dataFile, formatTag, formatVersion); + if (auxFile == null) { + logger.fine("Data file id" + dataFile.getId() + ": cannot find required aux file. formatTag=" + formatTag + ". formatVersion=" + formatVersion); + meetsRequirements = false; + break; + } else { + logger.fine("Data file id" + dataFile.getId() + ": found required aux file. formatTag=" + formatTag + ". formatVersion=" + formatVersion); + meetsRequirements = true; + } + } + return meetsRequirements; + } + public static ExternalTool parseAddExternalToolManifest(String manifest) { if (manifest == null || manifest.isEmpty()) { @@ -170,6 +202,7 @@ public static ExternalTool parseAddExternalToolManifest(String manifest) { JsonObject toolParametersObj = jsonObject.getJsonObject(TOOL_PARAMETERS); JsonArray queryParams = toolParametersObj.getJsonArray("queryParameters"); JsonArray allowedApiCallsArray = jsonObject.getJsonArray(ALLOWED_API_CALLS); + JsonObject requirementsObj = jsonObject.getJsonObject(REQUIREMENTS); boolean allRequiredReservedWordsFound = false; if (scope.equals(Scope.FILE)) { @@ -227,8 +260,12 @@ public static ExternalTool parseAddExternalToolManifest(String manifest) { if(allowedApiCallsArray !=null) { allowedApiCalls = allowedApiCallsArray.toString(); } + String requirements = null; + if (requirementsObj != null) { + requirements = requirementsObj.toString(); + } - return new ExternalTool(displayName, toolName, description, externalToolTypes, scope, toolUrl, toolParameters, contentType, allowedApiCalls); + return new ExternalTool(displayName, toolName, description, externalToolTypes, scope, toolUrl, toolParameters, contentType, allowedApiCalls, requirements); } private static String getRequiredTopLevelField(JsonObject jsonObject, String key) { diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/FastGetRecord.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/FastGetRecord.java index 5b3e4df331d..402d0d8ef91 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/FastGetRecord.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/FastGetRecord.java @@ -19,8 +19,8 @@ */ package edu.harvard.iq.dataverse.harvest.client; +import edu.harvard.iq.dataverse.harvest.client.oai.OaiHandler; import 
java.io.IOException; -import java.io.FileNotFoundException; import java.io.InputStream; import java.io.StringReader; @@ -31,9 +31,14 @@ import java.io.FileOutputStream; import java.io.PrintWriter; -import java.net.HttpURLConnection; +import static java.net.HttpURLConnection.HTTP_OK; import java.net.MalformedURLException; -import java.net.URL; +import java.net.URI; +import java.net.http.HttpClient; +import java.net.http.HttpRequest; +import java.net.http.HttpResponse; +import java.util.Map; +import java.util.Optional; import java.util.zip.GZIPInputStream; import java.util.zip.InflaterInputStream; @@ -84,17 +89,18 @@ public class FastGetRecord { /** * Client-side GetRecord verb constructor * - * @param baseURL the baseURL of the server to be queried + * @param oaiHandler the configured OaiHandler running this harvest + * @param identifier Record identifier + * @param httpClient JDK HttpClient used to make HTTP requests * @exception MalformedURLException the baseURL is bad * @exception SAXException the xml response is bad * @exception IOException an I/O error occurred + * @exception TransformerException if it fails to parse the service portion of the record */ - public FastGetRecord(String baseURL, String identifier, String metadataPrefix) - throws IOException, ParserConfigurationException, SAXException, + public FastGetRecord(OaiHandler oaiHandler, String identifier, HttpClient httpClient) throws IOException, ParserConfigurationException, SAXException, TransformerException { - harvestRecord (baseURL, identifier, metadataPrefix); - + harvestRecord (oaiHandler.getBaseOaiUrl(), identifier, oaiHandler.getMetadataPrefix(), oaiHandler.getCustomHeaders(), httpClient); } private String errorMessage = null; @@ -117,57 +123,63 @@ public boolean isDeleted () { } - public void harvestRecord(String baseURL, String identifier, String metadataPrefix) throws IOException, - ParserConfigurationException, SAXException, TransformerException { + public void harvestRecord(String baseURL, String identifier, String metadataPrefix, Map customHeaders, HttpClient httpClient) throws IOException, + ParserConfigurationException, SAXException, TransformerException{ xmlInputFactory = javax.xml.stream.XMLInputFactory.newInstance(); - String requestURL = getRequestURL(baseURL, identifier, metadataPrefix); + InputStream in; + + // This was one other place where the Harvester code was still using + // the obsolete java.net.HttpURLConnection that didn't get replaced with + // the new java.net.http.HttpClient during the first pass of the XOAI + // rewrite. (L.A.) - InputStream in = null; - URL url = new URL(requestURL); - HttpURLConnection con = null; - int responseCode = 0; - - con = (HttpURLConnection) url.openConnection(); - con.setRequestProperty("User-Agent", "DataverseHarvester/3.0"); - con.setRequestProperty("Accept-Encoding", - "compress, gzip, identify"); - try { - responseCode = con.getResponseCode(); - //logger.debug("responseCode=" + responseCode); - } catch (FileNotFoundException e) { - //logger.info(requestURL, e); - responseCode = HttpURLConnection.HTTP_UNAVAILABLE; - } - - // TODO: -- L.A. - // - // support for cookies; - // support for limited retry attempts -- ? - // implement reading of the stream as filterinputstream -- ? - // -- that could make it a little faster still. -- L.A.
- - - - if (responseCode == 200) { - - String contentEncoding = con.getHeaderField("Content-Encoding"); - //logger.debug("contentEncoding=" + contentEncoding); - - // support for the standard compress/gzip/deflate compression - // schemes: - if ("compress".equals(contentEncoding)) { - ZipInputStream zis = new ZipInputStream(con.getInputStream()); - zis.getNextEntry(); - in = zis; - } else if ("gzip".equals(contentEncoding)) { - in = new GZIPInputStream(con.getInputStream()); - } else if ("deflate".equals(contentEncoding)) { - in = new InflaterInputStream(con.getInputStream()); - } else { - in = con.getInputStream(); + if (httpClient == null) { + throw new IOException("Null Http Client, cannot make a GetRecord call to obtain the metadata."); + } + + HttpRequest.Builder requestBuilder = HttpRequest.newBuilder() + .uri(URI.create(requestURL)) + .GET() + .header("User-Agent", "XOAI Service Provider v5 (Dataverse)") + .header("Accept-Encoding", "compress, gzip"); + + if (customHeaders != null) { + for (String headerName : customHeaders.keySet()) { + requestBuilder.header(headerName, customHeaders.get(headerName)); + } + } + + HttpRequest request = requestBuilder.build(); + HttpResponse response; + + try { + response = httpClient.send(request, HttpResponse.BodyHandlers.ofInputStream()); + } catch (InterruptedException ex) { + Thread.currentThread().interrupt(); + throw new IOException("Failed to connect to the remote dataverse server to obtain GetRecord metadata"); + } + + int responseCode = response.statusCode(); + + if (responseCode == HTTP_OK) { + InputStream inputStream = response.body(); + Optional contentEncoding = response.headers().firstValue("Content-Encoding"); + + // support for the standard gzip encoding: + in = inputStream; + if (contentEncoding.isPresent()) { + if (contentEncoding.get().equals("compress")) { + ZipInputStream zis = new ZipInputStream(inputStream); + zis.getNextEntry(); + in = zis; + } else if (contentEncoding.get().equals("gzip")) { + in = new GZIPInputStream(inputStream); + } else if (contentEncoding.get().equals("deflate")) { + in = new InflaterInputStream(inputStream); + } } // We are going to read the OAI header and SAX-parse it for the @@ -185,9 +197,7 @@ public void harvestRecord(String baseURL, String identifier, String metadataPref FileOutputStream tempFileStream = null; PrintWriter metadataOut = null; - savedMetadataFile = File.createTempFile("meta", ".tmp"); - - + savedMetadataFile = File.createTempFile("meta", ".tmp"); int mopen = 0; int mclose = 0; diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java index 058a20451d6..40bd45ecb30 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java @@ -228,11 +228,9 @@ private void harvestOAI(DataverseRequest dataverseRequest, HarvestingClient harv throw new IOException(errorMessage); } - if (DATAVERSE_PROPRIETARY_METADATA_FORMAT.equals(oaiHandler.getMetadataPrefix())) { - // If we are harvesting native Dataverse json, we'll also need this - // jdk http client to make direct calls to the remote Dataverse API: - httpClient = HttpClient.newBuilder().followRedirects(HttpClient.Redirect.ALWAYS).build(); - } + // We will use this jdk http client to make direct calls to the remote + // OAI (or remote Dataverse API) to obtain the metadata records + httpClient = 
HttpClient.newBuilder().followRedirects(HttpClient.Redirect.ALWAYS).build(); try { for (Iterator
idIter = oaiHandler.runListIdentifiers(); idIter.hasNext();) { @@ -295,7 +293,7 @@ private Long processRecord(DataverseRequest dataverseRequest, Logger hdLogger, P tempFile = retrieveProprietaryDataverseMetadata(httpClient, metadataApiUrl); } else { - FastGetRecord record = oaiHandler.runGetRecord(identifier); + FastGetRecord record = oaiHandler.runGetRecord(identifier, httpClient); errMessage = record.getErrorMessage(); deleted = record.isDeleted(); tempFile = record.getMetadataFile(); @@ -360,7 +358,7 @@ File retrieveProprietaryDataverseMetadata (HttpClient client, String remoteApiUr HttpRequest request = HttpRequest.newBuilder() .uri(URI.create(remoteApiUrl)) .GET() - .header("User-Agent", "DataverseHarvester/6.0") + .header("User-Agent", "XOAI Service Provider v5 (Dataverse)") .build(); HttpResponse response; diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClient.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClient.java index aeb010fad6d..d27ddc41b7f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClient.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClient.java @@ -234,6 +234,16 @@ public void setMetadataPrefix(String metadataPrefix) { this.metadataPrefix = metadataPrefix; } + private String customHttpHeaders; + + public String getCustomHttpHeaders() { + return customHttpHeaders; + } + + public void setCustomHttpHeaders(String customHttpHeaders) { + this.customHttpHeaders = customHttpHeaders; + } + // TODO: do we need "orphanRemoval=true"? -- L.A. 4.4 // TODO: should it be @OrderBy("startTime")? -- L.A. 4.4 @OneToMany(mappedBy="harvestingClient", cascade={CascadeType.REMOVE, CascadeType.MERGE, CascadeType.PERSIST}) @@ -345,95 +355,7 @@ public Long getLastDeletedDatasetCount() { return lastNonEmptyHarvest.getDeletedDatasetCount(); } return null; - } - - /* move the fields below to the new HarvestingClientRun class: - private String harvestResult; - - public String getResult() { - return harvestResult; - } - - public void setResult(String harvestResult) { - this.harvestResult = harvestResult; - } - - // "Last Harvest Time" is the last time we *attempted* to harvest - // from this remote resource. - // It wasn't necessarily a successful attempt! - - @Temporal(value = TemporalType.TIMESTAMP) - private Date lastHarvestTime; - - public Date getLastHarvestTime() { - return lastHarvestTime; - } - - public void setLastHarvestTime(Date lastHarvestTime) { - this.lastHarvestTime = lastHarvestTime; - } - - // This is the last "successful harvest" - i.e., the last time we - // tried to harvest, and got a response from the remote server. - // We may not have necessarily harvested any useful content though; - // the result may have been a "no content" or "no changes since the last harvest" - // response. - - @Temporal(value = TemporalType.TIMESTAMP) - private Date lastSuccessfulHarvestTime; - - public Date getLastSuccessfulHarvestTime() { - return lastSuccessfulHarvestTime; - } - - public void setLastSuccessfulHarvestTime(Date lastSuccessfulHarvestTime) { - this.lastSuccessfulHarvestTime = lastSuccessfulHarvestTime; - } - - // Finally, this is the time stamp from the last "non-empty" harvest. - // I.e. 
the last time we ran a harvest that actually resulted in - // some Datasets created, updated or deleted: - - @Temporal(value = TemporalType.TIMESTAMP) - private Date lastNonEmptyHarvestTime; - - public Date getLastNonEmptyHarvestTime() { - return lastNonEmptyHarvestTime; - } - - public void setLastNonEmptyHarvestTime(Date lastNonEmptyHarvestTime) { - this.lastNonEmptyHarvestTime = lastNonEmptyHarvestTime; - } - - // And these are the Dataset counts from that last "non-empty" harvest: - private Long harvestedDatasetCount; - private Long failedDatasetCount; - private Long deletedDatasetCount; - - public Long getLastHarvestedDatasetCount() { - return harvestedDatasetCount; - } - - public void setHarvestedDatasetCount(Long harvestedDatasetCount) { - this.harvestedDatasetCount = harvestedDatasetCount; - } - - public Long getLastFailedDatasetCount() { - return failedDatasetCount; - } - - public void setFailedDatasetCount(Long failedDatasetCount) { - this.failedDatasetCount = failedDatasetCount; - } - - public Long getLastDeletedDatasetCount() { - return deletedDatasetCount; - } - - public void setDeletedDatasetCount(Long deletedDatasetCount) { - this.deletedDatasetCount = deletedDatasetCount; - } - */ + } private boolean scheduled; diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/OaiHandler.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/OaiHandler.java index c0a039e2d2b..bb3dc06972c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/OaiHandler.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/OaiHandler.java @@ -5,7 +5,6 @@ import io.gdcc.xoai.model.oaipmh.results.MetadataFormat; import io.gdcc.xoai.model.oaipmh.results.Set; import io.gdcc.xoai.serviceprovider.ServiceProvider; -import io.gdcc.xoai.serviceprovider.client.JdkHttpOaiClient; import io.gdcc.xoai.serviceprovider.exceptions.BadArgumentException; import io.gdcc.xoai.serviceprovider.exceptions.InvalidOAIResponse; import io.gdcc.xoai.serviceprovider.exceptions.NoSetHierarchyException; @@ -15,8 +14,10 @@ import edu.harvard.iq.dataverse.harvest.client.FastGetRecord; import static edu.harvard.iq.dataverse.harvest.client.HarvesterServiceBean.DATAVERSE_PROPRIETARY_METADATA_API; import edu.harvard.iq.dataverse.harvest.client.HarvestingClient; +import io.gdcc.xoai.serviceprovider.client.JdkHttpOaiClient; import java.io.IOException; import java.io.Serializable; +import java.net.http.HttpClient; import javax.xml.parsers.ParserConfigurationException; import org.apache.commons.lang3.StringUtils; @@ -24,14 +25,18 @@ import javax.xml.transform.TransformerException; import java.util.ArrayList; import java.util.Date; +import java.util.HashMap; import java.util.Iterator; import java.util.List; +import java.util.Map; +import java.util.logging.Logger; /** * * @author Leonid Andreev */ public class OaiHandler implements Serializable { + private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.harvest.client.oai.OaiHandler"); public OaiHandler() { @@ -65,6 +70,8 @@ public OaiHandler(HarvestingClient harvestingClient) throws OaiHandlerException this.fromDate = harvestingClient.getLastNonEmptyHarvestTime(); + this.customHeaders = makeCustomHeaders(harvestingClient.getCustomHttpHeaders()); + this.harvestingClient = harvestingClient; } @@ -74,6 +81,7 @@ public OaiHandler(HarvestingClient harvestingClient) throws OaiHandlerException private String setName; private Date fromDate; private Boolean setListTruncated = false; + private Map customHeaders = null; private 
ServiceProvider serviceProvider; @@ -119,6 +127,14 @@ public boolean isSetListTruncated() { return setListTruncated; } + public Map getCustomHeaders() { + return this.customHeaders; + } + + public void setCustomHeaders(Map customHeaders) { + this.customHeaders = customHeaders; + } + public ServiceProvider getServiceProvider() throws OaiHandlerException { if (serviceProvider == null) { if (baseOaiUrl == null) { @@ -128,8 +144,15 @@ public ServiceProvider getServiceProvider() throws OaiHandlerException { context.withBaseUrl(baseOaiUrl); context.withGranularity(Granularity.Second); - // builds the client with the default parameters and the JDK http client: - context.withOAIClient(JdkHttpOaiClient.newBuilder().withBaseUrl(baseOaiUrl).build()); + + JdkHttpOaiClient.Builder xoaiClientBuilder = JdkHttpOaiClient.newBuilder().withBaseUrl(getBaseOaiUrl()); + if (getCustomHeaders() != null) { + for (String headerName : getCustomHeaders().keySet()) { + logger.fine("adding custom header; name: "+headerName+", value: "+getCustomHeaders().get(headerName)); + } + xoaiClientBuilder = xoaiClientBuilder.withCustomHeaders(getCustomHeaders()); + } + context.withOAIClient(xoaiClientBuilder.build()); serviceProvider = new ServiceProvider(context); } @@ -235,7 +258,7 @@ public Iterator
runListIdentifiers() throws OaiHandlerException { } - public FastGetRecord runGetRecord(String identifier) throws OaiHandlerException { + public FastGetRecord runGetRecord(String identifier, HttpClient httpClient) throws OaiHandlerException { if (StringUtils.isEmpty(this.baseOaiUrl)) { throw new OaiHandlerException("Attempted to execute GetRecord without server URL specified."); } @@ -244,7 +267,7 @@ public FastGetRecord runGetRecord(String identifier) throws OaiHandlerException } try { - return new FastGetRecord(this.baseOaiUrl, identifier, this.metadataPrefix); + return new FastGetRecord(this, identifier, httpClient); } catch (ParserConfigurationException pce) { throw new OaiHandlerException("ParserConfigurationException executing GetRecord: "+pce.getMessage()); } catch (SAXException se) { @@ -293,4 +316,28 @@ public void runIdentify() { // (we will need it, both for validating the remote server, // and to learn about its extended capabilities) } + + public Map makeCustomHeaders(String headersString) { + if (headersString != null) { + String[] parts = headersString.split("\\\\n"); + HashMap ret = new HashMap<>(); + logger.info("found "+parts.length+" parts"); + int count = 0; + for (int i = 0; i < parts.length; i++) { + if (parts[i].indexOf(':') > 0) { + String headerName = parts[i].substring(0, parts[i].indexOf(':')); + String headerValue = parts[i].substring(parts[i].indexOf(':')+1).strip(); + + ret.put(headerName, headerValue); + count++; + } + // simply skipping it if malformed; or we could throw an exception - ? + } + if (ret.size() > 0) { + logger.info("returning the array with "+ret.size()+" name/value pairs"); + return ret; + } + } + return null; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/server/OAIRecordServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/harvest/server/OAIRecordServiceBean.java index 6cdc4e5c277..5a8f2f41d31 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/server/OAIRecordServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/server/OAIRecordServiceBean.java @@ -375,4 +375,16 @@ public List findDeletedOaiRecordsBySetName(String setName) { } } + public Instant getEarliestDate() { + String queryString = "SELECT min(r.lastUpdateTime) FROM OAIRecord r"; + TypedQuery query = em.createQuery(queryString, Date.class); + Date retDate = query.getSingleResult(); + if (retDate != null) { + return retDate.toInstant(); + } + + // if there are no records yet, return the default "now" + return new Date().toInstant(); + } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java b/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java index f778fd56644..8840d433ae1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java @@ -31,10 +31,13 @@ import edu.harvard.iq.dataverse.util.MailUtil; import edu.harvard.iq.dataverse.util.SystemConfig; import io.gdcc.xoai.exceptions.OAIException; +import io.gdcc.xoai.model.oaipmh.Granularity; +import io.gdcc.xoai.services.impl.SimpleResumptionTokenFormat; import org.apache.commons.lang3.StringUtils; import java.io.IOException; +import java.time.Instant; import java.util.logging.Logger; import javax.ejb.EJB; import javax.inject.Inject; @@ -123,14 +126,13 @@ public void init(ServletConfig config) throws ServletException { } setRepository = new DataverseXoaiSetRepository(setService); - itemRepository = 
new DataverseXoaiItemRepository(recordService, datasetService, systemConfig.getDataverseSiteUrl()); + itemRepository = new DataverseXoaiItemRepository(recordService, datasetService, SystemConfig.getDataverseSiteUrlStatic()); repositoryConfiguration = createRepositoryConfiguration(); - xoaiRepository = new Repository() + xoaiRepository = new Repository(repositoryConfiguration) .withSetRepository(setRepository) - .withItemRepository(itemRepository) - .withConfiguration(repositoryConfiguration); + .withItemRepository(itemRepository); dataProvider = new DataProvider(getXoaiContext(), getXoaiRepository()); } @@ -193,23 +195,30 @@ private RepositoryConfiguration createRepositoryConfiguration() { } // The admin email address associated with this installation: // (Note: if the setting does not exist, we are going to assume that they - // have a reason not to want to advertise their email address, so no - // email will be shown in the output of Identify. + // have a reason not to want to configure their email address, if it is + // a developer's instance, for example; or a reason not to want to + // advertise it to the world.) InternetAddress systemEmailAddress = MailUtil.parseSystemAddress(settingsService.getValueForKey(SettingsServiceBean.Key.SystemEmail)); - - RepositoryConfiguration repositoryConfiguration = RepositoryConfiguration.defaults() - .withEnableMetadataAttributes(true) - .withRepositoryName(repositoryName) - .withBaseUrl(systemConfig.getDataverseSiteUrl()+"/oai") + String systemEmailLabel = systemEmailAddress != null ? systemEmailAddress.getAddress() : "donotreply@localhost"; + + RepositoryConfiguration configuration = new RepositoryConfiguration.RepositoryConfigurationBuilder() + .withAdminEmail(systemEmailLabel) .withCompression("gzip") .withCompression("deflate") - .withAdminEmail(systemEmailAddress != null ? systemEmailAddress.getAddress() : null) - .withDeleteMethod(DeletedRecord.TRANSIENT) + .withGranularity(Granularity.Lenient) + .withResumptionTokenFormat(new SimpleResumptionTokenFormat().withGranularity(Granularity.Second)) + .withRepositoryName(repositoryName) + .withBaseUrl(systemConfig.getDataverseSiteUrl()+"/oai") + .withEarliestDate(recordService.getEarliestDate()) .withMaxListIdentifiers(maxListIdentifiers) + .withMaxListSets(maxListSets) .withMaxListRecords(maxListRecords) - .withMaxListSets(maxListSets); + .withDeleteMethod(DeletedRecord.TRANSIENT) + .withEnableMetadataAttributes(true) + .withRequireFromAfterEarliest(false) + .build(); - return repositoryConfiguration; + return configuration; } /** diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/server/xoai/DataverseXoaiItemRepository.java b/src/main/java/edu/harvard/iq/dataverse/harvest/server/xoai/DataverseXoaiItemRepository.java index faf3cf9ddc4..147d42648fa 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/server/xoai/DataverseXoaiItemRepository.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/server/xoai/DataverseXoaiItemRepository.java @@ -49,7 +49,7 @@ public DataverseXoaiItemRepository (OAIRecordServiceBean recordService, DatasetS } @Override - public ItemIdentifier getItem(String identifier) throws IdDoesNotExistException { + public ItemIdentifier getItemIdentifier(String identifier) throws IdDoesNotExistException { // This method is called when ListMetadataFormats request specifies // the identifier, requesting the formats available for this specific record. 
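For reference, the custom HTTP headers added to a harvesting client above are stored as a single string and parsed by OaiHandler.makeCustomHeaders(): header pairs are separated by a literal backslash-n sequence (two characters, not a newline) and each pair is split on its first colon. Below is a minimal standalone sketch of that parsing behaviour, not part of this changeset; the header names and values (X-API-Key, Accept-Language) are invented for illustration only.

import java.util.HashMap;
import java.util.Map;

public class CustomHeaderParsingSketch {
    public static void main(String[] args) {
        // Invented example; a real value would come from the harvesting client configuration.
        // The string contains a literal backslash followed by 'n' between the two pairs.
        String stored = "X-API-Key: abc123\\nAccept-Language: en";
        Map<String, String> headers = new HashMap<>();
        // Same splitting approach as makeCustomHeaders(): the regex \\n matches the two
        // characters '\' and 'n', not a line break.
        for (String part : stored.split("\\\\n")) {
            int colon = part.indexOf(':');
            if (colon > 0) {
                headers.put(part.substring(0, colon), part.substring(colon + 1).strip());
            }
            // parts without a colon are simply skipped, as in the patch
        }
        System.out.println(headers); // e.g. {Accept-Language=en, X-API-Key=abc123}
    }
}

Each resulting name/value pair is then registered on the XOAI JdkHttpOaiClient builder and on the GetRecord HttpRequest, so the remote OAI server sees the headers on every request the client makes.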
// In our case, under the current implementation, we need to simply look diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java index b03bae618a4..9c6acd964c1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java @@ -20,6 +20,8 @@ package edu.harvard.iq.dataverse.ingest; +import edu.harvard.iq.dataverse.AuxiliaryFile; +import edu.harvard.iq.dataverse.AuxiliaryFileServiceBean; import edu.harvard.iq.dataverse.ControlledVocabularyValue; import edu.harvard.iq.dataverse.datavariable.VariableCategory; import edu.harvard.iq.dataverse.datavariable.VariableServiceBean; @@ -72,6 +74,7 @@ //import edu.harvard.iq.dvn.unf.*; import org.dataverse.unf.*; import java.io.BufferedInputStream; +import java.io.ByteArrayInputStream; import java.io.File; import java.io.FileInputStream; import java.io.IOException; @@ -81,6 +84,7 @@ import java.nio.channels.FileChannel; import java.nio.channels.ReadableByteChannel; import java.nio.channels.WritableByteChannel; +import java.nio.charset.StandardCharsets; import java.nio.file.DirectoryStream; import java.nio.file.Files; import java.nio.file.Path; @@ -113,6 +117,9 @@ import javax.jms.QueueSession; import javax.jms.Message; import javax.faces.application.FacesMessage; +import javax.ws.rs.core.MediaType; +import ucar.nc2.NetcdfFile; +import ucar.nc2.NetcdfFiles; /** * @@ -134,6 +141,8 @@ public class IngestServiceBean { @EJB DataFileServiceBean fileService; @EJB + AuxiliaryFileServiceBean auxiliaryFileService; + @EJB SystemConfig systemConfig; @Resource(lookup = "java:app/jms/queue/ingest") @@ -232,6 +241,9 @@ public List saveAndAddFilesToDataset(DatasetVersion version, savedSuccess = true; logger.fine("Success: permanently saved file " + dataFile.getFileMetadata().getLabel()); + // TODO: reformat this file to remove the many tabs added in cc08330 + extractMetadataNcml(dataFile, tempLocationPath); + } catch (IOException ioex) { logger.warning("Failed to save the file, storage id " + dataFile.getStorageIdentifier() + " (" + ioex.getMessage() + ")"); } finally { @@ -343,6 +355,7 @@ public List saveAndAddFilesToDataset(DatasetVersion version, try { // FITS is the only type supported for metadata // extraction, as of now. -- L.A. 4.0 + // Note that extractMetadataNcml() is used for NetCDF/HDF5. dataFile.setContentType("application/fits"); metadataExtracted = extractMetadata(tempFileLocation, dataFile, version); } catch (IOException mex) { @@ -565,7 +578,6 @@ public int compare(DataFile d1, DataFile d2) { return sb.toString(); } - public void produceSummaryStatistics(DataFile dataFile, File generatedTabularFile) throws IOException { /* logger.info("Skipping summary statistics and UNF."); @@ -1206,7 +1218,104 @@ public boolean extractMetadata(String tempFileLocation, DataFile dataFile, Datas return ingestSuccessful; } - + /** + * @param dataFile The DataFile from which to attempt NcML extraction + * (NetCDF or HDF5 format) + * @param tempLocationPath Null if the file is already saved to permanent + * storage. Otherwise, the path to the temp location of the files, as during + * initial upload. + * @return True if the Ncml files was created. False on any error or if the + * NcML file already exists. 
+ */ + public boolean extractMetadataNcml(DataFile dataFile, Path tempLocationPath) { + boolean ncmlFileCreated = false; + logger.fine("extractMetadataNcml: dataFileIn: " + dataFile + ". tempLocationPath: " + tempLocationPath); + InputStream inputStream = null; + String dataFileLocation = null; + if (tempLocationPath != null) { + // This file was just uploaded and hasn't been saved to S3 or local storage. + dataFileLocation = tempLocationPath.toString(); + } else { + // This file is already on S3 or local storage. + File tempFile = null; + File localFile; + StorageIO storageIO; + try { + storageIO = dataFile.getStorageIO(); + storageIO.open(); + if (storageIO.isLocalFile()) { + localFile = storageIO.getFileSystemPath().toFile(); + dataFileLocation = localFile.getAbsolutePath(); + logger.fine("extractMetadataNcml: file is local. Path: " + dataFileLocation); + } else { + // Need to create a temporary local file: + tempFile = File.createTempFile("tempFileExtractMetadataNcml", ".tmp"); + try ( ReadableByteChannel targetFileChannel = (ReadableByteChannel) storageIO.getReadChannel(); FileChannel tempFileChannel = new FileOutputStream(tempFile).getChannel();) { + tempFileChannel.transferFrom(targetFileChannel, 0, storageIO.getSize()); + } + dataFileLocation = tempFile.getAbsolutePath(); + logger.fine("extractMetadataNcml: file is on S3. Downloaded and saved to temp path: " + dataFileLocation); + } + } catch (IOException ex) { + logger.info("While attempting to extract NcML, could not use storageIO for data file id " + dataFile.getId() + ". Exception: " + ex); + } + } + if (dataFileLocation != null) { + try ( NetcdfFile netcdfFile = NetcdfFiles.open(dataFileLocation)) { + logger.fine("trying to open " + dataFileLocation); + if (netcdfFile != null) { + // For now, empty string. What should we pass as a URL to toNcml()? The filename (including the path) most commonly at https://docs.unidata.ucar.edu/netcdf-java/current/userguide/ncml_cookbook.html + // With an empty string the XML will show 'location="file:"'. + String ncml = netcdfFile.toNcml(""); + inputStream = new ByteArrayInputStream(ncml.getBytes(StandardCharsets.UTF_8)); + } else { + logger.info("NetcdfFiles.open() could not open file id " + dataFile.getId() + " (null returned)."); + } + } catch (IOException ex) { + logger.info("NetcdfFiles.open() could not open file id " + dataFile.getId() + ". Exception caught: " + ex); + } + } else { + logger.info("dataFileLocation is null for file id " + dataFile.getId() + ". Can't extract NcML."); + } + if (inputStream != null) { + // If you change NcML, you must also change the previewer. + String formatTag = "NcML"; + // 0.1 is arbitrary. It's our first attempt to put out NcML so we're giving it a low number. + // If you bump the number here, be sure the bump the number in the previewer as well. + // We could use 2.2 here since that's the current version of NcML. + String formatVersion = "0.1"; + String origin = "netcdf-java"; + boolean isPublic = true; + // See also file.auxfiles.types.NcML in Bundle.properties. Used to group aux files in UI. + String type = "NcML"; + // XML because NcML doesn't have its own MIME/content type at https://www.iana.org/assignments/media-types/media-types.xhtml + MediaType mediaType = new MediaType("text", "xml"); + try { + // Let the cascade do the save if the file isn't yet on permanent storage. 
+ boolean callSave = false; + if (tempLocationPath == null) { + callSave = true; + // Check for an existing NcML file + logger.fine("Checking for existing NcML aux file for file id " + dataFile.getId()); + AuxiliaryFile existingAuxiliaryFile = auxiliaryFileService.lookupAuxiliaryFile(dataFile, formatTag, formatVersion); + if (existingAuxiliaryFile != null) { + logger.fine("Aux file already exists for NetCDF/HDF5 file for file id " + dataFile.getId()); + return false; + } + } + AuxiliaryFile auxFile = auxiliaryFileService.processAuxiliaryFile(inputStream, dataFile, formatTag, formatVersion, origin, isPublic, type, mediaType, callSave); + logger.fine("Aux file extracted from NetCDF/HDF5 file saved to storage (but not to the database yet) from file id " + dataFile.getId()); + ncmlFileCreated = true; + } catch (Exception ex) { + logger.info("exception throw calling processAuxiliaryFile: " + ex); + } + } else { + logger.info("extractMetadataNcml: input stream is null! dataFileLocation was " + dataFileLocation); + } + + return ncmlFileCreated; + } + private void processDatasetMetadata(FileMetadataIngest fileMetadataIngest, DatasetVersion editVersion) throws IOException { diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RDATAFileReader.java b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RDATAFileReader.java index c2899b29d1f..6d17a5bd553 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RDATAFileReader.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RDATAFileReader.java @@ -31,6 +31,7 @@ import javax.inject.Inject; // Rosuda Wrappers and Methods for R-calls to Rserve +import edu.harvard.iq.dataverse.settings.JvmSettings; import org.rosuda.REngine.REXP; import org.rosuda.REngine.REXPMismatchException; import org.rosuda.REngine.RList; @@ -88,10 +89,10 @@ public class RDATAFileReader extends TabularDataFileReader { static private String RSCRIPT_WRITE_DVN_TABLE = ""; // RServe static variables - private static String RSERVE_HOST = System.getProperty("dataverse.rserve.host"); - private static String RSERVE_USER = System.getProperty("dataverse.rserve.user"); - private static String RSERVE_PASSWORD = System.getProperty("dataverse.rserve.password"); - private static int RSERVE_PORT; + private final String RSERVE_HOST; + private final int RSERVE_PORT; + private final String RSERVE_USER; + private final String RSERVE_PASSWORD; // TODO: // we're not using these time/data formats for anything, are we? 
@@ -138,24 +139,6 @@ public class RDATAFileReader extends TabularDataFileReader { * This is primarily to construct the R-Script */ static { - /* - * Set defaults fallbacks for class properties - */ - if (RSERVE_HOST == null) - RSERVE_HOST = "localhost"; - - if (RSERVE_USER == null) - RSERVE_USER = "rserve"; - - if (RSERVE_PASSWORD == null) - RSERVE_PASSWORD = "rserve"; - - if (System.getProperty("dataverse.ingest.rserve.port") == null) - RSERVE_PORT = 6311; - else - RSERVE_PORT = Integer.parseInt(System.getProperty("dataverse.rserve.port")); - - // Load R Scripts into memory, so that we can run them via R-serve RSCRIPT_WRITE_DVN_TABLE = readLocalResource("scripts/write.table.R"); RSCRIPT_GET_DATASET = readLocalResource("scripts/get.dataset.R"); @@ -451,7 +434,20 @@ public RDATAFileReader(TabularDataFileReaderSpi originator) { super(originator); - + // These settings have sane defaults in resources/META-INF/microprofile-config.properties, + // ready to be overridden by a sysadmin. Every time a file would be read with this file reader, + // a new reader will be created, reading from the cached config source settings with minimal overhead. + this.RSERVE_HOST = JvmSettings.RSERVE_HOST.lookup(); + int port; + try { + port = JvmSettings.RSERVE_PORT.lookup(Integer.class); + } catch (IllegalArgumentException e) { + LOG.log(Level.SEVERE, "Could not parse value for " + JvmSettings.RSERVE_PORT.getScopedKey() + ", defaulting to 6311", e); + port = 6311; + } + this.RSERVE_PORT = port; + this.RSERVE_USER = JvmSettings.RSERVE_USER.lookup(); + this.RSERVE_PASSWORD = JvmSettings.RSERVE_PASSWORD.lookup(); LOG.fine("RDATAFileReader: INSIDE RDATAFileReader"); diff --git a/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java index 0189faf6598..50c8c4098a1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java @@ -513,7 +513,9 @@ public JsonArray fileDownloads(String yyyymm, Dataverse d, boolean uniqueCounts) for (Object[] result : results) { JsonObjectBuilder job = Json.createObjectBuilder(); job.add(MetricsUtil.ID, (int) result[0]); - job.add(MetricsUtil.PID, (String) result[1]); + if(result[1]!=null) { + job.add(MetricsUtil.PID, (String) result[1]); + } job.add(MetricsUtil.COUNT, (long) result[2]); jab.add(job); } diff --git a/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsUtil.java b/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsUtil.java index 90b61bcb29c..72d8f5402bb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsUtil.java @@ -227,7 +227,9 @@ public static JsonArray timeSeriesByIDAndPIDToJson(List results) { JsonObjectBuilder job = Json.createObjectBuilder(); job.add(MetricsUtil.DATE, date); job.add(ID, id); - job.add(PID, pids.get(id)); + if(pids.get(id)!=null) { + job.add(PID, pids.get(id)); + } job.add(COUNT, totals.get(id)); jab.add(job); } diff --git a/src/main/java/edu/harvard/iq/dataverse/rserve/RemoteDataFrameService.java b/src/main/java/edu/harvard/iq/dataverse/rserve/RemoteDataFrameService.java index f13b6f11434..df2e44ecb27 100644 --- a/src/main/java/edu/harvard/iq/dataverse/rserve/RemoteDataFrameService.java +++ b/src/main/java/edu/harvard/iq/dataverse/rserve/RemoteDataFrameService.java @@ -41,6 +41,7 @@ import java.util.Set; import java.util.logging.Logger; +import 
edu.harvard.iq.dataverse.settings.JvmSettings; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.RandomStringUtils; @@ -72,57 +73,33 @@ public class RemoteDataFrameService { private static String TMP_TABDATA_FILE_EXT = ".tab"; private static String TMP_RDATA_FILE_EXT = ".RData"; - - private static String RSERVE_HOST = null; - private static String RSERVE_USER = null; - private static String RSERVE_PWD = null; - private static int RSERVE_PORT = -1; + + // These settings have sane defaults in resources/META-INF/microprofile-config.properties, + // ready to be overridden by a sysadmin + private final String RSERVE_HOST; + private final String RSERVE_USER; + private final String RSERVE_PWD; + private final int RSERVE_PORT; + private final String RSERVE_TMP_DIR; private static String DATAVERSE_R_FUNCTIONS = "scripts/dataverse_r_functions.R"; private static String DATAVERSE_R_PREPROCESSING = "scripts/preprocess.R"; - - public static String LOCAL_TEMP_DIR = System.getProperty("java.io.tmpdir"); - public static String RSERVE_TMP_DIR=null; public String PID = null; public String tempFileNameIn = null; public String tempFileNameOut = null; - - static { - - RSERVE_TMP_DIR = System.getProperty("dataverse.rserve.tempdir"); - - if (RSERVE_TMP_DIR == null){ - RSERVE_TMP_DIR = "/tmp/"; - } - - RSERVE_HOST = System.getProperty("dataverse.rserve.host"); - if (RSERVE_HOST == null){ - RSERVE_HOST= "localhost"; - } - - RSERVE_USER = System.getProperty("dataverse.rserve.user"); - if (RSERVE_USER == null){ - RSERVE_USER= "rserve"; - } - - RSERVE_PWD = System.getProperty("dataverse.rserve.password"); - if (RSERVE_PWD == null){ - RSERVE_PWD= "rserve"; - } - - - if (System.getProperty("dataverse.rserve.port") == null ){ - RSERVE_PORT= 6311; - } else { - RSERVE_PORT = Integer.parseInt(System.getProperty("dataverse.rserve.port")); - } - - } - - public RemoteDataFrameService() { + // These settings have sane defaults in resources/META-INF/microprofile-config.properties, + // ready to be overridden by a sysadmin. Config sources have their own caches, so adding + // these here means the setting can be changed dynamically without too much overhead. + this.RSERVE_HOST = JvmSettings.RSERVE_HOST.lookup(); + this.RSERVE_USER = JvmSettings.RSERVE_USER.lookup(); + this.RSERVE_PWD = JvmSettings.RSERVE_PASSWORD.lookup(); + this.RSERVE_PORT = JvmSettings.RSERVE_PORT.lookup(Integer.class); + this.RSERVE_TMP_DIR = JvmSettings.RSERVE_TEMPDIR.lookup(); + + // initialization PID = RandomStringUtils.randomNumeric(6); @@ -703,15 +680,12 @@ public Map runDataFrameRequest(RJobRequest jobRequest, RConnecti public File transferRemoteFile(RConnection connection, String targetFilename, String tmpFilePrefix, String tmpFileExt, int fileSize) { - // set up a local temp file: - + // set up a local temp file: File tmpResultFile = null; - String resultFile = tmpFilePrefix + PID + "." 
+ tmpFileExt; - RFileInputStream rInStream = null; OutputStream outbr = null; try { - tmpResultFile = new File(LOCAL_TEMP_DIR, resultFile); + tmpResultFile = File.createTempFile(tmpFilePrefix + PID, "."+tmpFileExt); outbr = new BufferedOutputStream(new FileOutputStream(tmpResultFile)); // open the input stream rInStream = connection.openFile(targetFilename); diff --git a/src/main/java/edu/harvard/iq/dataverse/search/AdvancedSearchPage.java b/src/main/java/edu/harvard/iq/dataverse/search/AdvancedSearchPage.java index a7a89def449..ef37569ac54 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/AdvancedSearchPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/AdvancedSearchPage.java @@ -111,7 +111,8 @@ private String constructDatasetQuery() { List queryStrings = new ArrayList<>(); for (DatasetFieldType dsfType : metadataFieldList) { if (dsfType.getSearchValue() != null && !dsfType.getSearchValue().equals("")) { - queryStrings.add(constructQuery(dsfType.getSolrField().getNameSearchable(), dsfType.getSearchValue())); + //CVoc fields return term URIs - add quotes around them to avoid solr breaking them into individual search words + queryStrings.add(constructQuery(dsfType.getSolrField().getNameSearchable(), dsfType.getSearchValue(), getCVocConf().containsKey(dsfType.getId()))); } else if (dsfType.getListValues() != null && !dsfType.getListValues().isEmpty()) { List listQueryStrings = new ArrayList<>(); for (String value : dsfType.getListValues()) { diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index 4661e9c1cd5..e73cce8acbe 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -31,6 +31,7 @@ import edu.harvard.iq.dataverse.datavariable.VariableMetadataUtil; import edu.harvard.iq.dataverse.datavariable.VariableServiceBean; import edu.harvard.iq.dataverse.harvest.client.HarvestingClient; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.StringUtil; @@ -88,6 +89,8 @@ import org.apache.tika.metadata.Metadata; import org.apache.tika.parser.ParseContext; import org.apache.tika.sax.BodyContentHandler; +import org.eclipse.microprofile.config.Config; +import org.eclipse.microprofile.config.ConfigProvider; import org.xml.sax.ContentHandler; @Stateless @@ -95,6 +98,7 @@ public class IndexServiceBean { private static final Logger logger = Logger.getLogger(IndexServiceBean.class.getCanonicalName()); + private static final Config config = ConfigProvider.getConfig(); @PersistenceContext(unitName = "VDCNet-ejbPU") private EntityManager em; @@ -155,13 +159,18 @@ public class IndexServiceBean { public static final String HARVESTED = "Harvested"; private String rootDataverseName; private Dataverse rootDataverseCached; - private SolrClient solrServer; + SolrClient solrServer; private VariableMetadataUtil variableMetadataUtil; @PostConstruct public void init() { - String urlString = "http://" + systemConfig.getSolrHostColonPort() + "/solr/collection1"; + // Get from MPCONFIG. Might be configured by a sysadmin or simply return the default shipped with + // resources/META-INF/microprofile-config.properties. 
+ String protocol = JvmSettings.SOLR_PROT.lookup(); + String path = JvmSettings.SOLR_PATH.lookup(); + + String urlString = protocol + "://" + systemConfig.getSolrHostColonPort() + path; solrServer = new HttpSolrClient.Builder(urlString).build(); rootDataverseName = findRootDataverseCached().getName(); @@ -1488,6 +1497,7 @@ private List findAllLinkingDataverses(DvObject dvObject){ dataset = (Dataset) dvObject; linkingDataverses = dsLinkingService.findLinkingDataverses(dataset.getId()); ancestorList = dataset.getOwner().getOwners(); + ancestorList.add(dataset.getOwner()); //to show dataset in linking dv when parent dv is linked } if(dvObject.isInstanceofDataverse()){ dv = (Dataverse) dvObject; @@ -1652,6 +1662,11 @@ private List retrieveDVOPaths(DvObject dvo) { logger.info("failed to find dataverseSegments for dataversePaths for " + SearchFields.SUBTREE + ": " + ex); } List dataversePaths = getDataversePathsFromSegments(dataverseSegments); + if (dataversePaths.size() > 0 && dvo.isInstanceofDataverse()) { + // removing the dataverse's own id from the paths + // fixes bug where if my parent dv was linked my dv was shown as linked to myself + dataversePaths.remove(dataversePaths.size() - 1); + } /* add linking paths */ diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchUtil.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchUtil.java index 8a1045a842c..adcc5825766 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchUtil.java @@ -117,6 +117,10 @@ public static String determineFinalQuery(String userSuppliedQuery) { } public static String constructQuery(String solrField, String userSuppliedQuery) { + return constructQuery(solrField, userSuppliedQuery, false); + } + + public static String constructQuery(String solrField, String userSuppliedQuery, boolean addQuotes) { StringBuilder queryBuilder = new StringBuilder(); String delimiter = "[\"]+"; @@ -134,7 +138,12 @@ public static String constructQuery(String solrField, String userSuppliedQuery) } else { StringTokenizer st = new StringTokenizer(userSuppliedQuery); while (st.hasMoreElements()) { - queryStrings.add(solrField + ":" + st.nextElement()); + String nextElement = (String) st.nextElement(); + //Entries such as URIs will get tokenized into individual words by solr unless they are in quotes + if(addQuotes) { + nextElement = "\"" + nextElement + "\""; + } + queryStrings.add(solrField + ":" + nextElement); } } } diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SolrClientService.java b/src/main/java/edu/harvard/iq/dataverse/search/SolrClientService.java index f00ece9aacc..0dc2fe08b54 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SolrClientService.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SolrClientService.java @@ -5,16 +5,18 @@ */ package edu.harvard.iq.dataverse.search; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.util.SystemConfig; -import java.io.IOException; -import java.util.logging.Logger; +import org.apache.solr.client.solrj.SolrClient; +import org.apache.solr.client.solrj.impl.HttpSolrClient; + import javax.annotation.PostConstruct; import javax.annotation.PreDestroy; import javax.ejb.EJB; import javax.ejb.Singleton; import javax.inject.Named; -import org.apache.solr.client.solrj.SolrClient; -import org.apache.solr.client.solrj.impl.HttpSolrClient; +import java.io.IOException; +import java.util.logging.Logger; /** * @@ -38,9 +40,13 @@ public class 
SolrClientService { @PostConstruct public void init() { - String urlString = "http://" + systemConfig.getSolrHostColonPort() + "/solr/collection1"; - solrClient = new HttpSolrClient.Builder(urlString).build(); + // Get from MPCONFIG. Might be configured by a sysadmin or simply return the default shipped with + // resources/META-INF/microprofile-config.properties. + String protocol = JvmSettings.SOLR_PROT.lookup(); + String path = JvmSettings.SOLR_PATH.lookup(); + String urlString = protocol + "://" + systemConfig.getSolrHostColonPort() + path; + solrClient = new HttpSolrClient.Builder(urlString).build(); } @PreDestroy diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java index ef4422e8d89..5856004ce53 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java @@ -10,9 +10,7 @@ import edu.harvard.iq.dataverse.DvObject; import edu.harvard.iq.dataverse.DvObjectServiceBean; import edu.harvard.iq.dataverse.FileMetadata; -import edu.harvard.iq.dataverse.util.SystemConfig; import java.io.IOException; -import java.sql.Timestamp; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; @@ -36,9 +34,7 @@ public class SolrIndexServiceBean { private static final Logger logger = Logger.getLogger(SolrIndexServiceBean.class.getCanonicalName()); - - @EJB - SystemConfig systemConfig; + @EJB DvObjectServiceBean dvObjectService; @EJB diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java index e409607346b..ed3a161075b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java @@ -41,6 +41,28 @@ public enum JvmSettings { // GENERAL SETTINGS VERSION(PREFIX, "version"), BUILD(PREFIX, "build"), + FQDN(PREFIX, "fqdn"), + SITE_URL(PREFIX, "siteUrl"), + + // FILES SETTINGS + SCOPE_FILES(PREFIX, "files"), + FILES_DIRECTORY(SCOPE_FILES, "directory"), + + // SOLR INDEX SETTINGS + SCOPE_SOLR(PREFIX, "solr"), + SOLR_HOST(SCOPE_SOLR, "host"), + SOLR_PORT(SCOPE_SOLR, "port"), + SOLR_PROT(SCOPE_SOLR, "protocol"), + SOLR_CORE(SCOPE_SOLR, "core"), + SOLR_PATH(SCOPE_SOLR, "path"), + + // RSERVE CONNECTION + SCOPE_RSERVE(PREFIX, "rserve"), + RSERVE_HOST(SCOPE_RSERVE, "host"), + RSERVE_PORT(SCOPE_RSERVE, "port", "dataverse.ingest.rserve.port"), + RSERVE_USER(SCOPE_RSERVE, "user"), + RSERVE_PASSWORD(SCOPE_RSERVE, "password"), + RSERVE_TEMPDIR(SCOPE_RSERVE, "tempdir"), // API SETTINGS SCOPE_API(PREFIX, "api"), diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index 102772bdcf3..d84e18d5931 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -174,7 +174,12 @@ public enum Key { * */ SearchRespectPermissionRoot, - /** Solr hostname and port, such as "localhost:8983". */ + /** + * Solr hostname and port, such as "localhost:8983". + * @deprecated New installations should not use this database setting, but use {@link JvmSettings#SOLR_HOST} + * and {@link JvmSettings#SOLR_PORT}. 
+ */ + @Deprecated(forRemoval = true, since = "2022-12-23") SolrHostColonPort, /** Enable full-text indexing in solr up to max file size */ SolrFullTextIndexing, //true or false (default) @@ -563,11 +568,16 @@ Whether Harvesting (OAI) service is enabled /* * Allow a custom JavaScript to control values of specific fields. */ - ControlledVocabularyCustomJavaScript, + ControlledVocabularyCustomJavaScript, /** * A compound setting for disabling signup for remote Auth providers: */ - AllowRemoteAuthSignUp + AllowRemoteAuthSignUp, + /** + * The URL for the DvWebLoader tool (see github.com/gdcc/dvwebloader for details) + */ + WebloaderUrl + ; @Override diff --git a/src/main/java/edu/harvard/iq/dataverse/util/DataSourceProducer.java b/src/main/java/edu/harvard/iq/dataverse/util/DataSourceProducer.java index 630f192890b..800c05ae6dc 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/DataSourceProducer.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/DataSourceProducer.java @@ -16,9 +16,13 @@ // HINT: PGSimpleDataSource would work too, but as we use a connection pool, go with a javax.sql.ConnectionPoolDataSource // HINT: PGXADataSource is unnecessary (no distributed transactions used) and breaks ingest. className = "org.postgresql.ds.PGConnectionPoolDataSource", - user = "${MPCONFIG=dataverse.db.user}", + + // BEWARE: as this resource is created before defaults are read from META-INF/microprofile-config.properties, + // defaults must be provided in this Payara-proprietary manner. + user = "${MPCONFIG=dataverse.db.user:dataverse}", password = "${MPCONFIG=dataverse.db.password}", - url = "jdbc:postgresql://${MPCONFIG=dataverse.db.host}:${MPCONFIG=dataverse.db.port}/${MPCONFIG=dataverse.db.name}", + url = "jdbc:postgresql://${MPCONFIG=dataverse.db.host:localhost}:${MPCONFIG=dataverse.db.port:5432}/${MPCONFIG=dataverse.db.name:dataverse}?${MPCONFIG=dataverse.db.parameters:}", + // If we ever need to change these pool settings, we need to remove this class and create the resource // from web.xml. We can use MicroProfile Config in there for these values, impossible to do in the annotation. // @@ -30,18 +34,30 @@ maxPoolSize = 100, // "The number of seconds that a physical connection should remain unused in the pool before the connection is closed for a connection pool. " // Payara DataSourceDefinitionDeployer default value = 300 (seconds) - maxIdleTime = 300) -// It's possible to add additional properties like this... -// -//properties = { -// "fish.payara.log-jdbc-calls=true" -//}) -// -// ... but at this time we don't think we need any. The full list -// of properties can be found at https://docs.payara.fish/community/docs/5.2021.6/documentation/payara-server/jdbc/advanced-connection-pool-properties.html#full-list-of-properties -// -// All these properties cannot be configured via MPCONFIG as Payara doesn't support this (yet). To be enhanced. -// See also https://github.com/payara/Payara/issues/5024 + maxIdleTime = 300, + + // Set more options via MPCONFIG, including defaults where applicable. + // TODO: Future versions of Payara might support setting integer properties like pool size, + // idle times, etc in a Payara-propietary way. 
See https://github.com/payara/Payara/pull/5272 + properties = { + // The following options are documented here: + // https://docs.payara.fish/community/docs/documentation/payara-server/jdbc/advanced-connection-pool-properties.html + // VALIDATION + "fish.payara.is-connection-validation-required=${MPCONFIG=dataverse.db.is-connection-validation-required:false}", + "fish.payara.connection-validation-method=${MPCONFIG=dataverse.db.connection-validation-method:}", + "fish.payara.validation-table-name=${MPCONFIG=dataverse.db.validation-table-name:}", + "fish.payara.validation-classname=${MPCONFIG=dataverse.db.validation-classname:}", + "fish.payara.validate-atmost-once-period-in-seconds=${MPCONFIG=dataverse.db.validate-atmost-once-period-in-seconds:0}", + // LEAK DETECTION + "fish.payara.connection-leak-timeout-in-seconds=${MPCONFIG=dataverse.db.connection-leak-timeout-in-seconds:0}", + "fish.payara.connection-leak-reclaim=${MPCONFIG=dataverse.db.connection-leak-reclaim:false}", + "fish.payara.statement-leak-timeout-in-seconds=${MPCONFIG=dataverse.db.statement-leak-timeout-in-seconds:0}", + "fish.payara.statement-leak-reclaim=${MPCONFIG=dataverse.db.statement-leak-reclaim:false}", + // LOGGING, SLOWNESS, PERFORMANCE + "fish.payara.statement-timeout-in-seconds=${MPCONFIG=dataverse.db.statement-timeout-in-seconds:-1}", + "fish.payara.slow-query-threshold-in-seconds=${MPCONFIG=dataverse.db.slow-query-threshold-in-seconds:-1}", + "fish.payara.log-jdbc-calls=${MPCONFIG=dataverse.db.log-jdbc-calls:false}" + }) public class DataSourceProducer { @Resource(lookup = "java:app/jdbc/dataverse") diff --git a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java index 257bc166ea0..c600abfd409 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java @@ -40,6 +40,7 @@ import edu.harvard.iq.dataverse.ingest.IngestServiceShapefileHelper; import edu.harvard.iq.dataverse.ingest.IngestableDataChecker; import edu.harvard.iq.dataverse.license.License; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.util.file.BagItFileHandler; import edu.harvard.iq.dataverse.util.file.CreateDataFileResult; import edu.harvard.iq.dataverse.util.file.BagItFileHandlerFactory; @@ -1436,11 +1437,8 @@ public static boolean canIngestAsTabular(String mimeType) { } public static String getFilesTempDirectory() { - String filesRootDirectory = System.getProperty("dataverse.files.directory"); - if (filesRootDirectory == null || filesRootDirectory.equals("")) { - filesRootDirectory = "/tmp/files"; - } - + + String filesRootDirectory = JvmSettings.FILES_DIRECTORY.lookup(); String filesTempDirectory = filesRootDirectory + "/temp"; if (!Files.exists(Paths.get(filesTempDirectory))) { @@ -2142,7 +2140,9 @@ public static String jsonArrayOfObjectsToCSV(JsonArray jsonArray, String... 
head JsonObject jo = (JsonObject) jv; String[] values = new String[headers.length]; for (int i = 0; i < headers.length; i++) { - values[i] = jo.get(headers[i]).toString(); + if(jo.containsKey(headers[i])) { + values[i] = jo.get(headers[i]).toString(); + } } csvSB.append("\n").append(String.join(",", values)); }); diff --git a/src/main/java/edu/harvard/iq/dataverse/util/MailUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/MailUtil.java index d64a1f7cce1..72980c3451a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/MailUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/MailUtil.java @@ -34,8 +34,12 @@ public static String getSubjectTextBasedOnNotification(UserNotification userNoti List rootDvNameAsList = Arrays.asList(BrandingUtil.getInstallationBrandName()); String datasetDisplayName = ""; - if (objectOfNotification != null && (objectOfNotification instanceof Dataset) ) { - datasetDisplayName = ((Dataset)objectOfNotification).getDisplayName(); + if (objectOfNotification != null) { + if (objectOfNotification instanceof Dataset) { + datasetDisplayName = ((Dataset) objectOfNotification).getDisplayName(); + } else if (objectOfNotification instanceof DatasetVersion) { + datasetDisplayName = ((DatasetVersion) objectOfNotification).getDataset().getDisplayName(); + } } switch (userNotification.getType()) { diff --git a/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java new file mode 100644 index 00000000000..da33fc9597e --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java @@ -0,0 +1,155 @@ +package edu.harvard.iq.dataverse.util; + +import java.util.ArrayList; +import java.util.List; +import java.util.logging.Logger; + +import javax.json.JsonArray; +import javax.json.JsonObject; +import javax.json.JsonObjectBuilder; +import javax.json.JsonString; + +import edu.harvard.iq.dataverse.export.openaire.Cleanup; +import edu.harvard.iq.dataverse.export.openaire.FirstNames; +import edu.harvard.iq.dataverse.export.openaire.Organizations; +import edu.harvard.iq.dataverse.util.json.JsonUtil; +import edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder; + +/** + * + * @author qqmyers + * + * Adapted from earlier code in OpenAireExportUtil + * + * Implements an algorithm derived from code at DataCite to determine + * whether a name is that of a Person or Organization and, if the + * former, to pull out the given and family names. + * + * Adds parameters that can improve accuracy: + * + * * e.g. for curated repositories, allowing the code to assume that all + * Person entries are in , order. + * + * * allow local configuration of specific words/phrases that will + * automatically categorize one-off cases that the algorithm would + * otherwise mis-categorize. For example, the code appears to not + * recognize names ending in "Project" as an Organization. + * + */ + +public class PersonOrOrgUtil { + + private static final Logger logger = Logger.getLogger(PersonOrOrgUtil.class.getCanonicalName()); + + static boolean assumeCommaInPersonName = false; + static List orgPhrases; + + static { + setAssumeCommaInPersonName(Boolean.parseBoolean(System.getProperty("dataverse.personOrOrg.assumeCommaInPersonName", "false"))); + setOrgPhraseArray(System.getProperty("dataverse.personOrOrg.orgPhraseArray", null)); + } + + /** + * This method tries to determine if a name belongs to a person or an + * organization and, if it is a person, what the given and family names are. 
The + * core algorithm is adapted from a Datacite algorithm, see + * https://github.com/IQSS/dataverse/issues/2243#issuecomment-358615313 + * + * @param name + * - the name to test + * @param organizationIfTied + * - if a given name isn't found, should the name be assumed to be + * from an organization. This could be a generic true/false or + * information from some non-name aspect of the entity, e.g. which + * field is in use, or whether a .edu email exists, etc. + * @param isPerson + * - if this is known to be a person due to other info (i.e. they + * have an ORCID). In this case the algorithm is just looking for + * given/family names. + * @return + */ + public static JsonObject getPersonOrOrganization(String name, boolean organizationIfTied, boolean isPerson) { + name = Cleanup.normalize(name); + + String givenName = null; + String familyName = null; + + boolean isOrganization = !isPerson && Organizations.getInstance().isOrganization(name); + if (!isOrganization) { + for (String phrase : orgPhrases) { + if (name.contains(phrase)) { + isOrganization = true; + break; + } + } + } + if (name.contains(",")) { + givenName = FirstNames.getInstance().getFirstName(name); + // contributorName=, + if (givenName != null && !isOrganization) { + // givenName ok + isOrganization = false; + // contributor_map.put("nameType", "Personal"); + if (!name.replaceFirst(",", "").contains(",")) { + // contributorName=, + String[] fullName = name.split(", "); + givenName = fullName[1]; + familyName = fullName[0]; + } + } else if (isOrganization || organizationIfTied) { + isOrganization = true; + givenName = null; + } + + } else { + if (assumeCommaInPersonName && !isPerson) { + isOrganization = true; + } else { + givenName = FirstNames.getInstance().getFirstName(name); + + if (givenName != null && !isOrganization) { + isOrganization = false; + if (givenName.length() + 1 < name.length()) { + familyName = name.substring(givenName.length() + 1); + } + } else { + // default + if (isOrganization || organizationIfTied) { + isOrganization = true; + givenName=null; + } + } + } + } + JsonObjectBuilder job = new NullSafeJsonBuilder(); + job.add("fullName", name); + job.add("givenName", givenName); + job.add("familyName", familyName); + job.add("isPerson", !isOrganization); + return job.build(); + + } + + // Public for testing + public static void setOrgPhraseArray(String phraseArray) { + orgPhrases = new ArrayList(); + if (!StringUtil.isEmpty(phraseArray)) { + try { + JsonArray phrases = JsonUtil.getJsonArray(phraseArray); + phrases.forEach(val -> { + JsonString strVal = (JsonString) val; + orgPhrases.add(strVal.getString()); + }); + } catch (Exception e) { + logger.warning("Could not parse Org phrase list"); + } + } + + } + + // Public for testing + public static void setAssumeCommaInPersonName(boolean assume) { + assumeCommaInPersonName = assume; + } + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java index 80af2df081c..c989add6e3d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java @@ -1,18 +1,26 @@ package edu.harvard.iq.dataverse.util; import com.ocpsoft.pretty.PrettyContext; - import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.DataverseServiceBean; import edu.harvard.iq.dataverse.DvObjectContainer; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import 
edu.harvard.iq.dataverse.authorization.providers.builtin.BuiltinAuthenticationProvider; import edu.harvard.iq.dataverse.authorization.providers.oauth2.AbstractOAuth2AuthenticationProvider; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.validation.PasswordValidatorUtil; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStream; +import org.passay.CharacterRule; + +import javax.ejb.EJB; +import javax.ejb.Stateless; +import javax.inject.Named; +import javax.json.Json; +import javax.json.JsonArray; +import javax.json.JsonObject; +import javax.json.JsonReader; +import javax.json.JsonString; +import javax.json.JsonValue; import java.io.StringReader; import java.net.InetAddress; import java.net.UnknownHostException; @@ -23,25 +31,12 @@ import java.util.List; import java.util.Map; import java.util.MissingResourceException; -import java.util.Properties; +import java.util.Optional; import java.util.ResourceBundle; import java.util.logging.Logger; import java.util.regex.Matcher; import java.util.regex.Pattern; -import javax.ejb.EJB; -import javax.ejb.Stateless; -import javax.inject.Named; -import javax.json.Json; -import javax.json.JsonArray; -import javax.json.JsonObject; -import javax.json.JsonReader; -import javax.json.JsonString; -import javax.json.JsonValue; - -import org.passay.CharacterRule; -import org.apache.commons.io.IOUtils; - /** * System-wide configuration */ @@ -61,28 +56,7 @@ public class SystemConfig { AuthenticationServiceBean authenticationService; public static final String DATAVERSE_PATH = "/dataverse/"; - - /** - * A JVM option for the advertised fully qualified domain name (hostname) of - * the Dataverse installation, such as "dataverse.example.com", which may - * differ from the hostname that the server knows itself as. - * - * The equivalent in DVN 3.x was "dvn.inetAddress". - */ - public static final String FQDN = "dataverse.fqdn"; - - /** - * A JVM option for specifying the "official" URL of the site. - * Unlike the FQDN option above, this would be a complete URL, - * with the protocol, port number etc. - */ - public static final String SITE_URL = "dataverse.siteUrl"; - - /** - * A JVM option for where files are stored on the file system. - */ - public static final String FILES_DIRECTORY = "dataverse.files.directory"; - + /** * Some installations may not want download URLs to their files to be * available in Schema.org JSON-LD output. @@ -95,12 +69,6 @@ public class SystemConfig { */ private static final String PASSWORD_RESET_TIMEOUT_IN_MINUTES = "dataverse.auth.password-reset-timeout-in-minutes"; - /** - * A common place to find the String for a sane Solr hostname:port - * combination. - */ - private String saneDefaultForSolrHostColonPort = "localhost:8983"; - /** * The default number of datafiles that we allow to be created through * zip file upload. 
@@ -109,9 +77,8 @@ public class SystemConfig { public static final long defaultZipDownloadLimit = 104857600L; // 100MB private static final int defaultMultipleUploadFilesLimit = 1000; private static final int defaultLoginSessionTimeout = 480; // = 8 hours - - private static String appVersionString = null; - private static String buildNumberString = null; + + private String buildNumber = null; private static final String JVM_TIMER_SERVER_OPTION = "dataverse.timerServer"; @@ -132,137 +99,60 @@ public String getVersion() { // candidate for being moved into some kind of an application-scoped caching // service... some CachingService @Singleton - ? (L.A. 5.8) public String getVersion(boolean withBuildNumber) { - - if (appVersionString == null) { - - // The Version Number is no longer supplied in a .properties file - so - // we can't just do - // return BundleUtil.getStringFromBundle("version.number", null, ResourceBundle.getBundle("VersionNumber", Locale.US)); - // - // Instead, we'll rely on Maven placing the version number into the - // Manifest, and getting it from there: - // (this is considered a better practice, and will also allow us - // to maintain this number in only one place - the pom.xml file) - // -- L.A. 4.0.2 - - // One would assume, that once the version is in the MANIFEST.MF, - // as Implementation-Version:, it would be possible to obtain - // said version simply as - // appVersionString = getClass().getPackage().getImplementationVersion(); - // alas - that's not working, for whatever reason. (perhaps that's - // only how it works with jar-ed packages; not with .war files). - // People on the interwebs suggest that one should instead - // open the Manifest as a resource, then extract its attributes. - // There were some complications with that too. Plus, relying solely - // on the MANIFEST.MF would NOT work for those of the developers who - // are using "in place deployment" (i.e., where - // Netbeans runs their builds directly from the local target - // directory, bypassing the war file deployment; and the Manifest - // is only available in the .war file). For that reason, I am - // going to rely on the pom.properties file, and use java.util.Properties - // to read it. We have to look for this file in 2 different places - // depending on whether this is a .war file deployment, or a - // developers build. (the app-level META-INF is only populated when - // a .war file is built; the "maven-archiver" directory, on the other - // hand, is only available when it's a local build deployment). - // So, long story short, I'm resorting to the convoluted steps below. - // It may look hacky, but it should actually be pretty solid and - // reliable. - - - // First, find the absolute path url of the application persistence file - // always supplied with the Dataverse app: - java.net.URL fileUrl = Thread.currentThread().getContextClassLoader().getResource("META-INF/persistence.xml"); - String filePath = null; - - - if (fileUrl != null) { - filePath = fileUrl.getFile(); - if (filePath != null) { - InputStream mavenPropertiesInputStream = null; - String mavenPropertiesFilePath; - Properties mavenProperties = new Properties(); - - - filePath = filePath.replaceFirst("/[^/]*$", "/"); - // Using a relative path, find the location of the maven pom.properties file. - // First, try to look for it in the app-level META-INF. 
This will only be - // available if it's a war file deployment: - mavenPropertiesFilePath = filePath.concat("../../../META-INF/maven/edu.harvard.iq/dataverse/pom.properties"); - - try { - mavenPropertiesInputStream = new FileInputStream(mavenPropertiesFilePath); - } catch (IOException ioex) { - // OK, let's hope this is a local dev. build. - // In that case the properties file should be available in - // the maven-archiver directory: - - mavenPropertiesFilePath = filePath.concat("../../../../maven-archiver/pom.properties"); - - // try again: - - try { - mavenPropertiesInputStream = new FileInputStream(mavenPropertiesFilePath); - } catch (IOException ioex2) { - logger.warning("Failed to find and/or open for reading the pom.properties file."); - mavenPropertiesInputStream = null; - } - } - - if (mavenPropertiesInputStream != null) { - try { - mavenProperties.load(mavenPropertiesInputStream); - appVersionString = mavenProperties.getProperty("version"); - } catch (IOException ioex) { - logger.warning("caught IOException trying to read and parse the pom properties file."); - } finally { - IOUtils.closeQuietly(mavenPropertiesInputStream); - } - } - - } else { - logger.warning("Null file path representation of the location of persistence.xml in the webapp root directory!"); - } - } else { - logger.warning("Could not find the location of persistence.xml in the webapp root directory!"); - } - - - if (appVersionString == null) { - // still null? - defaulting to 4.0: - appVersionString = "4.0"; - } - } + // Retrieve the version via MPCONFIG + // NOTE: You may override the version via all methods of MPCONFIG. + // It will default to read from microprofile-config.properties source, + // which contains in the source a Maven property reference to ${project.version}. + // When packaging the app to deploy it, Maven will replace this, rendering it a static entry. + String appVersion = JvmSettings.VERSION.lookup(); if (withBuildNumber) { - if (buildNumberString == null) { - // (build number is still in a .properties file in the source tree; it only - // contains a real build number if this war file was built by - // Jenkins) - + if (buildNumber == null) { + // (build number is still in a .properties file in the source tree; it only + // contains a real build number if this war file was built by Jenkins) + // TODO: might be replaced with same trick as for version via Maven property w/ empty default try { - buildNumberString = ResourceBundle.getBundle("BuildNumber").getString("build.number"); + buildNumber = ResourceBundle.getBundle("BuildNumber").getString("build.number"); } catch (MissingResourceException ex) { - buildNumberString = null; + buildNumber = null; + } + + // Also try to read the build number via MicroProfile Config if not already present from the + // properties file (so can be overridden by env var or other source) + if (buildNumber == null || buildNumber.isEmpty()) { + buildNumber = JvmSettings.BUILD.lookupOptional().orElse(""); } } - if (buildNumberString != null && !buildNumberString.equals("")) { - return appVersionString + " build " + buildNumberString; - } - } + if (!buildNumber.equals("")) { + return appVersion + " build " + buildNumber; + } + } - return appVersionString; + return appVersion; } - + + /** + * Retrieve the Solr endpoint in "host:port" form, to be used with a Solr client. 
+ * + * This will retrieve the setting from either the database ({@link SettingsServiceBean.Key#SolrHostColonPort}) or + * via Microprofile Config API (properties {@link JvmSettings#SOLR_HOST} and {@link JvmSettings#SOLR_PORT}). + * + * A database setting always takes precedence. If not given via other config sources, a default from + * resources/META-INF/microprofile-config.properties is used. (It's possible to use profiles.) + * + * @return Solr endpoint as string "hostname:port" + */ public String getSolrHostColonPort() { - String SolrHost; - if ( System.getenv("SOLR_SERVICE_HOST") != null && System.getenv("SOLR_SERVICE_HOST") != ""){ - SolrHost = System.getenv("SOLR_SERVICE_HOST"); - } - else SolrHost = saneDefaultForSolrHostColonPort; - String solrHostColonPort = settingsService.getValueForKey(SettingsServiceBean.Key.SolrHostColonPort, SolrHost); - return solrHostColonPort; + // Get from MPCONFIG. Might be configured by a sysadmin or simply return the default shipped with + // resources/META-INF/microprofile-config.properties. + // NOTE: containers should use system property mp.config.profile=ct to use sane container usage default + String host = JvmSettings.SOLR_HOST.lookup(); + String port = JvmSettings.SOLR_PORT.lookup(); + + // DB setting takes precedence over all. If not present, will return default from above. + return Optional.ofNullable(settingsService.getValueForKey(SettingsServiceBean.Key.SolrHostColonPort)) + .orElse(host + ":" + port); } public boolean isProvCollectionEnabled() { @@ -340,32 +230,58 @@ public static int getMinutesUntilPasswordResetTokenExpires() { } /** - * The "official", designated URL of the site; - * can be defined as a complete URL; or derived from the - * "official" hostname. If none of these options is set, - * defaults to the InetAddress.getLocalHOst() and https; - * These are legacy JVM options. Will be eventualy replaced - * by the Settings Service configuration. + * Lookup (or construct) the designated URL of this instance from configuration. + * + * Can be defined as a complete URL via dataverse.siteUrl; or derived from the hostname + * dataverse.fqdn and HTTPS. If none of these options is set, defaults to the + * {@link InetAddress#getLocalHost} and HTTPS. + * + * NOTE: This method does not provide any validation. + * TODO: The behaviour of this method is subject to a later change, see + * https://github.com/IQSS/dataverse/issues/6636 + * + * @return The designated URL of this instance as per configuration. */ public String getDataverseSiteUrl() { return getDataverseSiteUrlStatic(); } + /** + * Lookup (or construct) the designated URL of this instance from configuration. + * + * Can be defined as a complete URL via dataverse.siteUrl; or derived from the hostname + * dataverse.fqdn and HTTPS. If none of these options is set, defaults to the + * {@link InetAddress#getLocalHost} and HTTPS. + * + * NOTE: This method does not provide any validation. + * TODO: The behaviour of this method is subject to a later change, see + * https://github.com/IQSS/dataverse/issues/6636 + * + * @return The designated URL of this instance as per configuration. 
+ */ public static String getDataverseSiteUrlStatic() { - String hostUrl = System.getProperty(SITE_URL); - if (hostUrl != null && !"".equals(hostUrl)) { - return hostUrl; + // If dataverse.siteUrl has been configured, simply return it + Optional siteUrl = JvmSettings.SITE_URL.lookupOptional(); + if (siteUrl.isPresent()) { + return siteUrl.get(); } - String hostName = System.getProperty(FQDN); - if (hostName == null) { - try { - hostName = InetAddress.getLocalHost().getCanonicalHostName(); - } catch (UnknownHostException e) { - return null; - } + + // Otherwise try to lookup dataverse.fqdn setting and default to HTTPS + Optional fqdn = JvmSettings.FQDN.lookupOptional(); + if (fqdn.isPresent()) { + return "https://" + fqdn.get(); + } + + // Last resort - get the servers local name and use it. + // BEWARE - this is dangerous. + // 1) A server might have a different name than your repository URL. + // 2) The underlying reverse DNS lookup might point to a different name than your repository URL. + // 3) If this server has multiple IPs assigned, which one will it be for the lookup? + try { + return "https://" + InetAddress.getLocalHost().getCanonicalHostName(); + } catch (UnknownHostException e) { + return null; } - hostUrl = "https://" + hostName; - return hostUrl; } /** @@ -375,22 +291,6 @@ public String getPageURLWithQueryString() { return PrettyContext.getCurrentInstance().getRequestURL().toURL() + PrettyContext.getCurrentInstance().getRequestQueryString().toQueryString(); } - /** - * The "official" server's fully-qualified domain name: - */ - public String getDataverseServer() { - // still reliese on a JVM option: - String fqdn = System.getProperty(FQDN); - if (fqdn == null) { - try { - fqdn = InetAddress.getLocalHost().getCanonicalHostName(); - } catch (UnknownHostException e) { - return null; - } - } - return fqdn; - } - public String getGuidesBaseUrl() { String saneDefault = "https://guides.dataverse.org"; String guidesBaseUrl = settingsService.getValueForKey(SettingsServiceBean.Key.GuidesBaseUrl, saneDefault); @@ -862,7 +762,13 @@ public enum FileUploadMethods { * Upload through Globus of large files */ - GLOBUS("globus") + GLOBUS("globus"), + + /** + * Upload folders of files through dvwebloader app + */ + + WEBLOADER("dvwebloader"); ; @@ -999,6 +905,10 @@ public boolean isRsyncUpload(){ public boolean isGlobusUpload(){ return getMethodAvailable(FileUploadMethods.GLOBUS.toString(), true); } + + public boolean isWebloaderUpload(){ + return getMethodAvailable(FileUploadMethods.WEBLOADER.toString(), true); + } // Controls if HTTP upload is enabled for both GUI and API. 
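For operators, the effect of the MicroProfile-Config-based lookups above can be summarized with a small sketch. The values are illustrative only, and it assumes the usual MicroProfile Config behaviour in which JVM system properties are one of the configuration sources for dataverse.siteUrl and dataverse.fqdn; the example class name is made up.

    import edu.harvard.iq.dataverse.util.SystemConfig;

    public class SiteUrlExample {
        public static void main(String[] args) {
            // Started with -Ddataverse.siteUrl=https://data.example.edu        -> prints that URL unchanged
            // Started with only -Ddataverse.fqdn=data.example.edu              -> prints https://data.example.edu
            // Started with neither                                             -> prints https://<local canonical host name>
            System.out.println(SystemConfig.getDataverseSiteUrlStatic());

            // getSolrHostColonPort() follows the same pattern: dataverse.solr.host and
            // dataverse.solr.port (shipped default localhost:8983 in microprofile-config.properties)
            // are used unless the SolrHostColonPort database setting overrides them.
        }
    }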
public boolean isHTTPUpload(){ diff --git a/src/main/java/edu/harvard/iq/dataverse/util/WebloaderUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/WebloaderUtil.java new file mode 100644 index 00000000000..c2d9bf67236 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/util/WebloaderUtil.java @@ -0,0 +1,36 @@ +package edu.harvard.iq.dataverse.util; + +import java.util.Date; +import java.util.Enumeration; +import java.util.HashMap; +import java.util.Locale; +import java.util.Map.Entry; +import java.util.logging.Logger; + +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpSession; + +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.DatasetPage; +import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; +import edu.harvard.iq.dataverse.authorization.users.ApiToken; +import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; +import edu.harvard.iq.dataverse.authorization.users.User; +import edu.harvard.iq.dataverse.settings.SettingsServiceBean; + +public class WebloaderUtil { + + private static final Logger logger = Logger.getLogger(WebloaderUtil.class.getCanonicalName()); + + /** + * Create the URL required to launch https://github.com/gdcc/dvwebloader + */ + public static String getWebloaderUrl(Dataset d, ApiToken apiToken, String localeCode, String baseUrl) { + // Use URLTokenUtil for params currently in common with external tools. + URLTokenUtil tokenUtil = new URLTokenUtil(d, apiToken, localeCode); + String appUrl; + appUrl = baseUrl + + "?datasetPid={datasetPid}&siteUrl={siteUrl}&key={apiToken}&datasetId={datasetId}&datasetVersion={datasetVersion}&dvLocale={localeCode}"; + return tokenUtil.replaceTokensWithValues(appUrl); + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java index 905479c4e0d..22e2c6c8d78 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java @@ -908,6 +908,7 @@ public String parseHarvestingClient(JsonObject obj, HarvestingClient harvestingC harvestingClient.setArchiveDescription(obj.getString("archiveDescription", null)); harvestingClient.setMetadataPrefix(obj.getString("metadataFormat",null)); harvestingClient.setHarvestingSet(obj.getString("set",null)); + harvestingClient.setCustomHttpHeaders(obj.getString("customHeaders", null)); return dataverseAlias; } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java index dc547f2e52c..9f5401f77d1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java @@ -37,6 +37,7 @@ import edu.harvard.iq.dataverse.dataset.DatasetUtil; import edu.harvard.iq.dataverse.license.License; import edu.harvard.iq.dataverse.globus.FileDetailsHolder; +import edu.harvard.iq.dataverse.harvest.client.HarvestingClient; import edu.harvard.iq.dataverse.privateurl.PrivateUrl; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.DatasetFieldWalker; @@ -579,6 +580,7 @@ public static JsonObjectBuilder json(FileMetadata fmd) { // in a sense that there's no longer the category field in the // fileMetadata object; but there are now multiple, oneToMany file // categories - and we probably need to export them too!) -- L.A. 
4.5 + // DONE: catgegories by name .add("description", fmd.getDescription()) .add("label", fmd.getLabel()) // "label" is the filename .add("restricted", fmd.isRestricted()) @@ -616,13 +618,13 @@ public static JsonObjectBuilder json(DataFile df, FileMetadata fileMetadata) { // (TODO...? L.A. 4.5, Aug 7 2016) String fileName = null; - if (fileMetadata != null) { - fileName = fileMetadata.getLabel(); - } else if (df.getFileMetadata() != null) { + if (fileMetadata == null){ // Note that this may not necessarily grab the file metadata from the // version *you want*! (L.A.) - fileName = df.getFileMetadata().getLabel(); + fileMetadata = df.getFileMetadata(); } + + fileName = fileMetadata.getLabel(); String pidURL = ""; @@ -639,7 +641,8 @@ public static JsonObjectBuilder json(DataFile df, FileMetadata fileMetadata) { .add("filename", fileName) .add("contentType", df.getContentType()) .add("filesize", df.getFilesize()) - .add("description", df.getDescription()) + .add("description", fileMetadata.getDescription()) + .add("categories", getFileCategories(fileMetadata)) .add("embargo", embargo) //.add("released", df.isReleased()) //.add("restricted", df.isRestricted()) @@ -666,6 +669,32 @@ public static JsonObjectBuilder json(DataFile df, FileMetadata fileMetadata) { ; } + public static JsonObjectBuilder json(HarvestingClient harvestingClient) { + if (harvestingClient == null) { + return null; + } + + return jsonObjectBuilder().add("nickName", harvestingClient.getName()). + add("dataverseAlias", harvestingClient.getDataverse().getAlias()). + add("type", harvestingClient.getHarvestType()). + add("style", harvestingClient.getHarvestStyle()). + add("harvestUrl", harvestingClient.getHarvestingUrl()). + add("archiveUrl", harvestingClient.getArchiveUrl()). + add("archiveDescription", harvestingClient.getArchiveDescription()). + add("metadataFormat", harvestingClient.getMetadataPrefix()). + add("set", harvestingClient.getHarvestingSet()). + add("schedule", harvestingClient.isScheduled() ? harvestingClient.getScheduleDescription() : "none"). + add("status", harvestingClient.isHarvestingNow() ? "inProgress" : "inActive"). + add("customHeaders", harvestingClient.getCustomHttpHeaders()). + add("lastHarvest", harvestingClient.getLastHarvestTime() == null ? null : harvestingClient.getLastHarvestTime().toString()). + add("lastResult", harvestingClient.getLastResult()). + add("lastSuccessful", harvestingClient.getLastSuccessfulHarvestTime() == null ? null : harvestingClient.getLastSuccessfulHarvestTime().toString()). + add("lastNonEmpty", harvestingClient.getLastNonEmptyHarvestTime() == null ? null : harvestingClient.getLastNonEmptyHarvestTime().toString()). + add("lastDatasetsHarvested", harvestingClient.getLastHarvestedDatasetCount()). // == null ? "N/A" : harvestingClient.getLastHarvestedDatasetCount().toString()). + add("lastDatasetsDeleted", harvestingClient.getLastDeletedDatasetCount()). // == null ? "N/A" : harvestingClient.getLastDeletedDatasetCount().toString()). + add("lastDatasetsFailed", harvestingClient.getLastFailedDatasetCount()); // == null ? "N/A" : harvestingClient.getLastFailedDatasetCount().toString()); + } + public static String format(Date d) { return (d == null) ? 
null : Util.getDateTimeFormat().format(d); } @@ -702,7 +731,7 @@ public static JsonArrayBuilder getTabularFileTags(DataFile df) { } return tabularTags; } - + private static class DatasetFieldsToJson implements DatasetFieldWalker.Listener { Deque objectStack = new LinkedList<>(); diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonUtil.java index ef506990f69..d02099eddb5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonUtil.java @@ -63,8 +63,8 @@ public static javax.json.JsonObject getJsonObject(String serializedJson) { return Json.createReader(rdr).readObject(); } } - - public static JsonArray getJsonArray(String serializedJson) { + + public static javax.json.JsonArray getJsonArray(String serializedJson) { try (StringReader rdr = new StringReader(serializedJson)) { return Json.createReader(rdr).readArray(); } diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index e8238e79267..45807dc7cde 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -232,12 +232,12 @@ notification.access.revoked.datafile=You have been removed from a role in {0}. notification.checksumfail=One or more files in your upload failed checksum validation for dataset {1}. Please re-run the upload script. If the problem persists, please contact support. notification.ingest.completed=Your Dataset {2} has one or more tabular files that completed the tabular ingest process. These files will be available for download in their original formats and other formats for enhanced archival purposes after you publish the dataset. The archival .tab files are displayed in the file table. Please see the guides for more information about ingest and support for tabular files. notification.ingest.completedwitherrors=Your Dataset {2} has one or more tabular files that have been uploaded successfully but are not supported for tabular ingest. After you publish the dataset, these files will not have additional archival features. Please see the guides for more information about ingest and support for tabular files.

Files with incomplete ingest:{5} -notification.mail.import.filesystem=Globus transfer to Dataset {2} ({0}/dataset.xhtml?persistentId={1}) was successful. File(s) have been uploaded and verified. +notification.mail.import.filesystem=Dataset {2} ({0}/dataset.xhtml?persistentId={1}) has been successfully uploaded and verified. notification.mail.globus.upload.completed=Globus transfer to Dataset {2} was successful. File(s) have been uploaded and verified.

{3}
notification.mail.globus.download.completed=Globus transfer of file(s) from the dataset {2} was successful.

{3}
notification.mail.globus.upload.completedWithErrors=Globus transfer to Dataset {2} is complete with errors.

{3}
notification.mail.globus.download.completedWithErrors=Globus transfer from the dataset {2} is complete with errors.

{3}
-notification.import.filesystem=Globus transfer to Dataset {1} was successful. File(s) have been uploaded and verified. +notification.import.filesystem=Dataset {1} has been successfully uploaded and verified. notification.globus.upload.completed=Globus transfer to Dataset {1} was successful. File(s) have been uploaded and verified. notification.globus.download.completed=Globus transfer from the dataset {1} was successful. notification.globus.upload.completedWithErrors=Globus transfer to Dataset {1} is complete with errors. @@ -538,6 +538,10 @@ harvestclients.newClientDialog.nickname.helptext=Consists of letters, digits, un harvestclients.newClientDialog.nickname.required=Client nickname cannot be empty! harvestclients.newClientDialog.nickname.invalid=Client nickname can contain only letters, digits, underscores (_) and dashes (-); and must be at most 30 characters. harvestclients.newClientDialog.nickname.alreadyused=This nickname is already used. +harvestclients.newClientDialog.customHeader=Custom HTTP Header +harvestclients.newClientDialog.customHeader.helptext=(Optional) Custom HTTP header to add to requests, if required by this OAI server. +harvestclients.newClientDialog.customHeader.watermark=Enter an http header, as in header-name: header-value +harvestclients.newClientDialog.customHeader.invalid=Client header name can only contain letters, digits, underscores (_) and dashes (-); the entire header string must be in the form of "header-name: header-value" harvestclients.newClientDialog.type=Server Protocol harvestclients.newClientDialog.type.helptext=Only the OAI server protocol is currently supported. harvestclients.newClientDialog.type.OAI=OAI @@ -1523,7 +1527,7 @@ dataset.subjectDisplay.title=Subject dataset.contact.tip=Use email button above to contact. dataset.asterisk.tip=Asterisks indicate required fields dataset.message.uploadFiles.label=Upload Dataset Files -dataset.message.uploadFilesSingle.message=All file types are supported for upload and download in their original format. If you are uploading Excel, CSV, TSV, RData, Stata, or SPSS files, see the guides for tabular support and limitations. +dataset.message.uploadFilesSingle.message=All file types are supported for upload and download in their original format. If you are uploading Excel, CSV, TSV, RData, Stata, or SPSS files, see the guides for tabular support and limitations. dataset.message.uploadFilesMultiple.message=Multiple file upload/download methods are available for this dataset. Once you upload a file using one of these methods, your choice will be locked in for this dataset. dataset.message.editMetadata.label=Edit Dataset Metadata dataset.message.editMetadata.message=Add more metadata about this dataset to help others easily find it. @@ -1673,6 +1677,10 @@ file.finishGlobus=Globus Transfer has finished file.downloadFromGlobus=Download through Globus file.globus.transfer=Globus Transfer file.globus.of=of: +file.fromWebloader.tip=Upload a folder of files. This method retains the relative path structure from your local machine. (Using it will cancel any other types of uploads in progress on this page.) +file.fromWebloaderAfterCreate.tip=An option to upload a folder of files will be enabled after this dataset is created. +file.fromWebloader=Upload a Folder + file.api.httpDisabled=File upload via HTTP is not available for this installation of Dataverse. file.api.alreadyHasPackageFile=File upload via HTTP disabled since this dataset already contains a package file. 
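The customHeader entries above expect a single string in the "header-name: header-value" form. As a rough sketch of a harvesting-client JSON carrying such a header, built with the javax.json API used elsewhere in this change set: the field names follow the new json(HarvestingClient) printer and parseHarvestingClient changes shown earlier, while the header name, token value, and example class name are invented for illustration.

    import javax.json.Json;
    import javax.json.JsonObject;

    public class HarvestingClientJsonExample {
        public static void main(String[] args) {
            // "customHeaders" uses the "header-name: header-value" form described by the help text;
            // the header name and token here are placeholders.
            JsonObject client = Json.createObjectBuilder()
                    .add("dataverseAlias", "root")
                    .add("harvestUrl", "https://demo.dataverse.org/oai")
                    .add("archiveUrl", "https://demo.dataverse.org")
                    .add("metadataFormat", "oai_dc")
                    .add("customHeaders", "X-Oai-Api-Key: not-a-real-token")
                    .build();
            System.out.println(client);
        }
    }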
file.replace.original=Original File @@ -2007,6 +2015,7 @@ file.remotelyStored=This file is stored remotely - click for more info file.auxfiles.download.header=Download Auxiliary Files # These types correspond to the AuxiliaryFile.Type enum. file.auxfiles.types.DP=Differentially Private Statistics +file.auxfiles.types.NcML=XML from NetCDF/HDF5 (NcML) # Add more types here file.auxfiles.unspecifiedTypes=Other Auxiliary Files @@ -2547,6 +2556,7 @@ admin.api.deleteUser.success=Authenticated User {0} deleted. #Files.java files.api.metadata.update.duplicateFile=Filename already exists at {0} +files.api.no.draft=No draft available for this file #Datasets.java datasets.api.updatePIDMetadata.failure.dataset.must.be.released=Modify Registration Metadata must be run on a published dataset. diff --git a/src/main/java/propertyFiles/MimeTypeDetectionByFileExtension.properties b/src/main/java/propertyFiles/MimeTypeDetectionByFileExtension.properties index c93bb56151f..97b2eed111c 100644 --- a/src/main/java/propertyFiles/MimeTypeDetectionByFileExtension.properties +++ b/src/main/java/propertyFiles/MimeTypeDetectionByFileExtension.properties @@ -3,6 +3,7 @@ ado=application/x-stata-ado dbf=application/dbf dcm=application/dicom docx=application/vnd.openxmlformats-officedocument.wordprocessingml.document +eln=application/vnd.eln+zip emf=application/x-emf geojson=application/geo+json h5=application/x-h5 diff --git a/src/main/java/propertyFiles/MimeTypeDisplay.properties b/src/main/java/propertyFiles/MimeTypeDisplay.properties index 928419c0405..295ac226fa1 100644 --- a/src/main/java/propertyFiles/MimeTypeDisplay.properties +++ b/src/main/java/propertyFiles/MimeTypeDisplay.properties @@ -169,6 +169,7 @@ application/x-7z-compressed=7Z Archive application/x-xz=XZ Archive application/warc=Web Archive application/x-iso9660-image=Optical Disc Image +application/vnd.eln+zip=ELN Archive # Image image/gif=GIF Image image/jpeg=JPEG Image diff --git a/src/main/java/propertyFiles/MimeTypeFacets.properties b/src/main/java/propertyFiles/MimeTypeFacets.properties index 2cac63a7ad0..aaab66f20ae 100644 --- a/src/main/java/propertyFiles/MimeTypeFacets.properties +++ b/src/main/java/propertyFiles/MimeTypeFacets.properties @@ -170,6 +170,7 @@ application/x-7z-compressed=Archive application/x-xz=Archive application/warc=Archive application/x-iso9660-image=Archive +application/vnd.eln+zip=Archive # Image image/gif=Image image/jpeg=Image @@ -224,4 +225,4 @@ text/xml-graphml=Network Data # Other application/octet-stream=Unknown # Dataverse-specific -application/vnd.dataverse.file-package=Data \ No newline at end of file +application/vnd.dataverse.file-package=Data diff --git a/src/main/java/propertyFiles/astrophysics.properties b/src/main/java/propertyFiles/astrophysics.properties index a49b8b66510..6e04bac590f 100644 --- a/src/main/java/propertyFiles/astrophysics.properties +++ b/src/main/java/propertyFiles/astrophysics.properties @@ -50,9 +50,9 @@ datasetfieldtype.coverage.SkyFraction.description=The fraction of the sky repres datasetfieldtype.coverage.Polarization.description=The polarization coverage datasetfieldtype.redshiftType.description=RedshiftType string C "Redshift"; or "Optical" or "Radio" definitions of Doppler velocity used in the data object. datasetfieldtype.resolution.Redshift.description=The resolution in redshift (unitless) or Doppler velocity (km/s) in the data object. -datasetfieldtype.coverage.RedshiftValue.description=The value of the redshift (unitless) or Doppler velocity (km/s in the data object. 
-datasetfieldtype.coverage.Redshift.MinimumValue.description=The minimum value of the redshift (unitless) or Doppler velocity (km/s in the data object. -datasetfieldtype.coverage.Redshift.MaximumValue.description=The maximum value of the redshift (unitless) or Doppler velocity (km/s in the data object. +datasetfieldtype.coverage.RedshiftValue.description=The value of the redshift (unitless) or Doppler velocity (km/s) in the data object. +datasetfieldtype.coverage.Redshift.MinimumValue.description=The minimum value of the redshift (unitless) or Doppler velocity (km/s) in the data object. +datasetfieldtype.coverage.Redshift.MaximumValue.description=The maximum value of the redshift (unitless) or Doppler velocity (km/s) in the data object. datasetfieldtype.astroType.watermark= datasetfieldtype.astroFacility.watermark= datasetfieldtype.astroInstrument.watermark= @@ -102,4 +102,4 @@ controlledvocabulary.astroType.observation=Observation controlledvocabulary.astroType.object=Object controlledvocabulary.astroType.value=Value controlledvocabulary.astroType.valuepair=ValuePair -controlledvocabulary.astroType.survey=Survey \ No newline at end of file +controlledvocabulary.astroType.survey=Survey diff --git a/src/main/java/propertyFiles/codeMeta20.properties b/src/main/java/propertyFiles/codeMeta20.properties new file mode 100644 index 00000000000..c0e7eac6d4a --- /dev/null +++ b/src/main/java/propertyFiles/codeMeta20.properties @@ -0,0 +1,85 @@ +metadatablock.name=codeMeta20 +metadatablock.displayName=Software Metadata (CodeMeta 2.0) +datasetfieldtype.codeVersion.title=Software Version +datasetfieldtype.codeVersion.description=Version of the software instance, usually following some convention like SemVer etc. +datasetfieldtype.codeVersion.watermark=e.g. 0.2.1 or 1.3 or 2021.1 etc +datasetfieldtype.developmentStatus.title=Development Status +datasetfieldtype.developmentStatus.description=Description of development status, e.g. work in progress (wip), active, etc. See repostatus.org for more information. +datasetfieldtype.developmentStatus.watermark= Development Status +datasetfieldtype.codeRepository.title=Code Repository +datasetfieldtype.codeRepository.description=Link to the repository where the un-compiled, human-readable code and related code is located (SVN, GitHub, CodePlex, institutional GitLab instance, Gitea, etc.). +datasetfieldtype.codeRepository.watermark=e.g. https://github.com/user/project +datasetfieldtype.applicationCategory.title=Application Category +datasetfieldtype.applicationCategory.description=Type of software application, e.g. Simulation, Analysis, Visualisation. +datasetfieldtype.applicationCategory.watermark= +datasetfieldtype.applicationSubCategory.title=Application Subcategory +datasetfieldtype.applicationSubCategory.description=Subcategory of the application, e.g. Arcade Game. +datasetfieldtype.applicationSubCategory.watermark= +datasetfieldtype.programmingLanguage.title=Programming Language +datasetfieldtype.programmingLanguage.description=The programming language(s) used to implement the software (e.g. Python, C++, Matlab, Fortran, Java, Julia,...) +datasetfieldtype.programmingLanguage.watermark= +datasetfieldtype.runtimePlatform.title=Runtime Platform +datasetfieldtype.runtimePlatform.description=Runtime platform or script interpreter dependencies (e.g. Java 11, Python 3.10 or .Net Framework 4.8). +datasetfieldtype.runtimePlatform.watermark=e.g. 
Python 3.10 +datasetfieldtype.operatingSystem.title=Operating Systems +datasetfieldtype.operatingSystem.description=Operating systems supported (e.g. Windows 10, OSX 11.3, Android 11). +datasetfieldtype.operatingSystem.watermark= +datasetfieldtype.targetProduct.title=Target Product +datasetfieldtype.targetProduct.description=Target Operating System / Product to which the code applies. If applies to several versions, just the product name can be used. +datasetfieldtype.targetProduct.watermark= +datasetfieldtype.buildInstructions.title=Build Instructions +datasetfieldtype.buildInstructions.description=Link to installation instructions/documentation +datasetfieldtype.buildInstructions.watermark=e.g. https://github.com/user/project/blob/main/BUILD.md +datasetfieldtype.softwareRequirementsItem.title=Software Requirements +datasetfieldtype.softwareRequirementsItem.description=Required software dependencies +datasetfieldtype.softwareRequirementsItem.watermark= +datasetfieldtype.softwareRequirements.title=Name & Version +datasetfieldtype.softwareRequirements.description=Name and version of the required software/library dependency +datasetfieldtype.softwareRequirements.watermark=e.g. Pandas 1.4.3 +datasetfieldtype.softwareRequirementsInfoUrl.title=Info URL +datasetfieldtype.softwareRequirementsInfoUrl.description=Link to required software/library homepage or documentation (ideally also versioned) +datasetfieldtype.softwareRequirementsInfoUrl.watermark=e.g. https://pandas.pydata.org/pandas-docs/version/1.4.3 +datasetfieldtype.softwareSuggestionsItem.title=Software Suggestions +datasetfieldtype.softwareSuggestionsItem.description=Optional dependencies, e.g. for optional features, code development, etc. +datasetfieldtype.softwareSuggestionsItem.watermark= +datasetfieldtype.softwareSuggestions.title=Name & Version +datasetfieldtype.softwareSuggestions.description=Name and version of the optional software/library dependency +datasetfieldtype.softwareSuggestions.watermark=e.g. Sphinx 5.0.2 +datasetfieldtype.softwareSuggestionsInfoUrl.title=Info URL +datasetfieldtype.softwareSuggestionsInfoUrl.description=Link to optional software/library homepage or documentation (ideally also versioned) +datasetfieldtype.softwareSuggestionsInfoUrl.watermark=e.g. https://www.sphinx-doc.org +datasetfieldtype.memoryRequirements.title=Memory Requirements +datasetfieldtype.memoryRequirements.description=Minimum memory requirements. +datasetfieldtype.memoryRequirements.watermark= +datasetfieldtype.processorRequirements.title=Processor Requirements +datasetfieldtype.processorRequirements.description=Processor architecture or other CPU requirements to run the application (e.g. IA64). +datasetfieldtype.processorRequirements.watermark= +datasetfieldtype.storageRequirements.title=Storage Requirements +datasetfieldtype.storageRequirements.description=Minimum storage requirements (e.g. free space required). +datasetfieldtype.storageRequirements.watermark= +datasetfieldtype.permissions.title=Permissions +datasetfieldtype.permissions.description=Permission(s) required to run the code (for example, a mobile app may require full internet access or may run only on wifi). +datasetfieldtype.permissions.watermark= +datasetfieldtype.softwareHelp.title=Software Help/Documentation +datasetfieldtype.softwareHelp.description=Link to help texts or documentation +datasetfieldtype.softwareHelp.watermark=e.g. 
https://user.github.io/project/docs +datasetfieldtype.readme.title=Readme +datasetfieldtype.readme.description=Link to the README of the project +datasetfieldtype.readme.watermark=e.g. https://github.com/user/project/blob/main/README.md +datasetfieldtype.releaseNotes.title=Release Notes +datasetfieldtype.releaseNotes.description=Link to release notes +datasetfieldtype.releaseNotes.watermark=e.g. https://github.com/user/project/blob/main/docs/release-0.1.md +datasetfieldtype.contIntegration.title=Continuous Integration +datasetfieldtype.contIntegration.description=Link to continuous integration service +datasetfieldtype.contIntegration.watermark=e.g. https://github.com/user/project/actions +datasetfieldtype.issueTracker.title=Issue Tracker +datasetfieldtype.issueTracker.description=Link to software bug reporting or issue tracking system +datasetfieldtype.issueTracker.watermark=e.g. https://github.com/user/project/issues +controlledvocabulary.developmentStatus.concept=Concept +controlledvocabulary.developmentStatus.wip=WIP +controlledvocabulary.developmentStatus.active=Active +controlledvocabulary.developmentStatus.inactive=Inactive +controlledvocabulary.developmentStatus.unsupported=Unsupported +controlledvocabulary.developmentStatus.moved=Moved +controlledvocabulary.developmentStatus.suspended=Suspended +controlledvocabulary.developmentStatus.abandoned=Abandoned diff --git a/src/main/resources/META-INF/microprofile-config.properties b/src/main/resources/META-INF/microprofile-config.properties index be02bb1b090..58592775a98 100644 --- a/src/main/resources/META-INF/microprofile-config.properties +++ b/src/main/resources/META-INF/microprofile-config.properties @@ -3,11 +3,36 @@ dataverse.version=${project.version} dataverse.build= +# Default only for containers! 
(keep mimicking the current behaviour - +# changing that is part of https://github.com/IQSS/dataverse/issues/6636) +%ct.dataverse.fqdn=localhost +%ct.dataverse.siteUrl=http://${dataverse.fqdn}:8080 + +# FILES +dataverse.files.directory=/tmp/dataverse + +# SEARCH INDEX +dataverse.solr.host=localhost +# Activating mp config profile -Dmp.config.profile=ct changes default to "solr" as DNS name +%ct.dataverse.solr.host=solr +dataverse.solr.port=8983 +dataverse.solr.protocol=http +dataverse.solr.core=collection1 +dataverse.solr.path=/solr/${dataverse.solr.core} + # DATABASE dataverse.db.host=localhost dataverse.db.port=5432 dataverse.db.user=dataverse dataverse.db.name=dataverse + +# RSERVE +dataverse.rserve.host=localhost +dataverse.rserve.port=6311 +dataverse.rserve.user=rserve +dataverse.rserve.password=rserve +dataverse.rserve.tempdir=/tmp/Rserv + # OAI SERVER dataverse.oai.server.maxidentifiers=100 dataverse.oai.server.maxrecords=10 diff --git a/src/main/resources/db/migration/V5.13.0.1__8671-sorting_licenses.sql b/src/main/resources/db/migration/V5.12.1.1__8671-sorting_licenses.sql similarity index 100% rename from src/main/resources/db/migration/V5.13.0.1__8671-sorting_licenses.sql rename to src/main/resources/db/migration/V5.12.1.1__8671-sorting_licenses.sql diff --git a/src/main/resources/db/migration/V5.13.0.2__7715-signed-urls-for-tools.sql b/src/main/resources/db/migration/V5.12.1.2__7715-signed-urls-for-tools.sql similarity index 100% rename from src/main/resources/db/migration/V5.13.0.2__7715-signed-urls-for-tools.sql rename to src/main/resources/db/migration/V5.12.1.2__7715-signed-urls-for-tools.sql diff --git a/src/main/resources/db/migration/V5.12.1.3__8840-improve-guestbook-estimates.sql b/src/main/resources/db/migration/V5.12.1.3__8840-improve-guestbook-estimates.sql new file mode 100644 index 00000000000..91ab5253f9c --- /dev/null +++ b/src/main/resources/db/migration/V5.12.1.3__8840-improve-guestbook-estimates.sql @@ -0,0 +1 @@ +ALTER TABLE guestbookresponse SET (autovacuum_analyze_scale_factor = 0.01); \ No newline at end of file diff --git a/src/main/resources/db/migration/V5.12.1.4__9153-extract-metadata.sql b/src/main/resources/db/migration/V5.12.1.4__9153-extract-metadata.sql new file mode 100644 index 00000000000..48230d21032 --- /dev/null +++ b/src/main/resources/db/migration/V5.12.1.4__9153-extract-metadata.sql @@ -0,0 +1 @@ +ALTER TABLE externaltool ADD COLUMN IF NOT EXISTS requirements TEXT; diff --git a/src/main/resources/db/migration/V5.12.1.5__9231_custom_headers_oai_requests.sql b/src/main/resources/db/migration/V5.12.1.5__9231_custom_headers_oai_requests.sql new file mode 100644 index 00000000000..fe6d717b2a3 --- /dev/null +++ b/src/main/resources/db/migration/V5.12.1.5__9231_custom_headers_oai_requests.sql @@ -0,0 +1 @@ +ALTER TABLE harvestingclient ADD COLUMN IF NOT EXISTS customhttpheaders TEXT; diff --git a/src/main/webapp/dataset-license-terms.xhtml b/src/main/webapp/dataset-license-terms.xhtml index 8b5c86b9c1c..86e52092622 100644 --- a/src/main/webapp/dataset-license-terms.xhtml +++ b/src/main/webapp/dataset-license-terms.xhtml @@ -25,6 +25,7 @@ +
-
+ or !empty termsOfUseAndAccess.studyCompletion}">
  diff --git a/src/main/webapp/dataset.xhtml b/src/main/webapp/dataset.xhtml index 4d5e0850083..6b91f815d9a 100644 --- a/src/main/webapp/dataset.xhtml +++ b/src/main/webapp/dataset.xhtml @@ -846,6 +846,7 @@ + diff --git a/src/main/webapp/editFilesFragment.xhtml b/src/main/webapp/editFilesFragment.xhtml index 0fd5bf48fb7..a4e635b8c14 100644 --- a/src/main/webapp/editFilesFragment.xhtml +++ b/src/main/webapp/editFilesFragment.xhtml @@ -158,8 +158,13 @@ widgetVar="fileUploadWidget"> - -
+ +
+

#{bundle['file.fromWebloader.tip']}

+

#{bundle['file.fromWebloaderAfterCreate.tip']}

+ +
+

#{bundle['file.fromDropbox.tip']}

-
+

#{bundle['file.fromGlobus.tip']}

#{bundle['file.fromGlobusAfterCreate.tip']}

- +
@@ -578,7 +583,8 @@

#{EditDatafilesPage.warningMessageForFileTypeDifferentPopUp}

-
diff --git a/src/main/webapp/file.xhtml b/src/main/webapp/file.xhtml index 098fdd46a39..6196780aa82 100644 --- a/src/main/webapp/file.xhtml +++ b/src/main/webapp/file.xhtml @@ -475,16 +475,17 @@ #{bundle['file.metadata.filetags']} - -
- - + +
+ + - - + + +
- + diff --git a/src/main/webapp/harvestclients.xhtml b/src/main/webapp/harvestclients.xhtml index 5c7b3482ed3..3c09ed4ecb0 100644 --- a/src/main/webapp/harvestclients.xhtml +++ b/src/main/webapp/harvestclients.xhtml @@ -277,6 +277,23 @@
+ + +
+ +
+ + + +

#{bundle['harvestclients.newClientDialog.customHeader.helptext']}

+
+
diff --git a/src/main/webapp/resources/css/structure.css b/src/main/webapp/resources/css/structure.css index c184c46cee9..a0e81f2a8df 100644 --- a/src/main/webapp/resources/css/structure.css +++ b/src/main/webapp/resources/css/structure.css @@ -884,7 +884,6 @@ div.panel-body.read-terms{max-height:220px; overflow-y:scroll; width:100%; backg .dropin-btn-status.ui-icon {background: url("https://www.dropbox.com/static/images/widgets/dbx-saver-status.png") no-repeat;} .globus-btn.ui-icon {background: url("https://docs.globus.org/images/home/transfer.png") no-repeat;background-size:contain;display:inline-block;} - /* VERSIONS */ div[id$="versionsTable"] th.col-select-width * {display:none;} #version-details-block > div {width:35%; padding:4px 10px; border: 1px solid #DDDDDD;} @@ -928,6 +927,9 @@ thead.ui-datatable-scrollable-theadclone {display:none} #file-title-block span.glyphicon-lock, #file-title-block span.icon-unlock {top:1px;} #file-title-block span.ingest-warning {vertical-align:text-bottom;font-size:30px;} #fileCategoriesBlock span.label {margin-right: 0.5em;} +#file-tags .label { + display:inline-block; +} /* BLOCK UI */ .ui-blockui.ui-widget-overlay {background:#fff; opacity:.6;} diff --git a/src/main/webapp/resources/js/fileupload.js b/src/main/webapp/resources/js/fileupload.js index 564239ee7ef..a478235c09f 100644 --- a/src/main/webapp/resources/js/fileupload.js +++ b/src/main/webapp/resources/js/fileupload.js @@ -144,6 +144,7 @@ var fileUpload = class fileUploadClass { async doUpload() { this.state = UploadState.UPLOADING; var thisFile = curFile-1; + this.id=thisFile; //This appears to be the earliest point when the file table has been populated, and, since we don't know how many table entries have had ids added already, we check var filerows = $('.ui-fileupload-files .ui-fileupload-row'); //Add an id attribute to each entry so we can later match progress and errors with the right entry @@ -318,7 +319,7 @@ var fileUpload = class fileUploadClass { if (directUploadReport) { getMD5(this.file, prog => { var current = 1 + prog; - $('progress').attr({ + $('[upid="' + this.id + '"] progress').attr({ value: current, max: 2 }); diff --git a/src/main/webapp/search-include-fragment.xhtml b/src/main/webapp/search-include-fragment.xhtml index f70356aa393..a6e344afb8c 100644 --- a/src/main/webapp/search-include-fragment.xhtml +++ b/src/main/webapp/search-include-fragment.xhtml @@ -594,7 +594,15 @@
- + + + + + + + + +
diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsTest.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsTest.java new file mode 100644 index 00000000000..fded590d9db --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsTest.java @@ -0,0 +1,58 @@ +package edu.harvard.iq.dataverse.api; + +import org.junit.Test; +import java.util.HashSet; +import java.util.Set; +import java.util.function.Predicate; +import java.util.stream.Collectors; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +public class DatasetsTest { + + /** + * Test cleanup filter + */ + @Test + public void testCleanup() { + Set datasetFiles = new HashSet<>() { + { + add("1837fda0b6c-90779481d439"); + add("1837fda0e17-4b0926f6d44e"); + add("1837fda1b80-46a899909269"); + } + }; + Set filesOnDrive = new HashSet<>() { + { + add("1837fda0b6c-90779481d439"); + add("1837fda0e17-4b0926f6d44e"); + add("1837fda1b80-46a899909269"); + add("prefix_1837fda0b6c-90779481d439"); + add("1837fda0e17-4b0926f6d44e_suffix"); + add("1837fda1b80-extra-46a899909269"); + add("1837fda0e17-4b0926f6d44e.aux"); + add("1837fda1994-5f74d57e6e47"); + add("1837fda17ce-d7b9987fc6e9"); + add("18383198c49-aeda08ccffff"); + add("prefix_1837fda1994-5f74d57e6e47"); + add("1837fda17ce-d7b9987fc6e9_suffix"); + add("18383198c49-extra-aeda08ccffff"); + add("some_other_file"); + add("1837fda17ce-d7b9987fc6e9.aux"); + add("18383198c49.aeda08ccffff"); + add("1837fda17ce-d7b9987fc6xy"); + } + }; + + Predicate toDeleteFilesFilter = Datasets.getToDeleteFilesFilter(datasetFiles); + Set deleted = filesOnDrive.stream().filter(toDeleteFilesFilter).collect(Collectors.toSet()); + + assertEquals(5, deleted.size()); + assertTrue(deleted.contains("1837fda1994-5f74d57e6e47")); + assertTrue(deleted.contains("1837fda17ce-d7b9987fc6e9")); + assertTrue(deleted.contains("18383198c49-aeda08ccffff")); + assertTrue(deleted.contains("1837fda17ce-d7b9987fc6e9_suffix")); + assertTrue(deleted.contains("1837fda17ce-d7b9987fc6e9.aux")); + } +} diff --git a/src/test/java/edu/harvard/iq/dataverse/api/ExternalToolsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/ExternalToolsIT.java index 5508a6c57dc..cdebeddb7bc 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/ExternalToolsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/ExternalToolsIT.java @@ -3,8 +3,11 @@ import com.jayway.restassured.RestAssured; import com.jayway.restassured.path.json.JsonPath; import com.jayway.restassured.response.Response; +import java.io.File; import java.io.IOException; import java.io.StringReader; +import java.nio.file.Path; +import java.nio.file.Paths; import javax.json.Json; import javax.json.JsonArray; import javax.json.JsonObject; @@ -442,4 +445,122 @@ public void createToolSpreadsheetViewer() { .statusCode(OK.getStatusCode()); } + @Test + public void testFileLevelToolWithAuxFileReq() throws IOException { + + // Delete all external tools before testing. 
+ Response getTools = UtilIT.getExternalTools(); + getTools.prettyPrint(); + getTools.then().assertThat() + .statusCode(OK.getStatusCode()); + String body = getTools.getBody().asString(); + JsonReader bodyObject = Json.createReader(new StringReader(body)); + JsonArray tools = bodyObject.readObject().getJsonArray("data"); + for (int i = 0; i < tools.size(); i++) { + JsonObject tool = tools.getJsonObject(i); + int id = tool.getInt("id"); + Response deleteExternalTool = UtilIT.deleteExternalTool(id); + deleteExternalTool.prettyPrint(); + } + + Response createUser = UtilIT.createRandomUser(); + createUser.prettyPrint(); + createUser.then().assertThat() + .statusCode(OK.getStatusCode()); + String username = UtilIT.getUsernameFromResponse(createUser); + String apiToken = UtilIT.getApiTokenFromResponse(createUser); + + Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken); + createDataverseResponse.prettyPrint(); + createDataverseResponse.then().assertThat() + .statusCode(CREATED.getStatusCode()); + + String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); + + Response createDataset = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, apiToken); + createDataset.prettyPrint(); + createDataset.then().assertThat() + .statusCode(CREATED.getStatusCode()); + + Integer datasetId = UtilIT.getDatasetIdFromResponse(createDataset); + + // Not really an HDF5 file. Just random bytes. But the file extension makes it detected as HDF5. + Path pathToFalseHdf5 = Paths.get(java.nio.file.Files.createTempDirectory(null) + File.separator + "false.hdf5"); + byte[] bytes = {1, 2, 3, 4, 5}; + java.nio.file.Files.write(pathToFalseHdf5, bytes); + + Response uploadFalseHdf5 = UtilIT.uploadFileViaNative(datasetId.toString(), pathToFalseHdf5.toString(), apiToken); + uploadFalseHdf5.prettyPrint(); + uploadFalseHdf5.then().assertThat() + .statusCode(OK.getStatusCode()); + + Integer falseHdf5 = JsonPath.from(uploadFalseHdf5.getBody().asString()).getInt("data.files[0].dataFile.id"); + + String pathToTrueHdf5 = "src/test/resources/hdf/hdf5/vlen_string_dset"; + Response uploadTrueHdf5 = UtilIT.uploadFileViaNative(datasetId.toString(), pathToTrueHdf5, apiToken); + uploadTrueHdf5.prettyPrint(); + uploadTrueHdf5.then().assertThat() + .statusCode(OK.getStatusCode()); + + Integer trueHdf5 = JsonPath.from(uploadTrueHdf5.getBody().asString()).getInt("data.files[0].dataFile.id"); + + JsonObjectBuilder job = Json.createObjectBuilder(); + job.add("displayName", "HDF5 Tool"); + job.add("description", "Operates on HDF5 files"); + job.add("types", Json.createArrayBuilder().add("preview")); + job.add("scope", "file"); + job.add("contentType", "application/x-hdf5"); + job.add("toolUrl", "/dataexplore/dataverse-previewers/previewers/v1.3/TextPreview.html"); + job.add("toolParameters", Json.createObjectBuilder() + .add("queryParameters", Json.createArrayBuilder() + .add(Json.createObjectBuilder() + .add("fileid", "{fileId}") + .build()) + .add(Json.createObjectBuilder() + .add("siteUrl", "{siteUrl}") + .build()) + .add(Json.createObjectBuilder() + .add("key", "{apiToken}") + .build()) + .build()) + .build()); + job.add("requirements", Json.createObjectBuilder() + .add("auxFilesExist", Json.createArrayBuilder() + .add(Json.createObjectBuilder() + .add("formatTag", "NcML") + .add("formatVersion", "0.1") + ) + ) + ); + Response addExternalTool = UtilIT.addExternalTool(job.build()); + addExternalTool.prettyPrint(); + addExternalTool.then().assertThat() + .statusCode(OK.getStatusCode()) + 
.body("data.displayName", CoreMatchers.equalTo("HDF5 Tool")); + + long toolId = JsonPath.from(addExternalTool.getBody().asString()).getLong("data.id"); + + Response getTool = UtilIT.getExternalTool(toolId); + getTool.prettyPrint(); + getTool.then().assertThat() + .body("data.scope", CoreMatchers.equalTo("file")) + .statusCode(OK.getStatusCode()); + + // No tools for false HDF5 file. Aux file couldn't be extracted. Doesn't meet requirements. + Response getToolsForFalseHdf5 = UtilIT.getExternalToolsForFile(falseHdf5.toString(), "preview", apiToken); + getToolsForFalseHdf5.prettyPrint(); + getToolsForFalseHdf5.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data", Matchers.hasSize(0)); + + // The tool shows for a true HDF5 file. The NcML aux file is available. Requirements met. + Response getToolsForTrueHdf5 = UtilIT.getExternalToolsForFile(trueHdf5.toString(), "preview", apiToken); + getToolsForTrueHdf5.prettyPrint(); + getToolsForTrueHdf5.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data[0].displayName", CoreMatchers.equalTo("HDF5 Tool")) + .body("data[0].scope", CoreMatchers.equalTo("file")) + .body("data[0].contentType", CoreMatchers.equalTo("application/x-hdf5")); + } + } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java b/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java index 950260d1400..a373ee694c2 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java @@ -1384,6 +1384,67 @@ public void testDataSizeInDataverse() throws InterruptedException { } + @Test + public void testGetFileInfo() { + + Response createUser = UtilIT.createRandomUser(); + String username = UtilIT.getUsernameFromResponse(createUser); + String apiToken = UtilIT.getApiTokenFromResponse(createUser); + Response makeSuperUser = UtilIT.makeSuperUser(username); + String dataverseAlias = createDataverseGetAlias(apiToken); + Integer datasetId = createDatasetGetId(dataverseAlias, apiToken); + + createUser = UtilIT.createRandomUser(); + String apiTokenRegular = UtilIT.getApiTokenFromResponse(createUser); + + msg("Add tabular file"); + String pathToFile = "scripts/search/data/tabular/stata13-auto-withstrls.dta"; + Response addResponse = UtilIT.uploadFileViaNative(datasetId.toString(), pathToFile, apiToken); + + String dataFileId = addResponse.getBody().jsonPath().getString("data.files[0].dataFile.id"); + msgt("datafile id: " + dataFileId); + + addResponse.prettyPrint(); + + Response getFileDataResponse = UtilIT.getFileData(dataFileId, apiToken); + + getFileDataResponse.prettyPrint(); + getFileDataResponse.then().assertThat() + .body("data.label", equalTo("stata13-auto-withstrls.dta")) + .body("data.dataFile.filename", equalTo("stata13-auto-withstrls.dta")) + .statusCode(OK.getStatusCode()); + + getFileDataResponse = UtilIT.getFileData(dataFileId, apiTokenRegular); + getFileDataResponse.then().assertThat() + .statusCode(BAD_REQUEST.getStatusCode()); + + // ------------------------- + // Publish dataverse and dataset + // ------------------------- + msg("Publish dataverse and dataset"); + Response publishDataversetResp = UtilIT.publishDataverseViaSword(dataverseAlias, apiToken); + publishDataversetResp.then().assertThat() + .statusCode(OK.getStatusCode()); + + Response publishDatasetResp = UtilIT.publishDatasetViaNativeApi(datasetId, "major", apiToken); + publishDatasetResp.then().assertThat() + .statusCode(OK.getStatusCode()); + //regular user should get to see file data + getFileDataResponse = 
UtilIT.getFileData(dataFileId, apiTokenRegular); + getFileDataResponse.then().assertThat() + .statusCode(OK.getStatusCode()); + + //cleanup + Response destroyDatasetResponse = UtilIT.destroyDataset(datasetId, apiToken); + assertEquals(200, destroyDatasetResponse.getStatusCode()); + + Response deleteDataverseResponse = UtilIT.deleteDataverse(dataverseAlias, apiToken); + assertEquals(200, deleteDataverseResponse.getStatusCode()); + + Response deleteUserResponse = UtilIT.deleteUser(username); + assertEquals(200, deleteUserResponse.getStatusCode()); + } + @Test public void testValidateDDI_issue6027() throws InterruptedException { msgt("testValidateDDI_issue6027"); diff --git a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingClientsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingClientsIT.java index 9eac3545e54..094eb0df77c 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingClientsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingClientsIT.java @@ -1,34 +1,59 @@ package edu.harvard.iq.dataverse.api; import java.util.logging.Logger; +import java.util.logging.Level; import com.jayway.restassured.RestAssured; import static com.jayway.restassured.RestAssured.given; +import com.jayway.restassured.path.json.JsonPath; import org.junit.Test; import com.jayway.restassured.response.Response; +import static javax.ws.rs.core.Response.Status.CREATED; +import static javax.ws.rs.core.Response.Status.UNAUTHORIZED; +import static javax.ws.rs.core.Response.Status.ACCEPTED; +import static javax.ws.rs.core.Response.Status.OK; import static org.hamcrest.CoreMatchers.equalTo; -import static junit.framework.Assert.assertEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; import org.junit.BeforeClass; /** - * extremely minimal (for now) API tests for creating OAI clients. + * This class tests Harvesting Client functionality. + * Note that these methods test BOTH the proprietary Dataverse REST API for + * creating and managing harvesting clients, AND the underlying OAI-PMH + * harvesting functionality itself. I.e., we will use the Dataverse + * /api/harvest/clients/ API to run an actual harvest of a control set and + * then validate the resulting harvested content.
*/ public class HarvestingClientsIT { private static final Logger logger = Logger.getLogger(HarvestingClientsIT.class.getCanonicalName()); - private static final String harvestClientsApi = "/api/harvest/clients/"; - private static final String harvestCollection = "root"; - private static final String harvestUrl = "https://demo.dataverse.org/oai"; - private static final String archiveUrl = "https://demo.dataverse.org"; - private static final String harvestMetadataFormat = "oai_dc"; - private static final String archiveDescription = "RestAssured harvesting client test"; + private static final String HARVEST_CLIENTS_API = "/api/harvest/clients/"; + private static final String ROOT_COLLECTION = "root"; + private static final String HARVEST_URL = "https://demo.dataverse.org/oai"; + private static final String ARCHIVE_URL = "https://demo.dataverse.org"; + private static final String HARVEST_METADATA_FORMAT = "oai_dc"; + private static final String ARCHIVE_DESCRIPTION = "RestAssured harvesting client test"; + private static final String CONTROL_OAI_SET = "controlTestSet"; + private static final int DATASETS_IN_CONTROL_SET = 7; + private static String normalUserAPIKey; + private static String adminUserAPIKey; + private static String harvestCollectionAlias; @BeforeClass public static void setUpClass() { RestAssured.baseURI = UtilIT.getRestAssuredBaseUri(); + + // Create the users, an admin and a non-admin: + setupUsers(); + + // Create a collection that we will use to harvest remote content into: + setupCollection(); + } - private void setupUsers() { + private static void setupUsers() { Response cu0 = UtilIT.createRandomUser(); normalUserAPIKey = UtilIT.getApiTokenFromResponse(cu0); Response cu1 = UtilIT.createRandomUser(); @@ -36,23 +61,34 @@ private void setupUsers() { Response u1a = UtilIT.makeSuperUser(un1); adminUserAPIKey = UtilIT.getApiTokenFromResponse(cu1); } + + private static void setupCollection() { + Response createDataverseResponse = UtilIT.createRandomDataverse(adminUserAPIKey); + createDataverseResponse.prettyPrint(); + assertEquals(CREATED.getStatusCode(), createDataverseResponse.getStatusCode()); + + harvestCollectionAlias = UtilIT.getAliasFromResponse(createDataverseResponse); - private String normalUserAPIKey; - private String adminUserAPIKey; + // publish dataverse: + Response publishDataverse = UtilIT.publishDataverseViaNativeApi(harvestCollectionAlias, adminUserAPIKey); + assertEquals(OK.getStatusCode(), publishDataverse.getStatusCode()); + } @Test public void testCreateEditDeleteClient() { - setupUsers(); + // This method focuses on testing the native Dataverse harvesting client + // API. 
+ String nickName = UtilIT.getRandomString(6); - String clientApiPath = String.format(harvestClientsApi+"%s", nickName); + String clientApiPath = String.format(HARVEST_CLIENTS_API+"%s", nickName); String clientJson = String.format("{\"dataverseAlias\":\"%s\"," + "\"type\":\"oai\"," + "\"harvestUrl\":\"%s\"," + "\"archiveUrl\":\"%s\"," + "\"metadataFormat\":\"%s\"}", - harvestCollection, harvestUrl, archiveUrl, harvestMetadataFormat); + ROOT_COLLECTION, HARVEST_URL, ARCHIVE_URL, HARVEST_METADATA_FORMAT); // Try to create a client as normal user, should fail: @@ -61,7 +97,7 @@ public void testCreateEditDeleteClient() { .header(UtilIT.API_TOKEN_HTTP_HEADER, normalUserAPIKey) .body(clientJson) .post(clientApiPath); - assertEquals(401, rCreate.getStatusCode()); + assertEquals(UNAUTHORIZED.getStatusCode(), rCreate.getStatusCode()); // Try to create the same as admin user, should succeed: @@ -70,17 +106,17 @@ public void testCreateEditDeleteClient() { .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey) .body(clientJson) .post(clientApiPath); - assertEquals(201, rCreate.getStatusCode()); + assertEquals(CREATED.getStatusCode(), rCreate.getStatusCode()); // Try to update the client we have just created: - String updateJson = String.format("{\"archiveDescription\":\"%s\"}", archiveDescription); + String updateJson = String.format("{\"archiveDescription\":\"%s\"}", ARCHIVE_DESCRIPTION); Response rUpdate = given() .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey) .body(updateJson) .put(clientApiPath); - assertEquals(200, rUpdate.getStatusCode()); + assertEquals(OK.getStatusCode(), rUpdate.getStatusCode()); // Now let's retrieve the client we've just created and edited: @@ -89,7 +125,7 @@ public void testCreateEditDeleteClient() { logger.info("getClient.getStatusCode(): " + getClientResponse.getStatusCode()); logger.info("getClient printresponse: " + getClientResponse.prettyPrint()); - assertEquals(200, getClientResponse.getStatusCode()); + assertEquals(OK.getStatusCode(), getClientResponse.getStatusCode()); // ... 
and validate the values: @@ -97,11 +133,11 @@ public void testCreateEditDeleteClient() { .body("status", equalTo(AbstractApiBean.STATUS_OK)) .body("data.type", equalTo("oai")) .body("data.nickName", equalTo(nickName)) - .body("data.archiveDescription", equalTo(archiveDescription)) - .body("data.dataverseAlias", equalTo(harvestCollection)) - .body("data.harvestUrl", equalTo(harvestUrl)) - .body("data.archiveUrl", equalTo(archiveUrl)) - .body("data.metadataFormat", equalTo(harvestMetadataFormat)); + .body("data.archiveDescription", equalTo(ARCHIVE_DESCRIPTION)) + .body("data.dataverseAlias", equalTo(ROOT_COLLECTION)) + .body("data.harvestUrl", equalTo(HARVEST_URL)) + .body("data.archiveUrl", equalTo(ARCHIVE_URL)) + .body("data.metadataFormat", equalTo(HARVEST_METADATA_FORMAT)); // Try to delete the client as normal user should fail: @@ -109,7 +145,7 @@ public void testCreateEditDeleteClient() { .header(UtilIT.API_TOKEN_HTTP_HEADER, normalUserAPIKey) .delete(clientApiPath); logger.info("rDelete.getStatusCode(): " + rDelete.getStatusCode()); - assertEquals(401, rDelete.getStatusCode()); + assertEquals(UNAUTHORIZED.getStatusCode(), rDelete.getStatusCode()); // Try to delete as admin user should work: @@ -117,6 +153,118 @@ public void testCreateEditDeleteClient() { .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey) .delete(clientApiPath); logger.info("rDelete.getStatusCode(): " + rDelete.getStatusCode()); - assertEquals(200, rDelete.getStatusCode()); + assertEquals(OK.getStatusCode(), rDelete.getStatusCode()); + } + + @Test + public void testHarvestingClientRun() throws InterruptedException { + // This test will create a client and attempt to perform an actual + // harvest and validate the resulting harvested content. + + // Setup: create the client via native API + // since this API is tested somewhat extensively in the previous + // method, we don't need to pay too much attention to this method, aside + // from confirming the expected HTTP status code. + + String nickName = UtilIT.getRandomString(6); + + String clientApiPath = String.format(HARVEST_CLIENTS_API+"%s", nickName); + String clientJson = String.format("{\"dataverseAlias\":\"%s\"," + + "\"type\":\"oai\"," + + "\"harvestUrl\":\"%s\"," + + "\"archiveUrl\":\"%s\"," + + "\"set\":\"%s\"," + + "\"metadataFormat\":\"%s\"}", + harvestCollectionAlias, HARVEST_URL, ARCHIVE_URL, CONTROL_OAI_SET, HARVEST_METADATA_FORMAT); + + Response createResponse = given() + .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey) + .body(clientJson) + .post(clientApiPath); + assertEquals(CREATED.getStatusCode(), createResponse.getStatusCode()); + + // API TEST 1. Run the harvest using the configuration (client) we have + // just created + + String runHarvestApiPath = String.format(HARVEST_CLIENTS_API+"%s/run", nickName); + + // TODO? - verify that a non-admin user cannot perform this operation (401) + + Response runResponse = given() + .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey) + .post(runHarvestApiPath); + assertEquals(ACCEPTED.getStatusCode(), runResponse.getStatusCode()); + + // API TEST 2. As indicated by the ACCEPTED status code above, harvesting + // is an asynchronous operation that will be performed in the background. + // Verify that this "in progress" status is properly reported while it's + // running, and that it completes in some reasonable amount of time. + + int i = 0; + int maxWait=20; // a very conservative interval; this harvest has no business taking this long + do { + // Give it an initial 1 sec. 
delay, to make sure the client state + // has been updated in the database, which can take some appreciable + // amount of time on a heavily-loaded server running a full suite of + // tests: + Thread.sleep(1000L); + // keep checking the status of the client with the GET api: + Response getClientResponse = given() + .get(clientApiPath); + + assertEquals(OK.getStatusCode(), getClientResponse.getStatusCode()); + JsonPath responseJsonPath = getClientResponse.body().jsonPath(); + assertNotNull("Invalid JSON in GET client response", responseJsonPath); + assertEquals(AbstractApiBean.STATUS_OK, responseJsonPath.getString("status")); + + String clientStatus = responseJsonPath.getString("data.status"); + assertNotNull(clientStatus); + + if ("inProgress".equals(clientStatus) || "IN PROGRESS".equals(responseJsonPath.getString("data.lastResult"))) { + // we'll sleep for another second + i++; + } else { + logger.info("getClientResponse.prettyPrint: " + + getClientResponse.prettyPrint()); + // Check the values in the response: + // a) Confirm that the harvest has completed: + assertEquals("Unexpected client status: "+clientStatus, "inActive", clientStatus); + + // b) Confirm that it has actually succeeded: + assertEquals("Last harvest not reported a success (took "+i+" seconds)", "SUCCESS", responseJsonPath.getString("data.lastResult")); + String harvestTimeStamp = responseJsonPath.getString("data.lastHarvest"); + assertNotNull(harvestTimeStamp); + + // c) Confirm that the other timestamps match: + assertEquals(harvestTimeStamp, responseJsonPath.getString("data.lastSuccessful")); + assertEquals(harvestTimeStamp, responseJsonPath.getString("data.lastNonEmpty")); + + // d) Confirm that the correct number of datasets have been harvested: + assertEquals(DATASETS_IN_CONTROL_SET, responseJsonPath.getInt("data.lastDatasetsHarvested")); + + // ok, it looks like the harvest has completed successfully. 
+ break; + } + } while (i extraDatasetsIdentifiers = new ArrayList<>(); + @BeforeClass public static void setUpClass() { RestAssured.baseURI = UtilIT.getRestAssuredBaseUri(); // enable harvesting server // Gave some thought to storing the original response, and resetting afterwards - but that appears to be more complexity than it's worth Response enableHarvestingServerResponse = UtilIT.setSetting(SettingsServiceBean.Key.OAIServerEnabled,"true"); + + // Create users: + setupUsers(); + + // Create and publish some datasets: + setupDatasets(); + } @AfterClass @@ -44,7 +59,7 @@ public static void afterClass() { Response enableHarvestingServerResponse = UtilIT.setSetting(SettingsServiceBean.Key.OAIServerEnabled,"false"); } - private void setupUsers() { + private static void setupUsers() { Response cu0 = UtilIT.createRandomUser(); normalUserAPIKey = UtilIT.getApiTokenFromResponse(cu0); Response cu1 = UtilIT.createRandomUser(); @@ -52,6 +67,62 @@ private void setupUsers() { Response u1a = UtilIT.makeSuperUser(un1); adminUserAPIKey = UtilIT.getApiTokenFromResponse(cu1); } + + private static void setupDatasets() { + // create dataverse: + Response createDataverseResponse = UtilIT.createRandomDataverse(adminUserAPIKey); + createDataverseResponse.prettyPrint(); + String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); + + // publish dataverse: + Response publishDataverse = UtilIT.publishDataverseViaNativeApi(dataverseAlias, adminUserAPIKey); + assertEquals(OK.getStatusCode(), publishDataverse.getStatusCode()); + + // create dataset: + Response createDatasetResponse = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, adminUserAPIKey); + createDatasetResponse.prettyPrint(); + Integer datasetId = UtilIT.getDatasetIdFromResponse(createDatasetResponse); + + // retrieve the global id: + singleSetDatasetPersistentId = UtilIT.getDatasetPersistentIdFromResponse(createDatasetResponse); + + // publish dataset: + Response publishDataset = UtilIT.publishDatasetViaNativeApi(singleSetDatasetPersistentId, "major", adminUserAPIKey); + assertEquals(200, publishDataset.getStatusCode()); + + singleSetDatasetIdentifier = singleSetDatasetPersistentId.substring(singleSetDatasetPersistentId.lastIndexOf('/') + 1); + + logger.info("identifier: " + singleSetDatasetIdentifier); + + // Publish command is executed asynchronously, i.e. it may + // still be running after we received the OK from the publish API. + // The oaiExport step also requires the metadata exports to be done and this + // takes longer than just publish/reindex. + // So wait for all of this to finish. + UtilIT.sleepForReexport(singleSetDatasetPersistentId, adminUserAPIKey, 10); + + // ... 
And let's create 4 more datasets for a multi-dataset experiment: + + for (int i = 0; i < 4; i++) { + // create dataset: + createDatasetResponse = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, adminUserAPIKey); + createDatasetResponse.prettyPrint(); + datasetId = UtilIT.getDatasetIdFromResponse(createDatasetResponse); + + // retrieve the global id: + String thisDatasetPersistentId = UtilIT.getDatasetPersistentIdFromResponse(createDatasetResponse); + + // publish dataset: + publishDataset = UtilIT.publishDatasetViaNativeApi(thisDatasetPersistentId, "major", adminUserAPIKey); + assertEquals(200, publishDataset.getStatusCode()); + + UtilIT.sleepForReexport(thisDatasetPersistentId, adminUserAPIKey, 10); + + extraDatasetsIdentifiers.add(thisDatasetPersistentId.substring(thisDatasetPersistentId.lastIndexOf('/') + 1)); + } + + + } private String jsonForTestSpec(String name, String def) { String r = String.format("{\"name\":\"%s\",\"definition\":\"%s\"}", name, def);//description is optional @@ -63,178 +134,271 @@ private String jsonForEditSpec(String name, String def, String desc) { return r; } - private String normalUserAPIKey; - private String adminUserAPIKey; + private XmlPath validateOaiVerbResponse(Response oaiResponse, String verb) { + logger.info(verb+" response: "+oaiResponse.prettyPrint()); + // confirm that the response is in fact XML: + XmlPath responseXmlPath = oaiResponse.getBody().xmlPath(); + assertNotNull(responseXmlPath); + + String dateString = responseXmlPath.getString("OAI-PMH.responseDate"); + assertNotNull(dateString); + // TODO: validate the formatting of the date string in the record + // header, above. (could be slightly tricky - since this formatting + // is likely locale-specific) + logger.fine("date string from the OAI output:"+dateString); + //assertEquals("http://localhost:8080/oai", responseXmlPath.getString("OAI-PMH.request")); + assertEquals(verb, responseXmlPath.getString("OAI-PMH.request.@verb")); + return responseXmlPath; + } + + @Test + public void testOaiIdentify() { + // Run Identify: + Response identifyResponse = UtilIT.getOaiIdentify(); + assertEquals(OK.getStatusCode(), identifyResponse.getStatusCode()); + // Validate the response: + + XmlPath responseXmlPath = validateOaiVerbResponse(identifyResponse, "Identify"); + //assertEquals("http://localhost:8080/oai", responseXmlPath.getString("OAI-PMH.Identify.baseURL")); + // Confirm that the server is reporting the correct parameters that + // our server implementation should be using: + assertEquals("2.0", responseXmlPath.getString("OAI-PMH.Identify.protocolVersion")); + assertEquals("transient", responseXmlPath.getString("OAI-PMH.Identify.deletedRecord")); + assertEquals("YYYY-MM-DDThh:mm:ssZ", responseXmlPath.getString("OAI-PMH.Identify.granularity")); + } + @Test - public void testSetCreation() { - setupUsers(); + public void testOaiListMetadataFormats() { + // Run ListMetadataFormats: + Response listFormatsResponse = UtilIT.getOaiListMetadataFormats(); + assertEquals(OK.getStatusCode(), listFormatsResponse.getStatusCode()); + + // Validate the response: + + XmlPath responseXmlPath = validateOaiVerbResponse(listFormatsResponse, "ListMetadataFormats"); + + // Check the payload of the response against the list of metadata formats + // we are currently offering under OAI; will need to be explicitly + // modified if/when we add more harvestable formats.
+ + List listFormats = responseXmlPath.getList("OAI-PMH.ListMetadataFormats.metadataFormat"); + + assertNotNull(listFormats); + assertEquals(5, listFormats.size()); + + // The metadata formats are reported in an unpredictable ordder. We + // want to sort the prefix names for comparison purposes, and for that + // they need to be saved in a modifiable list: + List metadataPrefixes = new ArrayList<>(); + + for (int i = 0; i < listFormats.size(); i++) { + metadataPrefixes.add(responseXmlPath.getString("OAI-PMH.ListMetadataFormats.metadataFormat["+i+"].metadataPrefix")); + } + Collections.sort(metadataPrefixes); + + assertEquals("[Datacite, dataverse_json, oai_datacite, oai_dc, oai_ddi]", metadataPrefixes.toString()); + + + } + + + @Test + public void testNativeSetAPI() { String setName = UtilIT.getRandomString(6); String def = "*"; - - // make sure the set does not exist - String u0 = String.format("/api/harvest/server/oaisets/%s", setName); + + // This test focuses on the Create/List/Edit functionality of the + // Dataverse OAI Sets API (/api/harvest/server): + + // API Test 1. Make sure the set does not exist yet + String setPath = String.format("/api/harvest/server/oaisets/%s", setName); String createPath ="/api/harvest/server/oaisets/add"; - Response r0 = given() - .get(u0); - assertEquals(404, r0.getStatusCode()); + Response getSetResponse = given() + .get(setPath); + assertEquals(404, getSetResponse.getStatusCode()); - // try to create set as normal user, should fail - Response r1 = given() + // API Test 2. Try to create set as normal user, should fail + Response createSetResponse = given() .header(UtilIT.API_TOKEN_HTTP_HEADER, normalUserAPIKey) .body(jsonForTestSpec(setName, def)) .post(createPath); - assertEquals(400, r1.getStatusCode()); + assertEquals(400, createSetResponse.getStatusCode()); - // try to create set as admin user, should succeed - Response r2 = given() + // API Test 3. Try to create set as admin user, should succeed + createSetResponse = given() .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey) .body(jsonForTestSpec(setName, def)) .post(createPath); - assertEquals(201, r2.getStatusCode()); + assertEquals(201, createSetResponse.getStatusCode()); - Response getSet = given() - .get(u0); + // API Test 4. Retrieve the set we've just created, validate the response + getSetResponse = given().get(setPath); - logger.info("getSet.getStatusCode(): " + getSet.getStatusCode()); - logger.info("getSet printresponse: " + getSet.prettyPrint()); - assertEquals(200, getSet.getStatusCode()); + System.out.println("getSetResponse.getStatusCode(): " + getSetResponse.getStatusCode()); + System.out.println("getSetResponse, full: " + getSetResponse.prettyPrint()); + assertEquals(200, getSetResponse.getStatusCode()); + + getSetResponse.then().assertThat() + .body("status", equalTo(AbstractApiBean.STATUS_OK)) + .body("data.definition", equalTo("*")) + .body("data.description", equalTo("")) + .body("data.name", equalTo(setName)); + + // API Test 5. 
Retrieve all sets, check that our new set is listed Response responseAll = given() .get("/api/harvest/server/oaisets"); - logger.info("responseAll.getStatusCode(): " + responseAll.getStatusCode()); - logger.info("responseAll printresponse: " + responseAll.prettyPrint()); + System.out.println("responseAll.getStatusCode(): " + responseAll.getStatusCode()); + System.out.println("responseAll full: " + responseAll.prettyPrint()); assertEquals(200, responseAll.getStatusCode()); - - // try to create set with same name as admin user, should fail - Response r3 = given() + assertTrue(responseAll.body().jsonPath().getList("data.oaisets").size() > 0); + assertTrue(responseAll.body().jsonPath().getList("data.oaisets.name", String.class).contains(setName)); + + // API Test 6. Try to create a set with the same name, should fail + createSetResponse = given() .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey) .body(jsonForTestSpec(setName, def)) .post(createPath); - assertEquals(400, r3.getStatusCode()); + assertEquals(400, createSetResponse.getStatusCode()); - // try to export set as admin user, should succeed (under admin API, not checking that normal user will fail) + // API Test 7. Try to export set as admin user, should succeed. Set export + // is under /api/admin, no need to try to access it as a non-admin user Response r4 = UtilIT.exportOaiSet(setName); assertEquals(200, r4.getStatusCode()); - - // try to delete as normal user should fail - Response r5 = given() + + // API TEST 8. Try to delete the set as normal user, should fail + Response deleteResponse = given() .header(UtilIT.API_TOKEN_HTTP_HEADER, normalUserAPIKey) - .delete(u0); - logger.info("r5.getStatusCode(): " + r5.getStatusCode()); - assertEquals(400, r5.getStatusCode()); + .delete(setPath); + logger.info("deleteResponse.getStatusCode(): " + deleteResponse.getStatusCode()); + assertEquals(400, deleteResponse.getStatusCode()); - // try to delete as admin user should work - Response r6 = given() + // API TEST 9. Delete as admin user, should work + deleteResponse = given() .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey) - .delete(u0); - logger.info("r6.getStatusCode(): " + r6.getStatusCode()); - assertEquals(200, r6.getStatusCode()); + .delete(setPath); + logger.info("deleteResponse.getStatusCode(): " + deleteResponse.getStatusCode()); + assertEquals(200, deleteResponse.getStatusCode()); } @Test - public void testSetEdit() { - setupUsers(); + public void testSetEditAPIandOAIlistSets() { + // This test focuses on testing the Edit functionality of the Dataverse + // OAI Set API and the ListSets method of the Dataverse OAI server. + + // Initial setup: crete a test set. + // Since the Create and List (POST and GET) functionality of the API + // is tested extensively in the previous test, we will not be paying + // as much attention to these methods, aside from confirming the + // expected HTTP result codes. 
+ String setName = UtilIT.getRandomString(6); - String def = "*"; + String setDef = "*"; - // make sure the set does not exist - String u0 = String.format("/api/harvest/server/oaisets/%s", setName); + // Make sure the set does not exist + String setPath = String.format("/api/harvest/server/oaisets/%s", setName); String createPath ="/api/harvest/server/oaisets/add"; - Response r0 = given() - .get(u0); - assertEquals(404, r0.getStatusCode()); + Response getSetResponse = given() + .get(setPath); + assertEquals(404, getSetResponse.getStatusCode()); - // try to create set as admin user, should succeed - Response r1 = given() + // Create the set as admin user + Response createSetResponse = given() .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey) - .body(jsonForTestSpec(setName, def)) + .body(jsonForTestSpec(setName, setDef)) .post(createPath); - assertEquals(201, r1.getStatusCode()); + assertEquals(201, createSetResponse.getStatusCode()); + // I. Test the Modify/Edit (POST method) functionality of the + // Dataverse OAI Sets API + + String newDefinition = "title:New"; + String newDescription = "updated"; - // try to edit as normal user should fail - Response r2 = given() + // API Test 1. Try to modify the set as normal user, should fail + Response editSetResponse = given() .header(UtilIT.API_TOKEN_HTTP_HEADER, normalUserAPIKey) - .body(jsonForEditSpec(setName, def,"")) - .put(u0); - logger.info("r2.getStatusCode(): " + r2.getStatusCode()); - assertEquals(400, r2.getStatusCode()); + .body(jsonForEditSpec(setName, setDef, "")) + .put(setPath); + logger.info("non-admin user editSetResponse.getStatusCode(): " + editSetResponse.getStatusCode()); + assertEquals(400, editSetResponse.getStatusCode()); - // try to edit as with blanks should fail - Response r3 = given() + // API Test 2. Try to modify as admin, but with invalid (empty) values, + // should fail + editSetResponse = given() .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey) .body(jsonForEditSpec(setName, "","")) - .put(u0); - logger.info("r3.getStatusCode(): " + r3.getStatusCode()); - assertEquals(400, r3.getStatusCode()); + .put(setPath); + logger.info("invalid values editSetResponse.getStatusCode(): " + editSetResponse.getStatusCode()); + assertEquals(400, editSetResponse.getStatusCode()); - // try to edit as with something should pass - Response r4 = given() + // API Test 3. Try to modify as admin, with sensible values + editSetResponse = given() .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey) - .body(jsonForEditSpec(setName, "newDef","newDesc")) - .put(u0); - logger.info("r4 Status code: " + r4.getStatusCode()); - logger.info("r4.prettyPrint(): " + r4.prettyPrint()); - assertEquals(OK.getStatusCode(), r4.getStatusCode()); - - logger.info("u0: " + u0); - // now delete it... - Response r6 = given() + .body(jsonForEditSpec(setName, newDefinition, newDescription)) + .put(setPath); + logger.info("admin user editSetResponse status code: " + editSetResponse.getStatusCode()); + logger.info("admin user editSetResponse.prettyPrint(): " + editSetResponse.prettyPrint()); + assertEquals(OK.getStatusCode(), editSetResponse.getStatusCode()); + + // API Test 4. 
List the set, confirm that the new values are shown + getSetResponse = given().get(setPath); + + System.out.println("getSetResponse.getStatusCode(): " + getSetResponse.getStatusCode()); + System.out.println("getSetResponse, full: " + getSetResponse.prettyPrint()); + assertEquals(200, getSetResponse.getStatusCode()); + + getSetResponse.then().assertThat() + .body("status", equalTo(AbstractApiBean.STATUS_OK)) + .body("data.definition", equalTo(newDefinition)) + .body("data.description", equalTo(newDescription)) + .body("data.name", equalTo(setName)); + + // II. Test the ListSets functionality of the OAI server + + Response listSetsResponse = UtilIT.getOaiListSets(); + + // 1. Validate the service section of the OAI response: + + XmlPath responseXmlPath = validateOaiVerbResponse(listSetsResponse, "ListSets"); + + // 2. Validate the payload of the response, by confirming that the set + // we created and modified, above, is being listed by the OAI server + // and its xml record is properly formatted + + List listSets = responseXmlPath.getList("OAI-PMH.ListSets.set.list().findAll{it.setName=='"+setName+"'}", Node.class); + + // 2a. Confirm that our set is listed: + assertNotNull("Unexpected response from ListSets", listSets); + assertTrue("Newly-created set isn't properly listed by the OAI server", listSets.size() == 1); + // 2b. Confirm that the set entry contains the updated description: + assertEquals("Incorrect description in the ListSets entry", newDescription, listSets.get(0).getPath("setDescription.metadata.element.field", String.class)); + + // ok, the xml record looks good! + + // Cleanup. Delete the set with the DELETE API + Response deleteSetResponse = given() .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey) - .delete(u0); - logger.info("r6.getStatusCode(): " + r6.getStatusCode()); - assertEquals(200, r6.getStatusCode()); + .delete(setPath); + assertEquals(200, deleteSetResponse.getStatusCode()); } - // A more elaborate test - we'll create and publish a dataset, then create an - // OAI set with that one dataset, and attempt to retrieve the OAI record - // with GetRecord. + // A more elaborate test - we will create and export an + // OAI set with a single dataset, and attempt to retrieve + // it and validate the OAI server responses of the corresponding + // ListIdentifiers, ListRecords and GetRecord methods. 
@Test - public void testOaiFunctionality() throws InterruptedException { - - setupUsers(); - - // create dataverse: - Response createDataverseResponse = UtilIT.createRandomDataverse(adminUserAPIKey); - createDataverseResponse.prettyPrint(); - String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); - - // publish dataverse: - Response publishDataverse = UtilIT.publishDataverseViaNativeApi(dataverseAlias, adminUserAPIKey); - assertEquals(OK.getStatusCode(), publishDataverse.getStatusCode()); - - // create dataset: - Response createDatasetResponse = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, adminUserAPIKey); - createDatasetResponse.prettyPrint(); - Integer datasetId = UtilIT.getDatasetIdFromResponse(createDatasetResponse); - - // retrieve the global id: - String datasetPersistentId = UtilIT.getDatasetPersistentIdFromResponse(createDatasetResponse); - - // publish dataset: - Response publishDataset = UtilIT.publishDatasetViaNativeApi(datasetPersistentId, "major", adminUserAPIKey); - assertEquals(200, publishDataset.getStatusCode()); - - String identifier = datasetPersistentId.substring(datasetPersistentId.lastIndexOf('/') + 1); - - logger.info("identifier: " + identifier); - - // Let's try and create an OAI set with the dataset we have just - // created and published: - // - however, publish command is executed asynchronously, i.e. it may - // still be running after we received the OK from the publish API. - // The oaiExport step also requires the metadata exports to be done and this - // takes longer than just publish/reindex. - // So wait for all of this to finish. - UtilIT.sleepForReexport(datasetPersistentId, adminUserAPIKey, 10); + public void testSingleRecordOaiSet() throws InterruptedException { + // Let's try and create an OAI set with the "single set dataset" that + // was created as part of the initial setup: - String setName = identifier; - String setQuery = "dsPersistentId:" + identifier; + String setName = singleSetDatasetIdentifier; + String setQuery = "dsPersistentId:" + singleSetDatasetIdentifier; String apiPath = String.format("/api/harvest/server/oaisets/%s", setName); String createPath ="/api/harvest/server/oaisets/add"; Response createSetResponse = given() @@ -243,69 +407,470 @@ public void testOaiFunctionality() throws InterruptedException { .post(createPath); assertEquals(201, createSetResponse.getStatusCode()); - // TODO: a) look up the set via native harvest/server api; - // b) look up the set via the OAI ListSets; - // export set: - // (this is asynchronous - so we should probably wait a little) - Response exportSetResponse = UtilIT.exportOaiSet(setName); - assertEquals(200, exportSetResponse.getStatusCode()); + // The GET method of the oai set API, as well as the OAI ListSets + // method are tested extensively in another method in this class, so + // we'll skip looking too closely into those here. + + // A quick test that the new set is listed under native API Response getSet = given() .get(apiPath); - - logger.info("getSet.getStatusCode(): " + getSet.getStatusCode()); - logger.fine("getSet printresponse: " + getSet.prettyPrint()); assertEquals(200, getSet.getStatusCode()); + + // Export the set. + + Response exportSetResponse = UtilIT.exportOaiSet(setName); + assertEquals(200, exportSetResponse.getStatusCode()); + + // Strictly speaking, exporting an OAI set is an asynchronous operation. + // So the code below was written to expect to have to wait for up to 10 + // additional seconds for it to complete. 
In retrospect, this is + // most likely unnecessary (because the only potentially expensive part + // of the process is the metadata export, and in this case that must have + // already happened - when the dataset was published (that operation + // now has its own wait mechanism). But I'll keep this extra code in + // place since it's not going to hurt. - L.A. + + Thread.sleep(1000L); // initial sleep interval int i = 0; int maxWait=10; do { - - // Run ListIdentifiers on this newly-created set: + // OAI Test 1. Run ListIdentifiers on this newly-created set: Response listIdentifiersResponse = UtilIT.getOaiListIdentifiers(setName, "oai_dc"); - List ret = listIdentifiersResponse.getBody().xmlPath().getList("OAI-PMH.ListIdentifiers.header"); - assertEquals(OK.getStatusCode(), listIdentifiersResponse.getStatusCode()); - assertNotNull(ret); - logger.info("setName: " + setName); - if (logger.isLoggable(Level.FINE)) { - logger.info("listIdentifiersResponse.prettyPrint:..... "); - listIdentifiersResponse.prettyPrint(); - } - if (ret.size() != 1) { + + // Validate the service section of the OAI response: + XmlPath responseXmlPath = validateOaiVerbResponse(listIdentifiersResponse, "ListIdentifiers"); + + List ret = responseXmlPath.getList("OAI-PMH.ListIdentifiers.header"); + + if (ret == null || ret.isEmpty()) { + // OK, we'll sleep for another second i++; } else { - // There should be 1 and only 1 record in the response: + if (logger.isLoggable(Level.FINE)) { + logger.info("listIdentifiersResponse.prettyPrint: " + listIdentifiersResponse.prettyPrint()); + } + // Validate the payload of the ListIdentifiers response: + // a) There should be 1 and only 1 item listed: assertEquals(1, ret.size()); - // And the record should be the dataset we have just created: - assertEquals(datasetPersistentId, listIdentifiersResponse.getBody().xmlPath() + // b) The one record in it should be the dataset we have just created: + assertEquals(singleSetDatasetPersistentId, responseXmlPath .getString("OAI-PMH.ListIdentifiers.header.identifier")); + assertEquals(setName, responseXmlPath + .getString("OAI-PMH.ListIdentifiers.header.setSpec")); + assertNotNull(responseXmlPath.getString("OAI-PMH.ListIdentifiers.header.dateStamp")); + // TODO: validate the formatting of the date string here as well. + + // ok, ListIdentifiers response looks valid. break; } Thread.sleep(1000L); - } while (i")); - // And now run GetRecord on the OAI record for the dataset: - Response getRecordResponse = UtilIT.getOaiRecord(datasetPersistentId, "oai_dc"); + // OAI Test 4. run and validate GetRecord response + + Response getRecordResponse = UtilIT.getOaiRecord(singleSetDatasetPersistentId, "oai_dc"); + System.out.println("GetRecord response in its entirety: "+getRecordResponse.getBody().prettyPrint()); + + // Validate the service section of the OAI response: + responseXmlPath = validateOaiVerbResponse(getRecordResponse, "GetRecord"); + + // Validate the payload of the response: + + // Note that for a set with a single record the output of ListRecrods is + // essentially identical to that of GetRecord! 
+ // (we'll test a multi-record set in a different method) + // a) header section: + assertEquals(singleSetDatasetPersistentId, responseXmlPath.getString("OAI-PMH.GetRecord.record.header.identifier")); + assertEquals(setName, responseXmlPath + .getString("OAI-PMH.GetRecord.record.header.setSpec")); + assertNotNull(responseXmlPath.getString("OAI-PMH.GetRecord.record.header.dateStamp")); + // b) metadata section: + assertEquals(persistentIdUrl, responseXmlPath.getString("OAI-PMH.GetRecord.record.metadata.dc.identifier")); + assertEquals("Darwin's Finches", responseXmlPath.getString("OAI-PMH.GetRecord.record.metadata.dc.title")); + assertEquals("Finch, Fiona", responseXmlPath.getString("OAI-PMH.GetRecord.record.metadata.dc.creator")); + assertEquals("Darwin's finches (also known as the Galápagos finches) are a group of about fifteen species of passerine birds.", + responseXmlPath.getString("OAI-PMH.GetRecord.record.metadata.dc.description")); + assertEquals("Medicine, Health and Life Sciences", responseXmlPath.getString("OAI-PMH.GetRecord.record.metadata.dc.subject")); + + // ok, looks legit! + + } + + // This test will attempt to create a set with multiple records (enough + // to trigger a paged respons) and test the resumption token functionality). + // Note that this test requires the OAI service to be configured with some + // non-default settings (the paging limits for ListIdentifiers and ListRecords + // must be set to 2, in order to be able to trigger this paging behavior without + // having to create and export too many datasets). + // So you will need to do this: + // asadmin create-jvm-options "-Ddataverse.oai.server.maxidentifiers=2" + // asadmin create-jvm-options "-Ddataverse.oai.server.maxrecords=2" + + + @Test + public void testMultiRecordOaiSet() throws InterruptedException { + // Setup: Let's create a control OAI set with the 5 datasets created + // in the class init: + + String setName = UtilIT.getRandomString(6); + String setQuery = "(dsPersistentId:" + singleSetDatasetIdentifier; + for (String persistentId : extraDatasetsIdentifiers) { + setQuery = setQuery.concat(" OR dsPersistentId:" + persistentId); + } + setQuery = setQuery.concat(")"); + + String createPath = "/api/harvest/server/oaisets/add"; + + Response createSetResponse = given() + .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey) + .body(jsonForTestSpec(setName, setQuery)) + .post(createPath); + assertEquals(201, createSetResponse.getStatusCode()); + + // Dataverse OAI Sets API is tested extensively in other methods here, + // so no need to test in any more details than confirming the OK result + // above + Response exportSetResponse = UtilIT.exportOaiSet(setName); + assertEquals(200, exportSetResponse.getStatusCode()); + Thread.sleep(1000L); + + // OAI Test 1. Run ListIdentifiers on the set we've just created: + Response listIdentifiersResponse = UtilIT.getOaiListIdentifiers(setName, "oai_dc"); + assertEquals(OK.getStatusCode(), listIdentifiersResponse.getStatusCode()); + + // Validate the service section of the OAI response: + XmlPath responseXmlPath = validateOaiVerbResponse(listIdentifiersResponse, "ListIdentifiers"); - assertEquals(datasetPersistentId, getRecordResponse.getBody().xmlPath().getString("OAI-PMH.GetRecord.record.header.identifier")); + List ret = responseXmlPath.getList("OAI-PMH.ListIdentifiers.header.identifier"); + assertNotNull(ret); - // TODO: - // check the actual metadata payload of the OAI record more carefully? 
+ if (logger.isLoggable(Level.FINE)) { + logger.info("listIdentifiersResponse.prettyPrint: "+listIdentifiersResponse.prettyPrint()); + } + + // Validate the payload of the ListIdentifiers response: + // 1a) There should be 2 items listed: + assertEquals("Wrong number of items on the first ListIdentifiers page", + 2, ret.size()); + + // 1b) The response contains a resumptionToken for the next page of items: + String resumptionToken = responseXmlPath.getString("OAI-PMH.ListIdentifiers.resumptionToken"); + assertNotNull("No resumption token in the ListIdentifiers response (has the jvm option dataverse.oai.server.maxidentifiers been configured?)", resumptionToken); + + // 1c) The total number of items in the set (5) is listed correctly: + assertEquals(5, responseXmlPath.getInt("OAI-PMH.ListIdentifiers.resumptionToken.@completeListSize")); + + // 1d) ... and the offset (cursor) is at the right position (0): + assertEquals(0, responseXmlPath.getInt("OAI-PMH.ListIdentifiers.resumptionToken.@cursor")); + + // The formatting of individual item records in the ListIdentifiers response + // is tested extensively in the previous test method, so we are not + // looking at them in such detail here; but we should record the + // identifiers listed, so that we can confirm that all the set is + // served as expected. + + Set persistentIdsInListIdentifiers = new HashSet<>(); + + for (String persistentId : ret) { + persistentIdsInListIdentifiers.add(persistentId.substring(persistentId.lastIndexOf('/') + 1)); + } + + // ok, let's move on to the next ListIdentifiers page: + // (we repeat the exact same checks as the above; minus the different + // expected offset) + + // OAI Test 2. Run ListIdentifiers with the resumptionToken obtained + // in the previous step: + + listIdentifiersResponse = UtilIT.getOaiListIdentifiersWithResumptionToken(resumptionToken); + assertEquals(OK.getStatusCode(), listIdentifiersResponse.getStatusCode()); + + // Validate the service section of the OAI response: + responseXmlPath = validateOaiVerbResponse(listIdentifiersResponse, "ListIdentifiers"); + + ret = responseXmlPath.getList("OAI-PMH.ListIdentifiers.header.identifier"); + assertNotNull(ret); + + if (logger.isLoggable(Level.FINE)) { + logger.info("listIdentifiersResponse.prettyPrint: "+listIdentifiersResponse.prettyPrint()); + } + + // Validate the payload of the ListIdentifiers response: + // 2a) There should still be 2 items listed: + assertEquals("Wrong number of items on the second ListIdentifiers page", + 2, ret.size()); + + // 2b) The response should contain a resumptionToken for the next page of items: + resumptionToken = responseXmlPath.getString("OAI-PMH.ListIdentifiers.resumptionToken"); + assertNotNull("No resumption token in the ListIdentifiers response", resumptionToken); + + // 2c) The total number of items in the set (5) is listed correctly: + assertEquals(5, responseXmlPath.getInt("OAI-PMH.ListIdentifiers.resumptionToken.@completeListSize")); + + // 2d) ... and the offset (cursor) is at the right position (2): + assertEquals(2, responseXmlPath.getInt("OAI-PMH.ListIdentifiers.resumptionToken.@cursor")); + + // Record the identifiers listed on this results page: + + for (String persistentId : ret) { + persistentIdsInListIdentifiers.add(persistentId.substring(persistentId.lastIndexOf('/') + 1)); + } + + // And now the next and the final ListIdentifiers page. + // This time around we should get an *empty* resumptionToken (indicating + // that there are no more results): + + // OAI Test 3. 
Run ListIdentifiers with the final resumptionToken + + listIdentifiersResponse = UtilIT.getOaiListIdentifiersWithResumptionToken(resumptionToken); + assertEquals(OK.getStatusCode(), listIdentifiersResponse.getStatusCode()); + + // Validate the service section of the OAI response: + responseXmlPath = validateOaiVerbResponse(listIdentifiersResponse, "ListIdentifiers"); + + ret = responseXmlPath.getList("OAI-PMH.ListIdentifiers.header.identifier"); + assertNotNull(ret); + + if (logger.isLoggable(Level.FINE)) { + logger.info("listIdentifiersResponse.prettyPrint: "+listIdentifiersResponse.prettyPrint()); + } + + // Validate the payload of the ListIdentifiers response: + // 3a) There should be only 1 item listed: + assertEquals("Wrong number of items on the final ListIdentifiers page", + 1, ret.size()); + + // 3b) The response contains a resumptionToken for the next page of items: + resumptionToken = responseXmlPath.getString("OAI-PMH.ListIdentifiers.resumptionToken"); + assertNotNull("No resumption token in the final ListIdentifiers response", resumptionToken); + assertTrue("Non-empty resumption token in the final ListIdentifiers response", "".equals(resumptionToken)); + + // 3c) The total number of items in the set (5) is still listed correctly: + assertEquals(5, responseXmlPath.getInt("OAI-PMH.ListIdentifiers.resumptionToken.@completeListSize")); + + // 3d) ... and the offset (cursor) is at the right position (4): + assertEquals(4, responseXmlPath.getInt("OAI-PMH.ListIdentifiers.resumptionToken.@cursor")); + + // Record the last identifier listed on this final page: + persistentIdsInListIdentifiers.add(ret.get(0).substring(ret.get(0).lastIndexOf('/') + 1)); + + // Finally, let's confirm that the expected 5 datasets have been listed + // as part of this Set: + + boolean allDatasetsListed = true; + + allDatasetsListed = persistentIdsInListIdentifiers.contains(singleSetDatasetIdentifier); + for (String persistentId : extraDatasetsIdentifiers) { + allDatasetsListed = allDatasetsListed && persistentIdsInListIdentifiers.contains(persistentId); + } + + assertTrue("Control datasets not properly listed in the paged ListIdentifiers response", + allDatasetsListed); + + // OK, it is safe to assume ListIdentifiers works as it should in page mode. + + // We will now repeat the exact same tests for ListRecords (again, no + // need to pay close attention to the formatting of the individual records, + // since that's tested in the previous test method, since our focus is + // testing the paging/resumptionToken functionality) + + // OAI Test 4. 
Run ListRecords on the set we've just created: + Response listRecordsResponse = UtilIT.getOaiListRecords(setName, "oai_dc"); + assertEquals(OK.getStatusCode(), listRecordsResponse.getStatusCode()); + + // Validate the service section of the OAI response: + responseXmlPath = validateOaiVerbResponse(listRecordsResponse, "ListRecords"); + + ret = responseXmlPath.getList("OAI-PMH.ListRecords.record.header.identifier"); + assertNotNull(ret); + + if (logger.isLoggable(Level.FINE)) { + logger.info("listRecordsResponse.prettyPrint: "+listRecordsResponse.prettyPrint()); + } + + // Validate the payload of the ListRecords response: + // 4a) There should be 2 items listed: + assertEquals("Wrong number of items on the first ListRecords page", + 2, ret.size()); + + // 4b) The response contains a resumptionToken for the next page of items: + resumptionToken = responseXmlPath.getString("OAI-PMH.ListRecords.resumptionToken"); + assertNotNull("No resumption token in the ListRecords response (has the jvm option dataverse.oai.server.maxrecords been configured?)", resumptionToken); + + // 4c) The total number of items in the set (5) is listed correctly: + assertEquals(5, responseXmlPath.getInt("OAI-PMH.ListRecords.resumptionToken.@completeListSize")); + + // 4d) ... and the offset (cursor) is at the right position (0): + assertEquals(0, responseXmlPath.getInt("OAI-PMH.ListRecords.resumptionToken.@cursor")); + + Set persistentIdsInListRecords = new HashSet<>(); + + for (String persistentId : ret) { + persistentIdsInListRecords.add(persistentId.substring(persistentId.lastIndexOf('/') + 1)); + } + + // ok, let's move on to the next ListRecords page: + // (we repeat the exact same checks as the above; minus the different + // expected offset) + + // OAI Test 5. Run ListRecords with the resumptionToken obtained + // in the previous step: + + listRecordsResponse = UtilIT.getOaiListRecordsWithResumptionToken(resumptionToken); + assertEquals(OK.getStatusCode(), listRecordsResponse.getStatusCode()); + + // Validate the service section of the OAI response: + responseXmlPath = validateOaiVerbResponse(listRecordsResponse, "ListRecords"); + + ret = responseXmlPath.getList("OAI-PMH.ListRecords.record.header.identifier"); + assertNotNull(ret); + + if (logger.isLoggable(Level.FINE)) { + logger.info("listRecordsResponse.prettyPrint: "+listRecordsResponse.prettyPrint()); + } + + // Validate the payload of the ListRecords response: + // 4a) There should still be 2 items listed: + assertEquals("Wrong number of items on the second ListRecords page", + 2, ret.size()); + + // 4b) The response should contain a resumptionToken for the next page of items: + resumptionToken = responseXmlPath.getString("OAI-PMH.ListRecords.resumptionToken"); + assertNotNull("No resumption token in the ListRecords response", resumptionToken); + + // 4c) The total number of items in the set (5) is listed correctly: + assertEquals(5, responseXmlPath.getInt("OAI-PMH.ListRecords.resumptionToken.@completeListSize")); + + // 4d) ... and the offset (cursor) is at the right position (2): + assertEquals(2, responseXmlPath.getInt("OAI-PMH.ListRecords.resumptionToken.@cursor")); + + // Record the identifiers listed on this results page: + + for (String persistentId : ret) { + persistentIdsInListRecords.add(persistentId.substring(persistentId.lastIndexOf('/') + 1)); + } + + // And now the next and the final ListRecords page. + // This time around we should get an *empty* resumptionToken (indicating + // that there are no more results): + + // OAI Test 6. 
Run ListRecords with the final resumptionToken + + listRecordsResponse = UtilIT.getOaiListRecordsWithResumptionToken(resumptionToken); + assertEquals(OK.getStatusCode(), listRecordsResponse.getStatusCode()); + + // Validate the service section of the OAI response: + responseXmlPath = validateOaiVerbResponse(listRecordsResponse, "ListRecords"); + + ret = responseXmlPath.getList("OAI-PMH.ListRecords.record.header.identifier"); + assertNotNull(ret); + + if (logger.isLoggable(Level.FINE)) { + logger.info("listRecordsResponse.prettyPrint: "+listRecordsResponse.prettyPrint()); + } + + // Validate the payload of the ListRecords response: + // 6a) There should be only 1 item listed: + assertEquals("Wrong number of items on the final ListRecords page", + 1, ret.size()); + + // 6b) The response contains a resumptionToken for the next page of items: + resumptionToken = responseXmlPath.getString("OAI-PMH.ListRecords.resumptionToken"); + assertNotNull("No resumption token in the final ListRecords response", resumptionToken); + assertTrue("Non-empty resumption token in the final ListRecords response", "".equals(resumptionToken)); + + // 6c) The total number of items in the set (5) is still listed correctly: + assertEquals(5, responseXmlPath.getInt("OAI-PMH.ListRecords.resumptionToken.@completeListSize")); + + // 6d) ... and the offset (cursor) is at the right position (4): + assertEquals(4, responseXmlPath.getInt("OAI-PMH.ListRecords.resumptionToken.@cursor")); + + // Record the last identifier listed on this final page: + persistentIdsInListRecords.add(ret.get(0).substring(ret.get(0).lastIndexOf('/') + 1)); + + // Finally, let's confirm that the expected 5 datasets have been listed + // as part of this Set: + + allDatasetsListed = true; + + allDatasetsListed = persistentIdsInListRecords.contains(singleSetDatasetIdentifier); + for (String persistentId : extraDatasetsIdentifiers) { + allDatasetsListed = allDatasetsListed && persistentIdsInListRecords.contains(persistentId); + } + + assertTrue("Control datasets not properly listed in the paged ListRecords response", + allDatasetsListed); + + // OK, it is safe to assume ListRecords works as it should in page mode + // as well. + + // And finally, let's delete the set + String setPath = String.format("/api/harvest/server/oaisets/%s", setName); + Response deleteResponse = given() + .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey) + .delete(setPath); + logger.info("deleteResponse.getStatusCode(): " + deleteResponse.getStatusCode()); + assertEquals("Failed to delete the control multi-record set", 200, deleteResponse.getStatusCode()); } + + // TODO: + // What else can we test? + // Some ideas: + // - Test handling of deleted dataset records + // - Test "from" and "until" time parameters + // - Validate full verb response records against XML schema + // (for each supported metadata format, possibly?) } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/LinkIT.java b/src/test/java/edu/harvard/iq/dataverse/api/LinkIT.java index 9ac2d2cb7e5..76e9b7d6bc8 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/LinkIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/LinkIT.java @@ -170,15 +170,6 @@ public void testDeepLinks() { .body("data.total_count", equalTo(1)) .body("data.items[0].name", equalTo(level1a)); - /** - * Remove this early return when you are ready to work on - * https://github.com/IQSS/dataverse/issues/7430 about strange linking - * behavior. 
- */ - if (true) { - return; - } - Response createLevel2a = UtilIT.createSubDataverse(UtilIT.getRandomDvAlias() + "-level2a", null, apiToken, level1a); createLevel2a.prettyPrint(); String level2a = UtilIT.getAliasFromResponse(createLevel2a); diff --git a/src/test/java/edu/harvard/iq/dataverse/api/NetcdfIT.java b/src/test/java/edu/harvard/iq/dataverse/api/NetcdfIT.java new file mode 100644 index 00000000000..9716e7aca13 --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/api/NetcdfIT.java @@ -0,0 +1,182 @@ +package edu.harvard.iq.dataverse.api; + +import com.jayway.restassured.RestAssured; +import com.jayway.restassured.path.json.JsonPath; +import com.jayway.restassured.response.Response; +import java.io.File; +import java.io.IOException; +import java.nio.file.Path; +import java.nio.file.Paths; +import static javax.ws.rs.core.Response.Status.CREATED; +import static javax.ws.rs.core.Response.Status.FORBIDDEN; +import static javax.ws.rs.core.Response.Status.NOT_FOUND; +import static javax.ws.rs.core.Response.Status.OK; +import org.hamcrest.CoreMatchers; +import static org.hamcrest.CoreMatchers.equalTo; +import org.junit.BeforeClass; +import org.junit.Test; + +public class NetcdfIT { + + @BeforeClass + public static void setUp() { + RestAssured.baseURI = UtilIT.getRestAssuredBaseUri(); + } + + @Test + public void testNmclFromNetcdf() throws IOException { + Response createUser = UtilIT.createRandomUser(); + createUser.then().assertThat().statusCode(OK.getStatusCode()); + String apiToken = UtilIT.getApiTokenFromResponse(createUser); + String username = UtilIT.getUsernameFromResponse(createUser); + + Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken); + createDataverseResponse.prettyPrint(); + createDataverseResponse.then().assertThat() + .statusCode(CREATED.getStatusCode()); + + String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); + + Response createDataset = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, apiToken); + createDataset.prettyPrint(); + createDataset.then().assertThat() + .statusCode(CREATED.getStatusCode()); + + Integer datasetId = UtilIT.getDatasetIdFromResponse(createDataset); + String datasetPid = UtilIT.getDatasetPersistentIdFromResponse(createDataset); + + String pathToFile = "src/test/resources/netcdf/madis-raob"; + + Response uploadFile = UtilIT.uploadFileViaNative(datasetId.toString(), pathToFile, apiToken); + uploadFile.prettyPrint(); + uploadFile.then().assertThat().statusCode(OK.getStatusCode()); + + long fileId = JsonPath.from(uploadFile.body().asString()).getLong("data.files[0].dataFile.id"); + String tag = "NcML"; + String version = "0.1"; + + Response downloadNcml = UtilIT.downloadAuxFile(fileId, tag, version, apiToken); + //downloadNcml.prettyPrint(); // long output + downloadNcml.then().assertThat() + .statusCode(OK.getStatusCode()) + .contentType("text/xml; name=\"madis-raob.ncml_0.1.xml\";charset=UTF-8"); + + Response deleteNcml = UtilIT.deleteAuxFile(fileId, tag, version, apiToken); + deleteNcml.prettyPrint(); + deleteNcml.then().assertThat().statusCode(OK.getStatusCode()); + + Response downloadNcmlShouldFail = UtilIT.downloadAuxFile(fileId, tag, version, apiToken); + downloadNcmlShouldFail.then().assertThat() + .statusCode(NOT_FOUND.getStatusCode()); + + UtilIT.makeSuperUser(username).then().assertThat().statusCode(OK.getStatusCode()); + + Response extractNcml = UtilIT.extractNcml(fileId, apiToken); + extractNcml.prettyPrint(); + extractNcml.then().assertThat() + .statusCode(OK.getStatusCode()); + + 
Response downloadNcmlShouldWork = UtilIT.downloadAuxFile(fileId, tag, version, apiToken); + downloadNcmlShouldWork.then().assertThat() + .statusCode(OK.getStatusCode()); + + } + + @Test + public void testNmclFromNetcdfErrorChecking() throws IOException { + Response createUser = UtilIT.createRandomUser(); + createUser.then().assertThat().statusCode(OK.getStatusCode()); + String apiToken = UtilIT.getApiTokenFromResponse(createUser); + String username = UtilIT.getUsernameFromResponse(createUser); + + Response createUserRandom = UtilIT.createRandomUser(); + createUserRandom.then().assertThat().statusCode(OK.getStatusCode()); + String apiTokenRandom = UtilIT.getApiTokenFromResponse(createUserRandom); + + String apiTokenNull = null; + + Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken); + createDataverseResponse.prettyPrint(); + createDataverseResponse.then().assertThat() + .statusCode(CREATED.getStatusCode()); + + String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); + + Response createDataset = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, apiToken); + createDataset.prettyPrint(); + createDataset.then().assertThat() + .statusCode(CREATED.getStatusCode()); + + Integer datasetId = UtilIT.getDatasetIdFromResponse(createDataset); + String datasetPid = UtilIT.getDatasetPersistentIdFromResponse(createDataset); + + String pathToFile = "src/test/resources/netcdf/madis-raob"; + + Response uploadFile = UtilIT.uploadFileViaNative(datasetId.toString(), pathToFile, apiToken); + uploadFile.prettyPrint(); + uploadFile.then().assertThat().statusCode(OK.getStatusCode()); + + long fileId = JsonPath.from(uploadFile.body().asString()).getLong("data.files[0].dataFile.id"); + String tag = "NcML"; + String version = "0.1"; + + Response downloadNcmlFail = UtilIT.downloadAuxFile(fileId, tag, version, apiTokenNull); + downloadNcmlFail.then().assertThat() + .statusCode(FORBIDDEN.getStatusCode()); + + Response downloadNcml = UtilIT.downloadAuxFile(fileId, tag, version, apiToken); + downloadNcml.then().assertThat() + .statusCode(OK.getStatusCode()) + .contentType("text/xml; name=\"madis-raob.ncml_0.1.xml\";charset=UTF-8"); + + Response deleteNcml = UtilIT.deleteAuxFile(fileId, tag, version, apiToken); + deleteNcml.prettyPrint(); + deleteNcml.then().assertThat().statusCode(OK.getStatusCode()); + + Response downloadNcmlShouldFail = UtilIT.downloadAuxFile(fileId, tag, version, apiToken); + downloadNcmlShouldFail.then().assertThat() + .statusCode(NOT_FOUND.getStatusCode()); + + Response extractNcmlFailRandomUser = UtilIT.extractNcml(fileId, apiTokenRandom); + extractNcmlFailRandomUser.prettyPrint(); + extractNcmlFailRandomUser.then().assertThat() + .statusCode(FORBIDDEN.getStatusCode()); + + UtilIT.makeSuperUser(username).then().assertThat().statusCode(OK.getStatusCode()); + + Response extractNcml = UtilIT.extractNcml(fileId, apiToken); + extractNcml.prettyPrint(); + extractNcml.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.result", CoreMatchers.equalTo(true)); + + Response downloadNcmlShouldWork = UtilIT.downloadAuxFile(fileId, tag, version, apiToken); + downloadNcmlShouldWork.then().assertThat() + .statusCode(OK.getStatusCode()); + + Response extractNcmlFailExistsAlready = UtilIT.extractNcml(fileId, apiToken); + extractNcmlFailExistsAlready.prettyPrint(); + extractNcmlFailExistsAlready.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.result", CoreMatchers.equalTo(false)); + + Path pathToTxt = 
Paths.get(java.nio.file.Files.createTempDirectory(null) + File.separator + "file.txt"); + String contentOfTxt = "Just a text file. Don't expect NcML out!"; + java.nio.file.Files.write(pathToTxt, contentOfTxt.getBytes()); + + Response uploadFileTxt = UtilIT.uploadFileViaNative(datasetId.toString(), pathToTxt.toString(), apiToken); + uploadFileTxt.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.files[0].label", equalTo("file.txt")); + + long fileIdTxt = JsonPath.from(uploadFileTxt.body().asString()).getLong("data.files[0].dataFile.id"); + + Response extractNcmlFailText = UtilIT.extractNcml(fileIdTxt, apiToken); + extractNcmlFailText.prettyPrint(); + extractNcmlFailText.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.result", CoreMatchers.equalTo(false)); + + } + +} diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index 54a217be527..dc9152859ee 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -743,10 +743,11 @@ static Response uploadAuxFile(Long fileId, String pathToFile, String formatTag, } static Response downloadAuxFile(Long fileId, String formatTag, String formatVersion, String apiToken) { - Response response = given() - .header(API_TOKEN_HTTP_HEADER, apiToken) - .get("/api/access/datafile/" + fileId + "/auxiliary/" + formatTag + "/" + formatVersion); - return response; + RequestSpecification requestSpecification = given(); + if (apiToken != null) { + requestSpecification.header(API_TOKEN_HTTP_HEADER, apiToken); + } + return requestSpecification.get("/api/access/datafile/" + fileId + "/auxiliary/" + formatTag + "/" + formatVersion); } static Response listAuxFilesByOrigin(Long fileId, String origin, String apiToken) { @@ -997,6 +998,12 @@ static Response getFileMetadata(String fileIdOrPersistentId, String optionalForm .urlEncodingEnabled(false) .get("/api/access/datafile/" + idInPath + "/metadata" + optionalFormatInPath + optionalQueryParam); } + + static Response getFileData(String fileId, String apiToken) { + return given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .get("/api/files/" + fileId ); + } static Response testIngest(String fileName, String fileType) { return given() @@ -1170,7 +1177,14 @@ public static Response uningestFile(Long fileId, String apiToken) { .post("/api/files/" + fileId + "/uningest/?key=" + apiToken); return uningestFileResponse; } - + + public static Response extractNcml(Long fileId, String apiToken) { + Response response = given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .post("/api/files/" + fileId + "/extractNcml"); + return response; + } + //I don't understand why this blows up when I remove the key public static Response getDataFileMetadata(Long fileId, String apiToken) { Response fileResponse = given() @@ -2625,13 +2639,39 @@ static Response exportOaiSet(String setName) { return given().put(apiPath); } + static Response getOaiIdentify() { + String oaiVerbPath = "/oai?verb=Identify"; + return given().get(oaiVerbPath); + } + + static Response getOaiListMetadataFormats() { + String oaiVerbPath = "/oai?verb=ListMetadataFormats"; + return given().get(oaiVerbPath); + } + + static Response getOaiListSets() { + String oaiVerbPath = "/oai?verb=ListSets"; + return given().get(oaiVerbPath); + } + static Response getOaiRecord(String datasetPersistentId, String metadataFormat) { String apiPath = String.format("/oai?verb=GetRecord&identifier=%s&metadataPrefix=%s", 
datasetPersistentId, metadataFormat); return given().get(apiPath); } static Response getOaiListIdentifiers(String setName, String metadataFormat) { - String apiPath = String.format("/oai?verb=ListIdentifiers&set=%s&metadataPrefix=%s", setName, metadataFormat); + + String apiPath; + if (StringUtil.nonEmpty(setName)) { + apiPath = String.format("/oai?verb=ListIdentifiers&set=%s&metadataPrefix=%s", setName, metadataFormat); + } else { + apiPath = String.format("/oai?verb=ListIdentifiers&metadataPrefix=%s", metadataFormat); + } + return given().get(apiPath); + } + + static Response getOaiListIdentifiersWithResumptionToken(String resumptionToken) { + String apiPath = String.format("/oai?verb=ListIdentifiers&resumptionToken=%s", resumptionToken); return given().get(apiPath); } @@ -2639,6 +2679,11 @@ static Response getOaiListRecords(String setName, String metadataFormat) { String apiPath = String.format("/oai?verb=ListRecords&set=%s&metadataPrefix=%s", setName, metadataFormat); return given().get(apiPath); } + + static Response getOaiListRecordsWithResumptionToken(String resumptionToken) { + String apiPath = String.format("/oai?verb=ListRecords&resumptionToken=%s", resumptionToken); + return given().get(apiPath); + } static Response changeAuthenticatedUserIdentifier(String oldIdentifier, String newIdentifier, String apiToken) { Response response; diff --git a/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommandTest.java b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommandTest.java new file mode 100644 index 00000000000..a0e79268e3d --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommandTest.java @@ -0,0 +1,120 @@ +package edu.harvard.iq.dataverse.engine.command.impl; + +import org.erdtman.jcs.JsonCanonicalizer; +import org.junit.Assert; +import org.junit.Test; +import com.auth0.jwt.JWT; +import com.auth0.jwt.algorithms.Algorithm; +import com.auth0.jwt.interfaces.DecodedJWT; + +import edu.harvard.iq.dataverse.engine.command.exception.CommandException; +import java.security.KeyFactory; +import java.security.interfaces.RSAPrivateKey; +//import java.security.interfaces.RSAPublicKey; +import java.security.spec.PKCS8EncodedKeySpec; +import java.util.Base64; + + +public class DRSSubmitToArchiveCommandTest { + + /* Simple test of JWT encode/decode functionality + * + */ + @Test + public void createJWT() throws CommandException { + + String privKeyString = "MIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQCzSwj+c/uiRz5A" + + "OiDWsV5pxJrdzlDRV2PKKwRGCzhv1MEPwQCvFp6wZRDgCE4EfpVUuByNInV1eOfr" + + "BjwIlxp8hv9RPYCAsPCFV46VLeZsr8FOfvqI6IswYqB3qwdi5NW+CuJRLgTFJP87" + + "X5GgoItVnE0/DxIuZobuaEEzPa8TV8kUvdehzxTlkMTay5J/USeyKsUjPozqgKtN" + + "4ScCWrQx2FXEuKoCg85wNgFRJHgSGBH07lNAYV2tOz+w0ToSNzKswNqhTpRl7W61" + + "gzDCFJu6IYreH9bH5eh/Z9BzjNOs16k0Ok2PmQhOhHYCT3fdkKogriSREVN5dlHi" + + "FV7eB577AgMBAAECggEAPGfLX+8zmDjogDsVVT/szzWt94zLLbyDollb1z1whjzn" + + "zqb31AWK8WMbjF8/6cO8DA77j5FMgYd6m3Q+RaajBdF1s6lE4ha68jHNl/Ue7P9J" + + "4WhmgDnYqzSPW8IDew4d9Sk1lqQqd0E/vIE2TyfHydAfNl+dgISKcUgur1TY52rb" + + "taldnMP44BoXSeKM1qMAE7tWXDQlRjDdcx2Vn6nKJ4iCC6490JSGaFpsoock9wkF" + + "Fi1euzVnvX3ksyioXHMZwzZ9ErCHsI+Px25xiroyloxeoj0zfcA8kZcC9vyoa9HF" + + "2p62iK6RM7JCQc7yMcSN2Fp8PzyHlOLgdI+8CKV4AQKBgQDYmVFenIbapLgN3uyW" + + "gPTgUQGdnLf2S1g1HHHw7+74aZuMKq20w8Ikv6qWMx07R05gm8yxQ1Z4ciLcEw2z" + + "KBurLte/t6ZAJXQ7wnbPyX1JPFQNxKJrPKq+FynnANrdPVgwUunmO9JJbsudU/cG" + + 
"WKaQiG0w5ltvXg1NY5i1doifawKBgQDT6HFxh31nGUySNRQloE9mpvbzT35ornvl" + + "0oMlCYX2M52C3/nH/rq30woP4hDMBlvq3V6blOzPHzQwlu4+4OKBqvxlAluYIoXP" + + "QD1vJhb7eti+mYnIWyQ6hnAhrg/WDxn69mixEson2EL68+WRawz61h3WbfKoivbe" + + "YP02G2uysQKBgBOPFLf0boED6tLl1HtqvbIb3od7BWmqOBbjsK5PHEc2UiOAHxt5" + + "qehjnmXdy7/0mnFC4GMJb5+Evv0cg1owPv9gRX88eDjGqQ5UayIsUbHxTq3HmdsR" + + "KWHs+Y2wmBLuXS5P7msp771N0fktAduC2denWiTWSF9wIMdiPQH16DRtAoGBAKs4" + + "ABmEKT4ZgfYMryervRwrQhPcIj5A5VkP2+kcJcKFd/pcMH15A7Mt8M5ekcXYSYKe" + + "tSeukBzWkJvGB+CEYl/1IRQYcJufIVERDdJ2C1HMs75lXp+ljMNBBu8frin+b7aI" + + "TJTuoqrJIW2VjeMOhSFTyi4NDmlCRy/tXArQ4xcxAoGAUppOsJZeF/1kPQIFwBkS" + + "bVuGxMscWKswHy6dXEq2VabVGBL8H33PkpJRBnw7S/f+8wvk9dX63NuTF6VYM546" + + "J73YadnpU82C+7OnaTTCDVPfXYgPFLpE9xKFKkRFacgUbEnvZ2i0zSUquH0RAyaK" + + "tJ0d/dnd5TQUccAZwT8Nrw0="; + + String pubKeyString = "MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAs0sI/nP7okc+QDog1rFe" + + "acSa3c5Q0VdjyisERgs4b9TBD8EArxaesGUQ4AhOBH6VVLgcjSJ1dXjn6wY8CJca" + + "fIb/UT2AgLDwhVeOlS3mbK/BTn76iOiLMGKgd6sHYuTVvgriUS4ExST/O1+RoKCL" + + "VZxNPw8SLmaG7mhBMz2vE1fJFL3Xoc8U5ZDE2suSf1EnsirFIz6M6oCrTeEnAlq0" + + "MdhVxLiqAoPOcDYBUSR4EhgR9O5TQGFdrTs/sNE6EjcyrMDaoU6UZe1utYMwwhSb" + + "uiGK3h/Wx+Xof2fQc4zTrNepNDpNj5kIToR2Ak933ZCqIK4kkRFTeXZR4hVe3gee" + + "+wIDAQAB"; + + String fakeBody = "{\n" + + " \"s3_bucket_name\": \"dataverse-export-dev\",\n" + + " \"package_id\": \"doi-10-5072-fk2-e6cmkr.v1.18\",\n" + + " \"s3_path\": \"doi-10-5072-fk2-e6cmkr\",\n" + + " \"admin_metadata\": {\n" + + " \"accessFlag\": \"N\",\n" + + " \"contentModel\": \"opaque\",\n" + + " \"depositingSystem\": \"Harvard Dataverse\",\n" + + " \"firstGenerationInDrs\": \"unspecified\",\n" + + " \"objectRole\": \"CG:DATASET\",\n" + + " \"usageClass\": \"LOWUSE\",\n" + + " \"storageClass\": \"AR\",\n" + + " \"s3_bucket_name\": \"dataverse-export-dev\",\n" + + " \"ownerCode\": \"123\",\n" + + " \"billingCode\": \"456\",\n" + + " \"resourceNamePattern\": \"pattern\",\n" + + " \"urnAuthorityPath\": \"path\",\n" + + " \"depositAgent\": \"789\",\n" + + " \"depositAgentEmail\": \"someone@mailinator.com\",\n" + + " \"successEmail\": \"winner@mailinator.com\",\n" + + " \"failureEmail\": \"loser@mailinator.com\",\n" + + " \"successMethod\": \"method\",\n" + + " \"adminCategory\": \"root\"\n" + + " }\n" + + "}"; + + byte[] encoded = Base64.getDecoder().decode(privKeyString); + try { + KeyFactory keyFactory = KeyFactory.getInstance("RSA"); + PKCS8EncodedKeySpec keySpec = new PKCS8EncodedKeySpec(encoded); + RSAPrivateKey privKey = (RSAPrivateKey) keyFactory.generatePrivate(keySpec); + //RSAPublicKey publicKey; + /* + * If public key is needed: encoded = Base64.decodeBase64(publicKeyPEM); + * + * KeyFactory keyFactory = KeyFactory.getInstance("RSA"); X509EncodedKeySpec + * keySpec = new X509EncodedKeySpec(encoded); return (RSAPublicKey) + * keyFactory.generatePublic(keySpec); RSAPublicKey publicKey = new + * RSAPublicKey(System.getProperty(RS256_KEY)); + * + * + */ + String canonicalBody = new JsonCanonicalizer(fakeBody).getEncodedString(); + System.out.println("Canonical form:"+ canonicalBody); + + Algorithm algorithmRSA = Algorithm.RSA256(null, privKey); + String token1 = DRSSubmitToArchiveCommand.createJWTString(algorithmRSA, "InstallationBrandName", fakeBody, 5); + + System.out.println("JWT: " + token1); + DecodedJWT jwt = JWT.decode(token1); + System.out.println(jwt.getPayload()); + } catch (Exception e) { + System.out.println(e.getClass() + e.getLocalizedMessage()); + e.printStackTrace(); + //Any exception is a failure, otherwise decoding 
worked. + Assert.fail(e.getLocalizedMessage()); + } + + } +} diff --git a/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java b/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java index 56080c66eec..e660cf78da2 100644 --- a/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java @@ -6,20 +6,22 @@ import edu.harvard.iq.dataverse.license.LicenseServiceBean; import edu.harvard.iq.dataverse.mocks.MockDatasetFieldSvc; -import static edu.harvard.iq.dataverse.util.SystemConfig.SITE_URL; import static edu.harvard.iq.dataverse.util.SystemConfig.FILES_HIDE_SCHEMA_DOT_ORG_DOWNLOAD_URLS; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import edu.harvard.iq.dataverse.util.json.JsonParseException; import edu.harvard.iq.dataverse.util.json.JsonParser; import edu.harvard.iq.dataverse.util.json.JsonUtil; import java.io.ByteArrayOutputStream; import java.io.File; +import java.io.IOException; import java.io.PrintWriter; -import java.io.StringReader; import java.net.URI; import java.nio.file.Files; import java.nio.file.Paths; import java.sql.Timestamp; +import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Arrays; @@ -28,15 +30,16 @@ import java.util.List; import java.util.Set; import java.util.logging.Logger; -import javax.json.Json; import javax.json.JsonObject; -import javax.json.JsonReader; + +import edu.harvard.iq.dataverse.util.testing.JvmSetting; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.Test; import org.mockito.Mockito; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; /** * For docs see {@link SchemaDotOrgExporter}. @@ -62,94 +65,39 @@ public static void tearDownClass() { /** * Test of exportDataset method, of class SchemaDotOrgExporter. 
+ * @throws IOException + * @throws JsonParseException + * @throws ParseException + * */ @Test - public void testExportDataset() throws Exception { + @JvmSetting(key = JvmSettings.SITE_URL, value = "https://librascholar.org") + public void testExportDataset() throws JsonParseException, ParseException, IOException { File datasetVersionJson = new File("src/test/resources/json/dataset-finch2.json"); String datasetVersionAsJson = new String(Files.readAllBytes(Paths.get(datasetVersionJson.getAbsolutePath()))); - License license = new License("CC0 1.0", "You can copy, modify, distribute and perform the work, even for commercial purposes, all without asking permission.", URI.create("http://creativecommons.org/publicdomain/zero/1.0/"), URI.create("/resources/images/cc0.png"), true, 1l); - license.setDefault(true); - - JsonReader jsonReader1 = Json.createReader(new StringReader(datasetVersionAsJson)); - JsonObject json1 = jsonReader1.readObject(); - JsonParser jsonParser = new JsonParser(datasetFieldTypeSvc, null, settingsService, licenseService); - DatasetVersion version = jsonParser.parseDatasetVersion(json1.getJsonObject("datasetVersion")); - version.setVersionState(DatasetVersion.VersionState.RELEASED); - SimpleDateFormat dateFmt = new SimpleDateFormat("yyyyMMdd"); - Date publicationDate = dateFmt.parse("19551105"); - version.setReleaseTime(publicationDate); - version.setVersionNumber(1l); - TermsOfUseAndAccess terms = new TermsOfUseAndAccess(); - terms.setLicense(license); - version.setTermsOfUseAndAccess(terms); - - Dataset dataset = new Dataset(); - dataset.setProtocol("doi"); - dataset.setAuthority("10.5072/FK2"); - dataset.setIdentifier("IMK5A4"); - dataset.setPublicationDate(new Timestamp(publicationDate.getTime())); - version.setDataset(dataset); - Dataverse dataverse = new Dataverse(); - dataverse.setName("LibraScholar"); - dataset.setOwner(dataverse); - System.setProperty(SITE_URL, "https://librascholar.org"); - boolean hideFileUrls = false; - if (hideFileUrls) { - System.setProperty(FILES_HIDE_SCHEMA_DOT_ORG_DOWNLOAD_URLS, "true"); - } - FileMetadata fmd = new FileMetadata(); - DataFile dataFile = new DataFile(); - dataFile.setId(42l); - dataFile.setFilesize(1234); - dataFile.setContentType("text/plain"); - dataFile.setProtocol("doi"); - dataFile.setAuthority("10.5072/FK2"); - dataFile.setIdentifier("7V5MPI"); - fmd.setDatasetVersion(version); - fmd.setDataFile(dataFile); - fmd.setLabel("README.md"); - fmd.setDescription("README file."); - List fileMetadatas = new ArrayList<>(); - fileMetadatas.add(fmd); - dataFile.setFileMetadatas(fileMetadatas);; - dataFile.setOwner(dataset); - version.setFileMetadatas(fileMetadatas); - - ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); - if(json1 == null) logger.fine("Json null"); - if(version == null) logger.fine("ver null"); - if(byteArrayOutputStream == null) logger.fine("bytarr null"); - if(schemaDotOrgExporter == null) logger.fine("sdoe" + " null"); - try { - schemaDotOrgExporter.exportDataset(version, json1, byteArrayOutputStream); - } catch (Exception e) { - e.printStackTrace(); - } - String jsonLd = byteArrayOutputStream.toString(); - String prettyJson = JsonUtil.prettyPrint(jsonLd); - logger.fine("schema.org JSON-LD: " + prettyJson); - JsonReader jsonReader2 = Json.createReader(new StringReader(jsonLd)); - JsonObject json2 = jsonReader2.readObject(); + JsonObject json = JsonUtil.getJsonObject(datasetVersionAsJson); + JsonObject json2 = createExportFromJson(json); + assertEquals("http://schema.org", 
json2.getString("@context")); assertEquals("Dataset", json2.getString("@type")); assertEquals("https://doi.org/10.5072/FK2/IMK5A4", json2.getString("@id")); assertEquals("https://doi.org/10.5072/FK2/IMK5A4", json2.getString("identifier")); assertEquals("Darwin's Finches", json2.getString("name")); assertEquals("Finch, Fiona", json2.getJsonArray("creator").getJsonObject(0).getString("name")); - assertEquals("Birds Inc.", json2.getJsonArray("creator").getJsonObject(0).getString("affiliation")); + assertEquals("Birds Inc.", json2.getJsonArray("creator").getJsonObject(0).getJsonObject("affiliation").getString("name")); assertEquals("https://orcid.org/0000-0002-1825-0097", json2.getJsonArray("creator").getJsonObject(0).getString("@id")); assertEquals("https://orcid.org/0000-0002-1825-0097", json2.getJsonArray("creator").getJsonObject(0).getString("identifier")); + assertEquals("https://orcid.org/0000-0002-1825-0097", json2.getJsonArray("creator").getJsonObject(0).getString("sameAs")); assertEquals("Finch, Fiona", json2.getJsonArray("author").getJsonObject(0).getString("name")); - assertEquals("Birds Inc.", json2.getJsonArray("author").getJsonObject(0).getString("affiliation")); + assertEquals("Birds Inc.", json2.getJsonArray("author").getJsonObject(0).getJsonObject("affiliation").getString("name")); assertEquals("https://orcid.org/0000-0002-1825-0097", json2.getJsonArray("author").getJsonObject(0).getString("@id")); assertEquals("https://orcid.org/0000-0002-1825-0097", json2.getJsonArray("author").getJsonObject(0).getString("identifier")); + assertEquals("https://orcid.org/0000-0002-1825-0097", json2.getJsonArray("author").getJsonObject(0).getString("sameAs")); assertEquals("1955-11-05", json2.getString("datePublished")); assertEquals("1955-11-05", json2.getString("dateModified")); assertEquals("1", json2.getString("version")); - assertEquals("Darwin's finches (also known as the Galápagos finches) are a group of about fifteen species of passerine birds.", json2.getJsonArray("description").getString(0)); - assertEquals("Bird is the word.", json2.getJsonArray("description").getString(1)); - assertEquals(2, json2.getJsonArray("description").size()); + assertEquals("Darwin's finches (also known as the Galápagos finches) are a group of about fifteen species of passerine birds.\nBird is the word.", json2.getString("description")); assertEquals("Medicine, Health and Life Sciences", json2.getJsonArray("keywords").getString(0)); assertEquals("tcTerm1", json2.getJsonArray("keywords").getString(1)); assertEquals("KeywordTerm1", json2.getJsonArray("keywords").getString(2)); @@ -157,9 +105,10 @@ public void testExportDataset() throws Exception { // This dataset, for example, has multiple keywords separated by commas: https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/24034&version=2.0 assertEquals("keywords, with, commas", json2.getJsonArray("keywords").getString(4)); assertEquals("CreativeWork", json2.getJsonArray("citation").getJsonObject(0).getString("@type")); - assertEquals("Finch, Fiona 2018. \"The Finches.\" American Ornithological Journal 60 (4): 990-1005.", json2.getJsonArray("citation").getJsonObject(0).getString("text")); + assertEquals("Finch, Fiona 2018. 
\"The Finches.\" American Ornithological Journal 60 (4): 990-1005.", json2.getJsonArray("citation").getJsonObject(0).getString("name")); assertEquals("https://doi.org/10.5072/FK2/RV16HK", json2.getJsonArray("citation").getJsonObject(0).getString("@id")); assertEquals("https://doi.org/10.5072/FK2/RV16HK", json2.getJsonArray("citation").getJsonObject(0).getString("identifier")); + assertEquals("https://doi.org/10.5072/FK2/RV16HK", json2.getJsonArray("citation").getJsonObject(0).getString("url")); assertEquals("2002/2005", json2.getJsonArray("temporalCoverage").getString(0)); assertEquals("2001-10-01/2015-11-15", json2.getJsonArray("temporalCoverage").getString(1)); assertEquals(null, json2.getString("schemaVersion", null)); @@ -168,7 +117,7 @@ public void testExportDataset() throws Exception { assertEquals("LibraScholar", json2.getJsonObject("includedInDataCatalog").getString("name")); assertEquals("https://librascholar.org", json2.getJsonObject("includedInDataCatalog").getString("url")); assertEquals("Organization", json2.getJsonObject("publisher").getString("@type")); - assertEquals("LibraScholar", json2.getJsonObject("provider").getString("name")); + assertEquals("LibraScholar", json2.getJsonObject("publisher").getString("name")); assertEquals("Organization", json2.getJsonObject("provider").getString("@type")); assertEquals("LibraScholar", json2.getJsonObject("provider").getString("name")); assertEquals("Organization", json2.getJsonArray("funder").getJsonObject(0).getString("@type")); @@ -181,7 +130,7 @@ public void testExportDataset() throws Exception { assertEquals(2, json2.getJsonArray("spatialCoverage").size()); assertEquals("DataDownload", json2.getJsonArray("distribution").getJsonObject(0).getString("@type")); assertEquals("README.md", json2.getJsonArray("distribution").getJsonObject(0).getString("name")); - assertEquals("text/plain", json2.getJsonArray("distribution").getJsonObject(0).getString("fileFormat")); + assertEquals("text/plain", json2.getJsonArray("distribution").getJsonObject(0).getString("encodingFormat")); assertEquals(1234, json2.getJsonArray("distribution").getJsonObject(0).getInt("contentSize")); assertEquals("README file.", json2.getJsonArray("distribution").getJsonObject(0).getString("description")); assertEquals("https://doi.org/10.5072/FK2/7V5MPI", json2.getJsonArray("distribution").getJsonObject(0).getString("@id")); @@ -189,8 +138,85 @@ public void testExportDataset() throws Exception { assertEquals("https://librascholar.org/api/access/datafile/42", json2.getJsonArray("distribution").getJsonObject(0).getString("contentUrl")); assertEquals(1, json2.getJsonArray("distribution").size()); try (PrintWriter printWriter = new PrintWriter("/tmp/dvjsonld.json")) { - printWriter.println(prettyJson); + printWriter.println(JsonUtil.prettyPrint(json2)); + } + + } + + /** + * Test description truncation in exportDataset method, of class SchemaDotOrgExporter. 
+ * @throws IOException + * @throws JsonParseException + * @throws ParseException + * + */ + @Test + public void testExportDescriptionTruncation() throws JsonParseException, ParseException, IOException { + File datasetVersionJson = new File("src/test/resources/json/dataset-long-description.json"); + String datasetVersionAsJson = new String(Files.readAllBytes(Paths.get(datasetVersionJson.getAbsolutePath()))); + + JsonObject json = JsonUtil.getJsonObject(datasetVersionAsJson); + JsonObject json2 = createExportFromJson(json); + + assertTrue(json2.getString("description").endsWith("at...")); + } + + private JsonObject createExportFromJson(JsonObject json) throws JsonParseException, ParseException { + License license = new License("CC0 1.0", "You can copy, modify, distribute and perform the work, even for commercial purposes, all without asking permission.", URI.create("http://creativecommons.org/publicdomain/zero/1.0/"), URI.create("/resources/images/cc0.png"), true, 1l); + license.setDefault(true); + JsonParser jsonParser = new JsonParser(datasetFieldTypeSvc, null, settingsService, licenseService); + DatasetVersion version = jsonParser.parseDatasetVersion(json.getJsonObject("datasetVersion")); + version.setVersionState(DatasetVersion.VersionState.RELEASED); + SimpleDateFormat dateFmt = new SimpleDateFormat("yyyyMMdd"); + Date publicationDate = dateFmt.parse("19551105"); + version.setReleaseTime(publicationDate); + version.setVersionNumber(1l); + TermsOfUseAndAccess terms = new TermsOfUseAndAccess(); + terms.setLicense(license); + version.setTermsOfUseAndAccess(terms); + + Dataset dataset = new Dataset(); + dataset.setProtocol("doi"); + dataset.setAuthority("10.5072/FK2"); + dataset.setIdentifier("IMK5A4"); + dataset.setPublicationDate(new Timestamp(publicationDate.getTime())); + version.setDataset(dataset); + Dataverse dataverse = new Dataverse(); + dataverse.setName("LibraScholar"); + dataset.setOwner(dataverse); + boolean hideFileUrls = false; + if (hideFileUrls) { + System.setProperty(FILES_HIDE_SCHEMA_DOT_ORG_DOWNLOAD_URLS, "true"); + } + + FileMetadata fmd = new FileMetadata(); + DataFile dataFile = new DataFile(); + dataFile.setId(42l); + dataFile.setFilesize(1234); + dataFile.setContentType("text/plain"); + dataFile.setProtocol("doi"); + dataFile.setAuthority("10.5072/FK2"); + dataFile.setIdentifier("7V5MPI"); + fmd.setDatasetVersion(version); + fmd.setDataFile(dataFile); + fmd.setLabel("README.md"); + fmd.setDescription("README file."); + List<FileMetadata> fileMetadatas = new ArrayList<>(); + fileMetadatas.add(fmd); + dataFile.setFileMetadatas(fileMetadatas); + dataFile.setOwner(dataset); + version.setFileMetadatas(fileMetadatas); + + ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); + if(schemaDotOrgExporter == null) logger.fine("sdoe" + " null"); + try { + schemaDotOrgExporter.exportDataset(version, json, byteArrayOutputStream); + } catch (Exception e) { + e.printStackTrace(); } + String jsonLdStr = byteArrayOutputStream.toString(); + return JsonUtil.getJsonObject(jsonLdStr); } /** diff --git a/src/test/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandlerTest.java b/src/test/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandlerTest.java index 70393ebcb2b..ab3a0263d66 100644 --- a/src/test/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandlerTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandlerTest.java @@ -7,22 +7,20 @@ import edu.harvard.iq.dataverse.FileMetadata; import 
edu.harvard.iq.dataverse.authorization.users.ApiToken; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; -import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.util.json.JsonUtil; -import edu.harvard.iq.dataverse.util.testing.SystemProperty; +import edu.harvard.iq.dataverse.util.testing.JvmSetting; +import org.junit.jupiter.api.Test; import javax.json.Json; import javax.json.JsonObject; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.util.ArrayList; import java.util.List; -import org.junit.Test; -import org.mockito.Mockito; - public class ExternalToolHandlerTest { // TODO: It would probably be better to split these into individual tests. @@ -205,9 +203,10 @@ public void testGetToolUrlWithOptionalQueryParameters() { assertEquals("Unknown reserved word: {junk}", expectedException.getMessage()); } - + + @Test - @SystemProperty(key = SystemConfig.SITE_URL, value = "https://librascholar.org") + @JvmSetting(key = JvmSettings.SITE_URL, value = "https://librascholar.org") public void testGetToolUrlWithAllowedApiCalls() { System.out.println("allowedApiCalls test"); Dataset ds = new Dataset(); diff --git a/src/test/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBeanTest.java b/src/test/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBeanTest.java index 74e10d67352..3885c9b358c 100644 --- a/src/test/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBeanTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBeanTest.java @@ -19,7 +19,10 @@ public class ExternalToolServiceBeanTest { + private final ExternalToolServiceBean externalToolService; + public ExternalToolServiceBeanTest() { + this.externalToolService = new ExternalToolServiceBean(); } @Test @@ -49,7 +52,7 @@ public void testfindAll() { ExternalToolHandler externalToolHandler4 = new ExternalToolHandler(externalTool, dataFile, apiToken, fmd, null); List externalTools = new ArrayList<>(); externalTools.add(externalTool); - List availableExternalTools = ExternalToolServiceBean.findExternalToolsByFile(externalTools, dataFile); + List availableExternalTools = externalToolService.findExternalToolsByFile(externalTools, dataFile); assertEquals(availableExternalTools.size(), 1); } @@ -544,4 +547,47 @@ protected static ExternalTool getAllowedApiCallsTool() { return ExternalToolServiceBean.parseAddExternalToolManifest(tool); } + + @Test + public void testParseAddFileToolRequireAuxFile() { + JsonObjectBuilder job = Json.createObjectBuilder(); + job.add("displayName", "AwesomeTool"); + job.add("toolName", "explorer"); + job.add("description", "This tool is awesome."); + job.add("types", Json.createArrayBuilder().add("explore")); + job.add("scope", "file"); + job.add("hasPreviewMode", "false"); + job.add("toolUrl", "http://awesometool.com"); + job.add("toolParameters", Json.createObjectBuilder() + .add("queryParameters", Json.createArrayBuilder() + .add(Json.createObjectBuilder() + .add("filePid", "{filePid}") + .build()) + .add(Json.createObjectBuilder() + .add("key", "{apiToken}") + .build()) + .add(Json.createObjectBuilder() + .add("fileMetadataId", "{fileMetadataId}") + .build()) + 
.add(Json.createObjectBuilder() + .add("dvLocale", "{localeCode}") + .build()) + .build()) + .build()); + job.add("requirements", Json.createObjectBuilder() + .add("auxFilesExist", Json.createArrayBuilder() + .add(Json.createObjectBuilder() + .add("formatTag", "NcML") + .add("formatVersion", "0.1") + ) + ) + ); + job.add(ExternalTool.CONTENT_TYPE, DataFileServiceBean.MIME_TYPE_TSV_ALT); + String tool = job.build().toString(); + ExternalTool externalTool = ExternalToolServiceBean.parseAddExternalToolManifest(tool); + assertEquals("AwesomeTool", externalTool.getDisplayName()); + assertEquals("explorer", externalTool.getToolName()); + assertEquals("{\"auxFilesExist\":[{\"formatTag\":\"NcML\",\"formatVersion\":\"0.1\"}]}", externalTool.getRequirements()); + } + } diff --git a/src/test/java/edu/harvard/iq/dataverse/search/IndexServiceBeanTest.java b/src/test/java/edu/harvard/iq/dataverse/search/IndexServiceBeanTest.java index ad4647e4898..aab6af660cb 100644 --- a/src/test/java/edu/harvard/iq/dataverse/search/IndexServiceBeanTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/search/IndexServiceBeanTest.java @@ -1,18 +1,5 @@ package edu.harvard.iq.dataverse.search; -import static org.junit.Assert.assertTrue; - -import java.io.IOException; -import java.util.Arrays; -import java.util.Set; -import java.util.logging.Logger; -import java.util.stream.Collectors; - -import org.apache.solr.client.solrj.SolrServerException; -import org.junit.Before; -import org.junit.Test; -import org.mockito.Mockito; - import edu.harvard.iq.dataverse.ControlledVocabularyValue; import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetField; @@ -26,21 +13,47 @@ import edu.harvard.iq.dataverse.MetadataBlock; import edu.harvard.iq.dataverse.branding.BrandingUtil; import edu.harvard.iq.dataverse.mocks.MocksFactory; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.testing.JvmSetting; +import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.client.solrj.impl.HttpSolrClient; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.InjectMocks; +import org.mockito.Mock; +import org.mockito.Mockito; +import org.mockito.junit.jupiter.MockitoExtension; +import java.io.IOException; +import java.util.Arrays; +import java.util.Set; +import java.util.logging.Logger; +import java.util.stream.Collectors; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +@ExtendWith(MockitoExtension.class) public class IndexServiceBeanTest { private static final Logger logger = Logger.getLogger(IndexServiceBeanTest.class.getCanonicalName()); private IndexServiceBean indexService; private Dataverse dataverse; - @Before + @Mock + private SettingsServiceBean settingsService; + @InjectMocks + private SystemConfig systemConfig = new SystemConfig(); + + @BeforeEach public void setUp() { dataverse = MocksFactory.makeDataverse(); dataverse.setDataverseType(DataverseType.UNCATEGORIZED); indexService = new IndexServiceBean(); - indexService.systemConfig = new SystemConfig(); + indexService.systemConfig = systemConfig; indexService.settingsService = Mockito.mock(SettingsServiceBean.class); indexService.dataverseService = Mockito.mock(DataverseServiceBean.class); indexService.datasetFieldService 
= Mockito.mock(DatasetFieldServiceBean.class); @@ -48,6 +61,36 @@ public void setUp() { Mockito.when(indexService.dataverseService.findRootDataverse()).thenReturn(dataverse); } + + @Test + public void testInitWithDefaults() { + // given + String url = "http://localhost:8983/solr/collection1"; + + // when + indexService.init(); + + // then + HttpSolrClient client = (HttpSolrClient) indexService.solrServer; + assertEquals(url, client.getBaseURL()); + } + + + @Test + @JvmSetting(key = JvmSettings.SOLR_HOST, value = "foobar") + @JvmSetting(key = JvmSettings.SOLR_PORT, value = "1234") + @JvmSetting(key = JvmSettings.SOLR_CORE, value = "test") + void testInitWithConfig() { + // given + String url = "http://foobar:1234/solr/test"; + + // when + indexService.init(); + + // then + HttpSolrClient client = (HttpSolrClient) indexService.solrServer; + assertEquals(url, client.getBaseURL()); + } @Test public void TestIndexing() throws SolrServerException, IOException { diff --git a/src/test/java/edu/harvard/iq/dataverse/search/SolrClientServiceTest.java b/src/test/java/edu/harvard/iq/dataverse/search/SolrClientServiceTest.java new file mode 100644 index 00000000000..a3b3c8a2080 --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/search/SolrClientServiceTest.java @@ -0,0 +1,59 @@ +package edu.harvard.iq.dataverse.search; + +import edu.harvard.iq.dataverse.settings.JvmSettings; +import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.testing.JvmSetting; +import org.apache.solr.client.solrj.impl.HttpSolrClient; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.InjectMocks; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +@ExtendWith(MockitoExtension.class) +class SolrClientServiceTest { + + @Mock + SettingsServiceBean settingsServiceBean; + @InjectMocks + SystemConfig systemConfig; + SolrClientService clientService = new SolrClientService(); + + @BeforeEach + void setUp() { + clientService.systemConfig = systemConfig; + } + + @Test + void testInitWithDefaults() { + // given + String url = "http://localhost:8983/solr/collection1"; + + // when + clientService.init(); + + // then + HttpSolrClient client = (HttpSolrClient) clientService.getSolrClient(); + assertEquals(url, client.getBaseURL()); + } + + @Test + @JvmSetting(key = JvmSettings.SOLR_HOST, value = "foobar") + @JvmSetting(key = JvmSettings.SOLR_PORT, value = "1234") + @JvmSetting(key = JvmSettings.SOLR_CORE, value = "test") + void testInitWithConfig() { + // given + String url = "http://foobar:1234/solr/test"; + + // when + clientService.init(); + + // then + HttpSolrClient client = (HttpSolrClient) clientService.getSolrClient(); + assertEquals(url, client.getBaseURL()); + } +} \ No newline at end of file diff --git a/src/test/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtilTest.java new file mode 100644 index 00000000000..b22f18ca787 --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtilTest.java @@ -0,0 +1,118 @@ +package edu.harvard.iq.dataverse.util; + +import edu.harvard.iq.dataverse.export.openaire.Organizations; +import edu.harvard.iq.dataverse.util.json.JsonUtil; + +import org.junit.Ignore; +import org.junit.Test; +import static org.junit.Assert.*; + 
+import javax.json.JsonObject; + +public class PersonOrOrgUtilTest { + + public PersonOrOrgUtilTest() { + } + + @Test + public void testOrganizationSimpleName() { + verifyIsOrganization("IBM"); + verifyIsOrganization("Harvard University"); + } + + @Test + public void testOrganizationCOMPLEXName() { + verifyIsOrganization("The Institute for Quantitative Social Science"); + verifyIsOrganization("Council on Aging"); + verifyIsOrganization("The Ford Foundation"); + verifyIsOrganization("United Nations Economic and Social Commission for Asia and the Pacific (UNESCAP)"); + verifyIsOrganization("Michael J. Fox Foundation for Parkinson's Research"); + // The next example is one known to be asserted to be a Person without an entry + // in the OrgWordArray + // So we test with it in the array and then when the array is empty to verify + // the array works, resetting the array works, and the problem still exists in + // the underlying algorithm + PersonOrOrgUtil.setOrgPhraseArray("[\"Portable\"]"); + verifyIsOrganization("Portable Antiquities of the Netherlands"); + PersonOrOrgUtil.setOrgPhraseArray(null); + JsonObject obj = PersonOrOrgUtil.getPersonOrOrganization("Portable Antiquities of the Netherlands", false, false); + assertTrue(obj.getBoolean("isPerson")); + } + + @Test + public void testOrganizationAcademicName() { + + verifyIsOrganization("John Smith Center"); + verifyIsOrganization("John Smith Group"); + //An example the base algorithm doesn't handle: + PersonOrOrgUtil.setAssumeCommaInPersonName(true); + verifyIsOrganization("John Smith Project"); + PersonOrOrgUtil.setAssumeCommaInPersonName(false); + } + + + @Test + public void testOrganizationCommaOrDash() { + verifyIsOrganization("Digital Archive of Massachusetts Anti-Slavery and Anti-Segregation Petitions, Massachusetts Archives, Boston MA"); + verifyIsOrganization("U.S. Department of Commerce, Bureau of the Census, Geography Division"); + verifyIsOrganization("Harvard Map Collection, Harvard College Library"); + verifyIsOrganization("Geographic Data Technology, Inc. (GDT)"); + } + + @Ignore + @Test + public void testOrganizationES() { + //Spanish recognition is not enabled - see export/Organization.java + verifyIsOrganization("Compañía de San Fernando"); + } + + /** + * Name is composed of: + * + */ + @Test + public void testName() { + verifyIsPerson("Jorge Mario Bergoglio", "Jorge Mario", "Bergoglio"); + verifyIsPerson("Bergoglio", null, null); + verifyIsPerson("Francesco Cadili", "Francesco", "Cadili"); + // This Philip Seymour Hoffman example is from ShibUtilTest. 
+ verifyIsPerson("Philip Seymour Hoffman", "Philip Seymour", "Hoffman"); + + // test Smith (is also a name) + verifyIsPerson("John Smith", "John", "Smith"); + // resolved using hint file + verifyIsPerson("Guido van Rossum", "Guido", "van Rossum"); + // test only name + verifyIsPerson("Francesco", "Francesco", null); + // test only family name + verifyIsPerson("Cadili", null, null); + } + + private void verifyIsOrganization(String fullName) { + JsonObject obj = PersonOrOrgUtil.getPersonOrOrganization(fullName, false, false); + System.out.println(JsonUtil.prettyPrint(obj)); + assertEquals(obj.getString("fullName"),fullName); + assertFalse(obj.getBoolean("isPerson")); + + } + + private void verifyIsPerson(String fullName, String givenName, String familyName) { + verifyIsPerson(fullName, givenName, familyName, false); + } + + private void verifyIsPerson(String fullName, String givenName, String familyName, boolean isPerson) { + JsonObject obj = PersonOrOrgUtil.getPersonOrOrganization(fullName, false, isPerson); + System.out.println(JsonUtil.prettyPrint(obj)); + assertEquals(obj.getString("fullName"),fullName); + assertTrue(obj.getBoolean("isPerson")); + assertEquals(obj.containsKey("givenName"), givenName != null); + if(obj.containsKey("givenName") && givenName != null) { + assertEquals(obj.getString("givenName"),givenName); + } + assertEquals(obj.containsKey("familyName"), familyName != null); + if(obj.containsKey("familyName") && familyName != null) { + assertEquals(obj.getString("familyName"),familyName); + } + } + + } diff --git a/src/test/java/edu/harvard/iq/dataverse/util/SystemConfigTest.java b/src/test/java/edu/harvard/iq/dataverse/util/SystemConfigTest.java index 891b029f521..2806aa3aa9b 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/SystemConfigTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/SystemConfigTest.java @@ -1,13 +1,99 @@ package edu.harvard.iq.dataverse.util; +import edu.harvard.iq.dataverse.settings.JvmSettings; +import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import edu.harvard.iq.dataverse.util.testing.JvmSetting; import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.CsvSource; +import org.mockito.InjectMocks; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.Mockito.doReturn; +@ExtendWith(MockitoExtension.class) class SystemConfigTest { - + + @InjectMocks + SystemConfig systemConfig = new SystemConfig(); + @Mock + SettingsServiceBean settingsService; + + @Test + @JvmSetting(key = JvmSettings.SOLR_HOST, value = "foobar") + @JvmSetting(key = JvmSettings.SOLR_PORT, value = "1234") + void testGetSolrHostColonPortNoDBEntry() { + // given + String hostPort = "foobar:1234"; + + // when + doReturn(null).when(settingsService).getValueForKey(SettingsServiceBean.Key.SolrHostColonPort); + String result = systemConfig.getSolrHostColonPort(); + + // then + assertEquals(hostPort, result); + } + + @Test + @JvmSetting(key = JvmSettings.SOLR_HOST, value = "foobar") + @JvmSetting(key = JvmSettings.SOLR_PORT, value = "1234") + void testGetSolrHostColonPortWithDBEntry() { + // given + String dbEntry = "hello:4321"; + + // when + doReturn(dbEntry).when(settingsService).getValueForKey(SettingsServiceBean.Key.SolrHostColonPort); + String result = 
systemConfig.getSolrHostColonPort(); + + // then + assertEquals(dbEntry, result); + } + + @Test + void testGetSolrHostColonPortDefault() { + // given + String hostPort = "localhost:8983"; + + // when + doReturn(null).when(settingsService).getValueForKey(SettingsServiceBean.Key.SolrHostColonPort); + String result = systemConfig.getSolrHostColonPort(); + + // then + assertEquals(hostPort, result); + } + + @Test + void testGetVersion() { + // given + String version = "100.100"; + System.setProperty(JvmSettings.VERSION.getScopedKey(), version); + + // when + String result = systemConfig.getVersion(false); + + // then + assertEquals(version, result); + } + + @Test + @JvmSetting(key = JvmSettings.VERSION, value = "100.100") + @JvmSetting(key = JvmSettings.BUILD, value = "FOOBAR") + void testGetVersionWithBuild() { + // when + String result = systemConfig.getVersion(true); + + // then + assertTrue(result.startsWith("100.100"), "'" + result + "' not starting with 100.100"); + assertTrue(result.contains("build")); + + // Cannot test this here - there might be the bundle file present which is not under test control + //assertTrue(result.endsWith("FOOBAR"), "'" + result + "' not ending with FOOBAR"); + } + @Test void testGetLongLimitFromStringOrDefault_withNullInput() { long defaultValue = 5L; diff --git a/src/test/java/edu/harvard/iq/dataverse/util/UrlTokenUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/util/UrlTokenUtilTest.java index 5e5c14ed063..236179bdb12 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/UrlTokenUtilTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/UrlTokenUtilTest.java @@ -6,21 +6,25 @@ import edu.harvard.iq.dataverse.FileMetadata; import edu.harvard.iq.dataverse.GlobalId; import edu.harvard.iq.dataverse.authorization.users.ApiToken; -import edu.harvard.iq.dataverse.util.testing.SystemProperty; +import edu.harvard.iq.dataverse.settings.JvmSettings; +import edu.harvard.iq.dataverse.util.testing.JvmSetting; +import org.junit.jupiter.api.Test; -import static org.junit.Assert.assertEquals; import java.util.ArrayList; import java.util.List; -import org.junit.Test; +import static org.junit.jupiter.api.Assertions.assertEquals; -public class UrlTokenUtilTest { +class UrlTokenUtilTest { @Test - @SystemProperty(key = SystemConfig.SITE_URL, value = "https://librascholar.org") - public void testGetToolUrlWithOptionalQueryParameters() { + @JvmSetting(key = JvmSettings.SITE_URL, value = "https://librascholar.org") + void testGetToolUrlWithOptionalQueryParameters() { + // given + String siteUrl = "https://librascholar.org"; + DataFile dataFile = new DataFile(); - dataFile.setId(42l); + dataFile.setId(42L); FileMetadata fmd = new FileMetadata(); DatasetVersion dv = new DatasetVersion(); Dataset ds = new Dataset(); @@ -28,20 +32,26 @@ public void testGetToolUrlWithOptionalQueryParameters() { ds.setGlobalId(new GlobalId("doi:10.5072/FK2ABCDEF")); dv.setDataset(ds); fmd.setDatasetVersion(dv); - List fmdl = new ArrayList(); + List fmdl = new ArrayList<>(); fmdl.add(fmd); dataFile.setFileMetadatas(fmdl); + ApiToken apiToken = new ApiToken(); apiToken.setTokenString("7196b5ce-f200-4286-8809-03ffdbc255d7"); + + // when & then 1/2 URLTokenUtil urlTokenUtil = new URLTokenUtil(dataFile, apiToken, fmd, "en"); assertEquals("en", urlTokenUtil.replaceTokensWithValues("{localeCode}")); assertEquals("42 test en", urlTokenUtil.replaceTokensWithValues("{fileId} test {localeCode}")); assertEquals("42 test en", urlTokenUtil.replaceTokensWithValues("{fileId} test {localeCode}")); - - 
assertEquals("https://librascholar.org/api/files/42/metadata?key=" + apiToken.getTokenString(), urlTokenUtil.replaceTokensWithValues("{siteUrl}/api/files/{fileId}/metadata?key={apiToken}")); - + assertEquals( siteUrl + "/api/files/42/metadata?key=" + apiToken.getTokenString(), + urlTokenUtil.replaceTokensWithValues("{siteUrl}/api/files/{fileId}/metadata?key={apiToken}")); + + // when & then 2/2 URLTokenUtil urlTokenUtil2 = new URLTokenUtil(ds, apiToken, "en"); - assertEquals("https://librascholar.org/api/datasets/50?key=" + apiToken.getTokenString(), urlTokenUtil2.replaceTokensWithValues("{siteUrl}/api/datasets/{datasetId}?key={apiToken}")); - assertEquals("https://librascholar.org/api/datasets/:persistentId/?persistentId=doi:10.5072/FK2ABCDEF&key=" + apiToken.getTokenString(), urlTokenUtil2.replaceTokensWithValues("{siteUrl}/api/datasets/:persistentId/?persistentId={datasetPid}&key={apiToken}")); + assertEquals(siteUrl + "/api/datasets/50?key=" + apiToken.getTokenString(), + urlTokenUtil2.replaceTokensWithValues("{siteUrl}/api/datasets/{datasetId}?key={apiToken}")); + assertEquals(siteUrl + "/api/datasets/:persistentId/?persistentId=doi:10.5072/FK2ABCDEF&key=" + apiToken.getTokenString(), + urlTokenUtil2.replaceTokensWithValues("{siteUrl}/api/datasets/:persistentId/?persistentId={datasetPid}&key={apiToken}")); } } diff --git a/src/test/resources/json/dataset-long-description.json b/src/test/resources/json/dataset-long-description.json new file mode 100644 index 00000000000..a6e5c291322 --- /dev/null +++ b/src/test/resources/json/dataset-long-description.json @@ -0,0 +1,362 @@ +{ + "datasetVersion": { + "metadataBlocks": { + "citation": { + "fields": [ + { + "value": "Darwin's Finches", + "typeClass": "primitive", + "multiple": false, + "typeName": "title" + }, + { + "value": [ + { + "authorName": { + "value": "Finch, Fiona", + "typeClass": "primitive", + "multiple": false, + "typeName": "authorName" + }, + "authorIdentifierScheme": { + "typeName": "authorIdentifierScheme", + "multiple": false, + "typeClass": "controlledVocabulary", + "value": "ORCID" + }, + "authorIdentifier": { + "typeName": "authorIdentifier", + "multiple": false, + "typeClass": "primitive", + "value": "0000-0002-1825-0097" + }, + "authorAffiliation": { + "value": "Birds Inc.", + "typeClass": "primitive", + "multiple": false, + "typeName": "authorAffiliation" + } + } + ], + "typeClass": "compound", + "multiple": true, + "typeName": "author" + }, + { + "value": [ + { + "datasetContactEmail": { + "typeClass": "primitive", + "multiple": false, + "typeName": "datasetContactEmail", + "value": "finch@mailinator.com" + } + } + ], + "typeClass": "compound", + "multiple": true, + "typeName": "datasetContact" + }, + { + "value": [ + { + "dsDescriptionValue": { + "value": "Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam dictum felis eu pede mollis pretium. Integer tincidunt. Cras dapibus. Vivamus elementum semper nisi. Aenean vulputate eleifend tellus. Aenean leo ligula, porttitor eu, consequat vitae, eleifend ac, enim. Aliquam lorem ante, dapibus in, viverra quis, feugiat a, tellus. Phasellus viverra nulla ut metus varius laoreet. Quisque rutrum. 
Aenean imperdiet. Etiam ultricies nisi vel augue. Curabitur ullamcorper ultricies nisi. Nam eget dui. Etiam rhoncus. Maecenas tempus, tellus eget condimentum rhoncus, sem quam semper libero, sit amet adipiscing sem neque sed ipsum. Nam quam nunc, blandit vel, luctus pulvinar, hendrerit id, lorem. Maecenas nec odio et ante tincidunt tempus. Donec vitae sapien ut libero venenatis faucibus. Nullam quis ante. Etiam sit amet orci eget eros faucibus tincidunt. Duis leo. Sed fringilla mauris sit amet nibh. Donec sodales sagittis magna. Sed consequat, leo eget bibendum sodales, augue velit cursus nunc, quis gravida magna mi a libero. Fusce vulputate eleifend sapien. Vestibulum purus quam, scelerisque ut, mollis sed, nonummy id, metus. Nullam accumsan lorem in dui. Cras ultricies mi eu turpis hendrerit fringilla. Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia Curae; In ac dui quis mi consectetuer lacinia. Nam pretium turpis et arcu. Duis arcu tortor, suscipit eget, imperdiet nec, imperdiet iaculis, ipsum. Sed aliquam ultrices mauris. Integer ante arcu, accumsan a, consectetuer eget, posuere ut, mauris. Praesent adipiscing. Phasellus ullamcorper ipsum rutrum nunc. Nunc nonummy metus. Vestibulum volutpat pretium libero. Cras id dui. Aenean ut eros et nisl sagittis vestibulum. Nullam nulla eros, ultricies sit amet, nonummy id, imperdiet feugiat, pede. Sed lectus. Donec mollis hendrerit risus. Phasellus nec sem in justo pellentesque facilisis. Etiam imperdiet imperdiet orci. Nunc nec neque. Phasellus leo dolor, tempus non, auctor et, hendrerit quis, nisi. Curabitur ligula sapien, tincidunt non, euismod vitae, posuere imperdiet, leo. Maecenas malesuada. Praesent congue erat at massa. Sed cursus turpis vitae tortor. Donec posuere vulputate arcu. Phasellus accumsan cursus velit. Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia Curae; Sed aliquam, nisi quis porttitor congue, elit erat euismod orci, ac placerat dolor lectus quis orci. Phasellus consectetuer vestibulum elit. Aenean tellus metus, bibendum sed, posuere ac, mattis non, nunc. Vestibulum fringilla pede sit amet augue. In turpis. Pellentesque posuere. Praesent turpis. Aenean posuere, tortor sed cursus feugiat, nunc augue blandit nunc, eu sollicitudin urna dolor sagittis lacus. Donec elit libero, sodales nec, volutpat a, suscipit non, turpis. Nullam sagittis. Suspendisse pulvinar, augue ac venenatis condimentum, sem libero volutpat nibh, nec pellentesque velit pede quis nunc. Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia Curae; Fusce id purus. Ut varius tincidunt libero. Phasellus dolor. Maecenas vestibulum mollis diam. Pellentesque ut neque. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. In dui magna, posuere eget, vestibulum et, tempor auctor, justo. In ac felis quis tortor malesuada pretium. Pellentesque auctor neque nec urna. Proin sapien ipsum, porta a, auctor quis, euismod ut, mi. Aenean viverra rhoncus pede. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Ut non enim eleifend felis pretium feugiat. Vivamus quis mi. Phasellus a est. Phasellus magna. In hac habitasse platea dictumst. Curabitur at lacus ac velit ornare lobortis. Curabitur a felis in nunc fringilla tristique. Morbi mattis ullamcorper velit. Phasellus gravida semper nisi. Nullam vel sem. Pellentesque libero tortor, tincidunt et, tincidunt eget, semper nec, quam. Sed hendrerit. 
Morbi ac felis. Nunc egestas, augue at pellentesque laoreet, felis eros vehicula leo, at malesuada velit leo quis pede. Donec interdum, metus et hendrerit aliquet, dolor diam sagittis ligula, eget egestas libero turpis vel mi. Nunc nulla. Fusce risus nisl, viverra et, tempor et, pretium in, sapien. Donec venenatis vulputate lorem. Morbi nec metus. Phasellus blandit leo ut odio. Maecenas ullamcorper, dui et placerat feugiat, eros pede varius nisi, condimentum viverra felis nunc et lorem. Sed magna purus, fermentum eu, tincidunt eu, varius ut, felis. In auctor lobortis lacus. Quisque libero metus, condimentum nec, tempor a, commodo mollis, magna. Vestibulum ullamcorper mauris at ligul beyond 5000 chars", + "multiple": false, + "typeClass": "primitive", + "typeName": "dsDescriptionValue" + } + } + ], + "typeClass": "compound", + "multiple": true, + "typeName": "dsDescription" + }, + { + "value": [ + "Medicine, Health and Life Sciences" + ], + "typeClass": "controlledVocabulary", + "multiple": true, + "typeName": "subject" + }, + { + "typeName": "keyword", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "keywordValue": { + "typeName": "keywordValue", + "multiple": false, + "typeClass": "primitive", + "value": "KeywordTerm1" + }, + "keywordVocabulary": { + "typeName": "keywordVocabulary", + "multiple": false, + "typeClass": "primitive", + "value": "KeywordVocabulary1" + }, + "keywordVocabularyURI": { + "typeName": "keywordVocabularyURI", + "multiple": false, + "typeClass": "primitive", + "value": "http://KeywordVocabularyURL1.org" + } + }, + { + "keywordValue": { + "typeName": "keywordValue", + "multiple": false, + "typeClass": "primitive", + "value": "KeywordTerm2" + }, + "keywordVocabulary": { + "typeName": "keywordVocabulary", + "multiple": false, + "typeClass": "primitive", + "value": "KeywordVocabulary2" + }, + "keywordVocabularyURI": { + "typeName": "keywordVocabularyURI", + "multiple": false, + "typeClass": "primitive", + "value": "http://KeywordVocabularyURL2.org" + } + }, + { + "keywordValue": { + "typeName": "keywordValue", + "multiple": false, + "typeClass": "primitive", + "value": "keywords, with, commas" + } + } + ] + }, + { + "typeName": "topicClassification", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "topicClassValue": { + "typeName": "topicClassValue", + "multiple": false, + "typeClass": "primitive", + "value": "tcTerm1" + }, + "topicClassVocab": { + "typeName": "topicClassVocab", + "multiple": false, + "typeClass": "primitive", + "value": "tcVocab1" + }, + "topicClassVocabURI": { + "typeName": "topicClassVocabURI", + "multiple": false, + "typeClass": "primitive", + "value": "http://example.com/tcTerm1" + } + } + ] + }, + { + "typeName": "contributor", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "contributorType": { + "typeName": "contributorType", + "multiple": false, + "typeClass": "controlledVocabulary", + "value": "Data Collector" + }, + "contributorName": { + "typeName": "contributorName", + "multiple": false, + "typeClass": "primitive", + "value": "Holmes, Sherlock" + } + }, + { + "contributorType": { + "typeName": "contributorType", + "multiple": false, + "typeClass": "controlledVocabulary", + "value": "Funder" + }, + "contributorName": { + "typeName": "contributorName", + "multiple": false, + "typeClass": "primitive", + "value": "National Science Foundation" + } + }, + { + "contributorType": { + "typeName": "contributorType", + "multiple": false, + "typeClass": "controlledVocabulary", + "value": "Data 
Collector" + }, + "contributorName": { + "typeName": "contributorName", + "multiple": false, + "typeClass": "primitive", + "value": "Watson, John" + } + } + ] + }, + { + "typeName": "grantNumber", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "grantNumberAgency": { + "typeName": "grantNumberAgency", + "multiple": false, + "typeClass": "primitive", + "value": "National Institutes of Health" + }, + "grantNumberValue": { + "typeName": "grantNumberValue", + "multiple": false, + "typeClass": "primitive", + "value": "1245" + } + } + ] + }, + { + "typeName": "publication", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "publicationCitation": { + "typeName": "publicationCitation", + "multiple": false, + "typeClass": "primitive", + "value": "Finch, Fiona 2018. \"The Finches.\" American Ornithological Journal 60 (4): 990-1005." + }, + "publicationIDType": { + "typeName": "publicationIDType", + "multiple": false, + "typeClass": "controlledVocabulary", + "value": "doi" + }, + "publicationIDNumber": { + "typeName": "publicationIDNumber", + "multiple": false, + "typeClass": "primitive", + "value": "10.5072/FK2/RV16HK" + }, + "publicationURL": { + "typeName": "publicationURL", + "multiple": false, + "typeClass": "primitive", + "value": "https://doi.org/10.5072/FK2/RV16HK" + } + } + ] + }, + { + "typeName": "timePeriodCovered", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "timePeriodCoveredStart": { + "typeName": "timePeriodCoveredStart", + "multiple": false, + "typeClass": "primitive", + "value": "2002" + }, + "timePeriodCoveredEnd": { + "typeName": "timePeriodCoveredEnd", + "multiple": false, + "typeClass": "primitive", + "value": "2005" + } + }, + { + "timePeriodCoveredStart": { + "typeName": "timePeriodCoveredStart", + "multiple": false, + "typeClass": "primitive", + "value": "2001-10-01" + }, + "timePeriodCoveredEnd": { + "typeName": "timePeriodCoveredEnd", + "multiple": false, + "typeClass": "primitive", + "value": "2015-11-15" + } + } + ] + } + ], + "displayName": "Citation Metadata", + "name": "citation" + }, + "geospatial": { + "displayName": "Geospatial Metadata", + "name": "geospatial", + "fields": [ + { + "typeName": "geographicCoverage", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "city": { + "typeName": "city", + "multiple": false, + "typeClass": "primitive", + "value": "Columbus" + }, + "state": { + "typeName": "state", + "multiple": false, + "typeClass": "primitive", + "value": "Ohio" + }, + "country": { + "typeName": "country", + "multiple": false, + "typeClass": "controlledVocabulary", + "value": "United States" + }, + "otherGeographicCoverage": { + "typeName": "otherGeographicCoverage", + "multiple": false, + "typeClass": "primitive", + "value": "North America" + } + }, + { + "country": { + "typeName": "country", + "multiple": false, + "typeClass": "controlledVocabulary", + "value": "United States" + }, + "state": { + "typeName": "state", + "multiple": false, + "typeClass": "primitive", + "value": "Wisconsin" + } + } + ] + } + ] + } + } + } +} diff --git a/tests/integration-tests.txt b/tests/integration-tests.txt index 6e6668d45af..1e9110be2de 100644 --- a/tests/integration-tests.txt +++ b/tests/integration-tests.txt @@ -1 +1 @@ 
-DataversesIT,DatasetsIT,SwordIT,AdminIT,BuiltinUsersIT,UsersIT,UtilIT,ConfirmEmailIT,FileMetadataIT,FilesIT,SearchIT,InReviewWorkflowIT,HarvestingServerIT,HarvestingClientsIT,MoveIT,MakeDataCountApiIT,FileTypeDetectionIT,EditDDIIT,ExternalToolsIT,AccessIT,DuplicateFilesIT,DownloadFilesIT,LinkIT,DeleteUsersIT,DeactivateUsersIT,AuxiliaryFilesIT,InvalidCharactersIT,LicensesIT,NotificationsIT,BagIT,MetadataBlocksIT +DataversesIT,DatasetsIT,SwordIT,AdminIT,BuiltinUsersIT,UsersIT,UtilIT,ConfirmEmailIT,FileMetadataIT,FilesIT,SearchIT,InReviewWorkflowIT,HarvestingServerIT,HarvestingClientsIT,MoveIT,MakeDataCountApiIT,FileTypeDetectionIT,EditDDIIT,ExternalToolsIT,AccessIT,DuplicateFilesIT,DownloadFilesIT,LinkIT,DeleteUsersIT,DeactivateUsersIT,AuxiliaryFilesIT,InvalidCharactersIT,LicensesIT,NotificationsIT,BagIT,MetadataBlocksIT,NetcdfIT
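
DRSSubmitToArchiveCommandTest.createJWT() above signs its payload with an RSA private key and only decodes the result; a commented-out block in that test hints at how the matching public key could be rebuilt. The following is a minimal sketch of the verification half, under the assumption that the Base64 public key and token come from that test; the class JwtVerificationSketch and its method are hypothetical and not part of this changeset, but they use only java-jwt and JDK classes the test already relies on.

package edu.harvard.iq.dataverse.engine.command.impl;

import com.auth0.jwt.JWT;
import com.auth0.jwt.JWTVerifier;
import com.auth0.jwt.algorithms.Algorithm;
import com.auth0.jwt.interfaces.DecodedJWT;

import java.security.KeyFactory;
import java.security.interfaces.RSAPublicKey;
import java.security.spec.X509EncodedKeySpec;
import java.util.Base64;

// Hypothetical helper: completes the public-key round trip that the test only sketches in comments.
public class JwtVerificationSketch {

    // Rebuilds an RSAPublicKey from a Base64-encoded X.509/SPKI string (like pubKeyString in the
    // test) and verifies the RS256 signature of a token created with the matching private key.
    public static DecodedJWT verify(String token, String pubKeyBase64) throws Exception {
        byte[] encoded = Base64.getDecoder().decode(pubKeyBase64);
        KeyFactory keyFactory = KeyFactory.getInstance("RSA");
        RSAPublicKey publicKey = (RSAPublicKey) keyFactory.generatePublic(new X509EncodedKeySpec(encoded));

        // Verification only needs the public half; the private-key argument may be null.
        Algorithm algorithm = Algorithm.RSA256(publicKey, null);
        JWTVerifier verifier = JWT.require(algorithm).build();
        return verifier.verify(token); // throws JWTVerificationException if the signature is invalid
    }
}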
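
The new UtilIT helpers getOaiListIdentifiersWithResumptionToken and getOaiListRecordsWithResumptionToken are building blocks for paging through large OAI-PMH responses. Below is a rough sketch of how a harvesting test might chain them; the class OaiPagingSketch, its method, and the GPath expressions are illustrative assumptions based on the standard OAI-PMH envelope, not code from this changeset.

package edu.harvard.iq.dataverse.api;

import com.jayway.restassured.path.xml.XmlPath;
import com.jayway.restassured.response.Response;

// Hypothetical sketch: walks every page of a ListIdentifiers response by following resumption tokens.
public class OaiPagingSketch {

    static int countAllIdentifiers(String setName, String metadataFormat) {
        int total = 0;
        Response page = UtilIT.getOaiListIdentifiers(setName, metadataFormat);
        while (true) {
            XmlPath xml = new XmlPath(page.getBody().asString());
            // Count the <header> elements returned on this page.
            total += xml.getList("OAI-PMH.ListIdentifiers.header").size();
            String token = xml.getString("OAI-PMH.ListIdentifiers.resumptionToken");
            if (token == null || token.isEmpty()) {
                return total; // last page: no (or empty) resumptionToken element
            }
            page = UtilIT.getOaiListIdentifiersWithResumptionToken(token);
        }
    }
}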