From a835f5db1dc2ed3fd307c012d8b1535dae24523f Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Fri, 18 Aug 2023 18:42:30 -0400 Subject: [PATCH 01/22] added pagination to the /versions api. dropped the files section from the (default) output of the api. (#9763) --- doc/sphinx-guides/source/api/native-api.rst | 9 +- .../iq/dataverse/DatasetServiceBean.java | 2 +- .../harvard/iq/dataverse/DatasetVersion.java | 6 +- .../dataverse/DatasetVersionServiceBean.java | 112 +++++++++++++++++- .../harvard/iq/dataverse/api/Datasets.java | 36 +++--- .../iq/dataverse/dataset/DatasetUtil.java | 2 +- .../command/impl/ListVersionsCommand.java | 48 +++++--- .../iq/dataverse/util/json/JsonPrinter.java | 16 +-- 8 files changed, 188 insertions(+), 43 deletions(-) diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 4d9466703e4..da3fbfffa73 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -830,7 +830,7 @@ The fully expanded example above (without environment variables) looks like this .. code-block:: bash - curl "https://demo.dataverse.org/api/datasets/24/versions" + curl "https://demo.dataverse.org/api/datasets/24/versions?includeFiles=true" It returns a list of versions with their metadata, and file list: @@ -883,6 +883,10 @@ It returns a list of versions with their metadata, and file list: ] } +The optional ``includeFiles`` parameter specifies whether the files should be listed in the output. It defaults to ``false``. (Note that for a dataset with a large number of versions and/or files having the files included can dramatically increase the volume of the output). A separate ``/files`` API can be used for listing the files, or a subset thereof in a given version. + +The optional ``offset`` and ``limit`` parameters can be used to specify the range of the versions list to be shown. 
This can be used to paginate through the list in a dataset with a large number of versions. + Get Version of a Dataset ~~~~~~~~~~~~~~~~~~~~~~~~ @@ -903,6 +907,9 @@ The fully expanded example above (without environment variables) looks like this curl "https://demo.dataverse.org/api/datasets/24/versions/1.0" +The optional ``includeFiles`` parameter specifies whether the files should be listed in the output (defaults to ``false``). Note that a separate ``/files`` API can be used for listing the files, or a subset thereof in a given version. + + .. _export-dataset-metadata-api: Export Metadata of a Dataset in Various Formats diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index 52eb5868c35..ceb5902defa 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -137,7 +137,7 @@ public Dataset findDeep(Object pk) { .setHint("eclipselink.left-join-fetch", "o.files.roleAssignments") .getSingleResult(); } - + public List findByOwnerId(Long ownerId) { return findByOwnerId(ownerId, false); } diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index 5836bd9e175..8d4dafad62a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -68,7 +68,11 @@ query = "SELECT OBJECT(o) FROM DatasetVersion AS o WHERE o.dataset.harvestedFrom IS NULL and o.releaseTime IS NOT NULL and o.archivalCopyLocation IS NULL" ), @NamedQuery(name = "DatasetVersion.findById", - query = "SELECT o FROM DatasetVersion o LEFT JOIN FETCH o.fileMetadatas WHERE o.id=:id")}) + query = "SELECT o FROM DatasetVersion o LEFT JOIN FETCH o.fileMetadatas WHERE o.id=:id"), + @NamedQuery(name = "DatasetVersion.findByDataset", + query = "SELECT o FROM DatasetVersion o WHERE 
o.dataset.id=:datasetId ORDER BY o.versionNumber DESC, o.minorVersionNumber DESC"), + @NamedQuery(name = "DatasetVersion.findReleasedByDataset", + query = "SELECT o FROM DatasetVersion o WHERE o.dataset.id=:datasetId AND o.versionState=edu.harvard.iq.dataverse.DatasetVersion.VersionState.RELEASED ORDER BY o.versionNumber DESC, o.minorVersionNumber DESC")}) @Entity diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java index 28243c37eee..27a4f4773d4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java @@ -48,7 +48,23 @@ public class DatasetVersionServiceBean implements java.io.Serializable { private static final Logger logger = Logger.getLogger(DatasetVersionServiceBean.class.getCanonicalName()); private static final SimpleDateFormat logFormatter = new SimpleDateFormat("yyyy-MM-dd'T'HH-mm-ss"); - + + private static final String QUERY_STR_FIND_ALL_FILE_METADATAS_ORDER_BY_LABEL = "SELECT fm FROM FileMetadata fm" + + " WHERE fm.datasetVersion.id=:datasetVersionId" + + " ORDER BY fm.label"; + private static final String QUERY_STR_FIND_ALL_FILE_METADATAS_ORDER_BY_DATE = "SELECT fm FROM FileMetadata fm, DvObject dvo" + + " WHERE fm.datasetVersion.id = :datasetVersionId" + + " AND fm.dataFile.id = dvo.id" + + " ORDER BY CASE WHEN dvo.publicationDate IS NOT NULL THEN dvo.publicationDate ELSE dvo.createDate END"; + private static final String QUERY_STR_FIND_ALL_FILE_METADATAS_ORDER_BY_SIZE = "SELECT fm FROM FileMetadata fm, DataFile df" + + " WHERE fm.datasetVersion.id = :datasetVersionId" + + " AND fm.dataFile.id = df.id" + + " ORDER BY df.filesize"; + private static final String QUERY_STR_FIND_ALL_FILE_METADATAS_ORDER_BY_TYPE = "SELECT fm FROM FileMetadata fm, DataFile df" + + " WHERE fm.datasetVersion.id = :datasetVersionId" + + " AND fm.dataFile.id = df.id" + + " ORDER 
BY df.contentType"; + @EJB DatasetServiceBean datasetService; @@ -149,7 +165,19 @@ public DatasetVersion getDatasetVersion(){ return this.datasetVersionForResponse; } } // end RetrieveDatasetVersionResponse - + + /** + * Different criteria to sort the results of FileMetadata queries used in {@link DatasetVersionServiceBean#getFileMetadatas} + */ + public enum FileMetadatasOrderCriteria { + NameAZ, + NameZA, + Newest, + Oldest, + Size, + Type + } + public DatasetVersion find(Object pk) { return em.find(DatasetVersion.class, pk); } @@ -168,7 +196,39 @@ public DatasetVersion findDeep(Object pk) { .setHint("eclipselink.left-join-fetch", "o.fileMetadatas.dataFile.creator") .getSingleResult(); } - + + /** + * Performs the same database lookup as the one behind Dataset.getVersions(). + * Additionally, provides the arguments for selecting a partial list of + * (length-offset) versions for pagination, plus the ability to pre-select + * only the publicly-viewable versions. + * @param datasetId + * @param offset for pagination through long lists of versions + * @param length for pagination through long lists of versions + * @param includeUnpublished retrieves all the versions, including drafts and deaccessioned. 
+ * @return (partial) list of versions + */ + public List findVersions(Long datasetId, Integer offset, Integer length, boolean includeUnpublished) { + TypedQuery query; + if (includeUnpublished) { + query = em.createNamedQuery("DatasetVersion.findByDataset", DatasetVersion.class); + } else { + query = em.createNamedQuery("DatasetVersion.findReleasedByDataset", DatasetVersion.class) + .setParameter("datasetId", datasetId); + } + + query.setParameter("datasetId", datasetId); + + if (offset != null) { + query.setFirstResult(offset); + } + if (length != null) { + query.setMaxResults(length); + } + + return query.getResultList(); + } + public DatasetVersion findByFriendlyVersionNumber(Long datasetId, String friendlyVersionNumber) { Long majorVersionNumber = null; Long minorVersionNumber = null; @@ -1224,4 +1284,50 @@ public List getUnarchivedDatasetVersions(){ return null; } } // end getUnarchivedDatasetVersions + + /** + * Returns a FileMetadata list of files in the specified DatasetVersion + * + * @param datasetVersion the DatasetVersion to access + * @param limit for pagination, can be null + * @param offset for pagination, can be null + * @param orderCriteria a FileMetadatasOrderCriteria to order the results + * @return a FileMetadata list of the specified DatasetVersion + */ + public List getFileMetadatas(DatasetVersion datasetVersion, Integer limit, Integer offset, FileMetadatasOrderCriteria orderCriteria) { + TypedQuery query = em.createQuery(getQueryStringFromFileMetadatasOrderCriteria(orderCriteria), FileMetadata.class) + .setParameter("datasetVersionId", datasetVersion.getId()); + if (limit != null) { + query.setMaxResults(limit); + } + if (offset != null) { + query.setFirstResult(offset); + } + return query.getResultList(); + } + + private String getQueryStringFromFileMetadatasOrderCriteria(FileMetadatasOrderCriteria orderCriteria) { + String queryString; + switch (orderCriteria) { + case NameZA: + queryString = 
QUERY_STR_FIND_ALL_FILE_METADATAS_ORDER_BY_LABEL + " DESC"; + break; + case Newest: + queryString = QUERY_STR_FIND_ALL_FILE_METADATAS_ORDER_BY_DATE + " DESC"; + break; + case Oldest: + queryString = QUERY_STR_FIND_ALL_FILE_METADATAS_ORDER_BY_DATE; + break; + case Size: + queryString = QUERY_STR_FIND_ALL_FILE_METADATAS_ORDER_BY_SIZE; + break; + case Type: + queryString = QUERY_STR_FIND_ALL_FILE_METADATAS_ORDER_BY_TYPE; + break; + default: + queryString = QUERY_STR_FIND_ALL_FILE_METADATAS_ORDER_BY_LABEL; + break; + } + return queryString; + } } // end class diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index dbea63cb1c8..25d077f9807 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -260,7 +260,7 @@ public Response getDataset(@Context ContainerRequestContext crc, @PathParam("id" MakeDataCountLoggingServiceBean.MakeDataCountEntry entry = new MakeDataCountEntry(uriInfo, headers, dvRequestService, retrieved); mdcLogService.logEntry(entry); } - return ok(jsonbuilder.add("latestVersion", (latest != null) ? json(latest) : null)); + return ok(jsonbuilder.add("latestVersion", (latest != null) ? 
json(latest, true) : null)); }, getRequestUser(crc)); } @@ -466,31 +466,39 @@ public Response useDefaultCitationDate(@Context ContainerRequestContext crc, @Pa @GET @AuthRequired @Path("{id}/versions") - public Response listVersions(@Context ContainerRequestContext crc, @PathParam("id") String id ) { + public Response listVersions(@Context ContainerRequestContext crc, @PathParam("id") String id, @QueryParam("includeFiles") Boolean includeFiles, @QueryParam("limit") Integer limit, @QueryParam("offset") Integer offset) { return response( req -> - ok( execCommand( new ListVersionsCommand(req, findDatasetOrDie(id)) ) + ok( execCommand( new ListVersionsCommand(req, findDatasetOrDie(id), offset, limit) ) .stream() - .map( d -> json(d) ) + .map( d -> json(d, includeFiles == null ? false : includeFiles) ) .collect(toJsonArray())), getRequestUser(crc)); } @GET @AuthRequired @Path("{id}/versions/{versionId}") - public Response getVersion(@Context ContainerRequestContext crc, @PathParam("id") String datasetId, @PathParam("versionId") String versionId, @Context UriInfo uriInfo, @Context HttpHeaders headers) { + public Response getVersion(@Context ContainerRequestContext crc, @PathParam("id") String datasetId, @PathParam("versionId") String versionId, @QueryParam("includeFiles") Boolean includeFiles, @Context UriInfo uriInfo, @Context HttpHeaders headers) { return response( req -> { DatasetVersion dsv = getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers); return (dsv == null || dsv.getId() == null) ? notFound("Dataset version not found") - : ok(json(dsv)); + : ok(json(dsv, includeFiles == null ? 
false : includeFiles)); }, getRequestUser(crc)); } @GET @AuthRequired @Path("{id}/versions/{versionId}/files") - public Response getVersionFiles(@Context ContainerRequestContext crc, @PathParam("id") String datasetId, @PathParam("versionId") String versionId, @Context UriInfo uriInfo, @Context HttpHeaders headers) { - return response( req -> ok( jsonFileMetadatas( - getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers).getFileMetadatas())), getRequestUser(crc)); + public Response getVersionFiles(@Context ContainerRequestContext crc, @PathParam("id") String datasetId, @PathParam("versionId") String versionId, @QueryParam("limit") Integer limit, @QueryParam("offset") Integer offset, @QueryParam("orderCriteria") String orderCriteria, @Context UriInfo uriInfo, @Context HttpHeaders headers) { + return response( req -> { + DatasetVersion datasetVersion = getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers); + DatasetVersionServiceBean.FileMetadatasOrderCriteria fileMetadatasOrderCriteria; + try { + fileMetadatasOrderCriteria = orderCriteria != null ? 
DatasetVersionServiceBean.FileMetadatasOrderCriteria.valueOf(orderCriteria) : DatasetVersionServiceBean.FileMetadatasOrderCriteria.NameAZ; + } catch (IllegalArgumentException e) { + return error(Response.Status.BAD_REQUEST, "Invalid order criteria: " + orderCriteria); + } + return ok(jsonFileMetadatas(datasetversionService.getFileMetadatas(datasetVersion, limit, offset, fileMetadatasOrderCriteria))); + }, getRequestUser(crc)); } @GET @@ -708,7 +716,7 @@ public Response updateDraftVersion(@Context ContainerRequestContext crc, String } managedVersion = execCommand(new CreateDatasetVersionCommand(req, ds, incomingVersion)); } - return ok( json(managedVersion) ); + return ok( json(managedVersion, true) ); } catch (JsonParseException ex) { logger.log(Level.SEVERE, "Semantic error parsing dataset version Json: " + ex.getMessage(), ex); @@ -943,7 +951,7 @@ private Response processDatasetFieldDataDelete(String jsonBody, String id, Datav DatasetVersion managedVersion = execCommand(new UpdateDatasetVersionCommand(ds, req)).getLatestVersion(); - return ok(json(managedVersion)); + return ok(json(managedVersion, true)); } catch (JsonParseException ex) { logger.log(Level.SEVERE, "Semantic error parsing dataset update Json: " + ex.getMessage(), ex); @@ -1092,7 +1100,7 @@ private Response processDatasetUpdate(String jsonBody, String id, DataverseReque } DatasetVersion managedVersion = execCommand(new UpdateDatasetVersionCommand(ds, req)).getLatestVersion(); - return ok(json(managedVersion)); + return ok(json(managedVersion, true)); } catch (JsonParseException ex) { logger.log(Level.SEVERE, "Semantic error parsing dataset update Json: " + ex.getMessage(), ex); @@ -3848,9 +3856,9 @@ public Response getPrivateUrlDatasetVersion(@PathParam("privateUrlToken") String JsonObjectBuilder responseJson; if (isAnonymizedAccess) { List anonymizedFieldTypeNamesList = new ArrayList<>(Arrays.asList(anonymizedFieldTypeNames.split(",\\s"))); - responseJson = json(dsv, anonymizedFieldTypeNamesList); 
+ responseJson = json(dsv, anonymizedFieldTypeNamesList, true); } else { - responseJson = json(dsv); + responseJson = json(dsv, true); } return ok(responseJson); } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java b/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java index adbd132bce8..e36ba34a364 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java @@ -521,7 +521,7 @@ public static boolean validateDatasetMetadataExternally(Dataset ds, String execu // for the filter to whitelist by these attributes. try { - jsonMetadata = json(ds).add("datasetVersion", json(ds.getLatestVersion())) + jsonMetadata = json(ds).add("datasetVersion", json(ds.getLatestVersion(), true)) .add("sourceAddress", sourceAddressLabel) .add("userIdentifier", userIdentifier) .add("parentAlias", ds.getOwner().getAlias()) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ListVersionsCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ListVersionsCommand.java index 51283f29156..80a5fe9b080 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ListVersionsCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ListVersionsCommand.java @@ -23,23 +23,41 @@ */ // No permission needed to view published dvObjects @RequiredPermissions({}) -public class ListVersionsCommand extends AbstractCommand>{ - +public class ListVersionsCommand extends AbstractCommand> { + private final Dataset ds; + private final Integer limit; + private final Integer offset; - public ListVersionsCommand(DataverseRequest aRequest, Dataset aDataset) { - super(aRequest, aDataset); - ds = aDataset; - } + public ListVersionsCommand(DataverseRequest aRequest, Dataset aDataset) { + this(aRequest, aDataset, null, null); + } + + public ListVersionsCommand(DataverseRequest aRequest, Dataset aDataset, Integer offset, Integer limit) 
{ + super(aRequest, aDataset); + ds = aDataset; + this.offset = offset; + this.limit = limit; + } - @Override - public List execute(CommandContext ctxt) throws CommandException { - List outputList = new LinkedList<>(); - for ( DatasetVersion dsv : ds.getVersions() ) { - if (dsv.isReleased() || ctxt.permissions().request( getRequest() ).on(ds).has(Permission.EditDataset)) { - outputList.add(dsv); + @Override + public List execute(CommandContext ctxt) throws CommandException { + + boolean includeUnpublished = ctxt.permissions().request(getRequest()).on(ds).has(Permission.EditDataset); + + if (offset == null && limit == null) { + // @todo: this fragment can be dropped, and the service-based method below + // can be used for both cases. + List outputList = new LinkedList<>(); + for (DatasetVersion dsv : ds.getVersions()) { + if (dsv.isReleased() || includeUnpublished) { + outputList.add(dsv); + } } - } - return outputList; - } + return outputList; + } else { + // Only a partial list (one "page"-worth) of versions is being requested + return ctxt.datasetVersion().findVersions(ds.getId(), offset, limit, includeUnpublished); + } + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java index b6026998bb7..dc8971c9539 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java @@ -368,11 +368,11 @@ public static JsonObjectBuilder json(FileDetailsHolder ds) { .add("mime",ds.getMime())); } - public static JsonObjectBuilder json(DatasetVersion dsv) { - return json(dsv, null); + public static JsonObjectBuilder json(DatasetVersion dsv, boolean includeFiles) { + return json(dsv, null, includeFiles); } - public static JsonObjectBuilder json(DatasetVersion dsv, List anonymizedFieldTypeNamesList) { + public static JsonObjectBuilder json(DatasetVersion dsv, List anonymizedFieldTypeNamesList, boolean 
includeFiles) { Dataset dataset = dsv.getDataset(); JsonObjectBuilder bld = jsonObjectBuilder() .add("id", dsv.getId()).add("datasetId", dataset.getId()) @@ -415,7 +415,9 @@ public static JsonObjectBuilder json(DatasetVersion dsv, List anonymized jsonByBlocks(dsv.getDatasetFields(), anonymizedFieldTypeNamesList) : jsonByBlocks(dsv.getDatasetFields()) ); - bld.add("files", jsonFileMetadatas(dsv.getFileMetadatas())); + if (includeFiles) { + bld.add("files", jsonFileMetadatas(dsv.getFileMetadatas())); + } return bld; } @@ -447,8 +449,8 @@ public static JsonObjectBuilder jsonDataFileList(List dataFiles){ * to the regular `json` method for DatasetVersion? Will anything break? * Unit tests for that method could not be found. */ - public static JsonObjectBuilder jsonWithCitation(DatasetVersion dsv) { - JsonObjectBuilder dsvWithCitation = JsonPrinter.json(dsv); + public static JsonObjectBuilder jsonWithCitation(DatasetVersion dsv, boolean includeFiles) { + JsonObjectBuilder dsvWithCitation = JsonPrinter.json(dsv, includeFiles); dsvWithCitation.add("citation", dsv.getCitation()); return dsvWithCitation; } @@ -467,7 +469,7 @@ public static JsonObjectBuilder jsonWithCitation(DatasetVersion dsv) { */ public static JsonObjectBuilder jsonAsDatasetDto(DatasetVersion dsv) { JsonObjectBuilder datasetDtoAsJson = JsonPrinter.json(dsv.getDataset()); - datasetDtoAsJson.add("datasetVersion", jsonWithCitation(dsv)); + datasetDtoAsJson.add("datasetVersion", jsonWithCitation(dsv, true)); return datasetDtoAsJson; } From de35ae7c65fc4b77704ab0cd5df4a9d31ec0dbad Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Mon, 21 Aug 2023 00:17:17 -0400 Subject: [PATCH 02/22] added left join hints to the full filemetadatas lookup. 
#9763 --- .../dataverse/DatasetVersionServiceBean.java | 25 +++++++++++++++---- .../harvard/iq/dataverse/api/Datasets.java | 1 + 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java index 27a4f4773d4..1edc281fa3e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java @@ -202,6 +202,8 @@ public DatasetVersion findDeep(Object pk) { * Additionally, provides the arguments for selecting a partial list of * (length-offset) versions for pagination, plus the ability to pre-select * only the publicly-viewable versions. + * It is recommended that individual software components utilize the + * ListVersionsCommand, instead of calling this service method directly. * @param datasetId * @param offset for pagination through long lists of versions * @param length for pagination through long lists of versions @@ -1297,11 +1299,24 @@ public List getUnarchivedDatasetVersions(){ public List getFileMetadatas(DatasetVersion datasetVersion, Integer limit, Integer offset, FileMetadatasOrderCriteria orderCriteria) { TypedQuery query = em.createQuery(getQueryStringFromFileMetadatasOrderCriteria(orderCriteria), FileMetadata.class) .setParameter("datasetVersionId", datasetVersion.getId()); - if (limit != null) { - query.setMaxResults(limit); - } - if (offset != null) { - query.setFirstResult(offset); + + if (limit == null && offset == null) { + query.setHint("eclipselink.left-join-fetch", "fm.dataFile.ingestRequest") + .setHint("eclipselink.left-join-fetch", "fm.dataFile.thumbnailForDataset") + .setHint("eclipselink.left-join-fetch", "fm.dataFile.dataTables") + .setHint("eclipselink.left-join-fetch", "fm.fileCategories") + .setHint("eclipselink.left-join-fetch", "fm.dataFile.embargo") + .setHint("eclipselink.left-join-fetch", 
"fm.datasetVersion") + .setHint("eclipselink.left-join-fetch", "fm.dataFile.releaseUser") + .setHint("eclipselink.left-join-fetch", "fm.dataFile.creator"); + } else { + // @todo: is there really no way to use offset-limit with left join hints? + if (limit != null) { + query.setMaxResults(limit); + } + if (offset != null) { + query.setFirstResult(offset); + } } return query.getResultList(); } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 25d077f9807..48755d4ea8a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -479,6 +479,7 @@ public Response listVersions(@Context ContainerRequestContext crc, @PathParam("i @Path("{id}/versions/{versionId}") public Response getVersion(@Context ContainerRequestContext crc, @PathParam("id") String datasetId, @PathParam("versionId") String versionId, @QueryParam("includeFiles") Boolean includeFiles, @Context UriInfo uriInfo, @Context HttpHeaders headers) { return response( req -> { + // @todo: consider using DatasetVersionServiceBean.findDeep() here  DatasetVersion dsv = getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers); return (dsv == null || dsv.getId() == null) ? notFound("Dataset version not found") : ok(json(dsv, includeFiles == null ? 
false : includeFiles)); From 4cd62eb6ed0812fec031e9328d3595dd13616225 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Tue, 22 Aug 2023 13:23:49 -0400 Subject: [PATCH 03/22] (ongoing experiments; a lot of these changes are temporary and will be deleted) #9763 --- .../harvard/iq/dataverse/DatasetVersion.java | 4 ++- .../dataverse/DatasetVersionServiceBean.java | 31 +++++++++++++++++-- .../harvard/iq/dataverse/api/Datasets.java | 20 ++++++++++++ .../command/impl/ListVersionsCommand.java | 4 ++- .../search/SearchIncludeFragment.java | 1 + .../iq/dataverse/util/json/JsonPrinter.java | 5 +++ 6 files changed, 60 insertions(+), 5 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index 8d4dafad62a..f547f2963d1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -72,7 +72,9 @@ @NamedQuery(name = "DatasetVersion.findByDataset", query = "SELECT o FROM DatasetVersion o WHERE o.dataset.id=:datasetId ORDER BY o.versionNumber DESC, o.minorVersionNumber DESC"), @NamedQuery(name = "DatasetVersion.findReleasedByDataset", - query = "SELECT o FROM DatasetVersion o WHERE o.dataset.id=:datasetId AND o.versionState=edu.harvard.iq.dataverse.DatasetVersion.VersionState.RELEASED ORDER BY o.versionNumber DESC, o.minorVersionNumber DESC")}) + query = "SELECT o FROM DatasetVersion o WHERE o.dataset.id=:datasetId AND o.versionState=edu.harvard.iq.dataverse.DatasetVersion.VersionState.RELEASED ORDER BY o.versionNumber DESC, o.minorVersionNumber DESC")/*, + @NamedQuery(name = "DatasetVersion.findVersionElements", + query = "SELECT o.id, o.versionState, o.versionNumber, o.minorVersionNumber FROM DatasetVersion o WHERE o.dataset.id=:datasetId ORDER BY o.versionNumber DESC, o.minorVersionNumber DESC")*/}) @Entity diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java 
b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java index 1edc281fa3e..fbed7d93cdd 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java @@ -1301,7 +1301,7 @@ public List getFileMetadatas(DatasetVersion datasetVersion, Intege .setParameter("datasetVersionId", datasetVersion.getId()); if (limit == null && offset == null) { - query.setHint("eclipselink.left-join-fetch", "fm.dataFile.ingestRequest") + query = query.setHint("eclipselink.left-join-fetch", "fm.dataFile.ingestRequest") .setHint("eclipselink.left-join-fetch", "fm.dataFile.thumbnailForDataset") .setHint("eclipselink.left-join-fetch", "fm.dataFile.dataTables") .setHint("eclipselink.left-join-fetch", "fm.fileCategories") @@ -1312,10 +1312,35 @@ public List getFileMetadatas(DatasetVersion datasetVersion, Intege } else { // @todo: is there really no way to use offset-limit with left join hints? if (limit != null) { - query.setMaxResults(limit); + query = query.setMaxResults(limit); } if (offset != null) { - query.setFirstResult(offset); + query = query.setFirstResult(offset); + } + } + return query.getResultList(); + } + + public List getFileMetadatasByDbId(Long versionId, Integer limit, Integer offset, FileMetadatasOrderCriteria orderCriteria) { + TypedQuery query = em.createQuery(getQueryStringFromFileMetadatasOrderCriteria(orderCriteria), FileMetadata.class) + .setParameter("datasetVersionId", versionId); + + if (limit == null && offset == null) { + query = query.setHint("eclipselink.left-join-fetch", "fm.dataFile.ingestRequest") + .setHint("eclipselink.left-join-fetch", "fm.dataFile.thumbnailForDataset") + .setHint("eclipselink.left-join-fetch", "fm.dataFile.dataTables") + .setHint("eclipselink.left-join-fetch", "fm.fileCategories") + .setHint("eclipselink.left-join-fetch", "fm.dataFile.embargo") + .setHint("eclipselink.left-join-fetch", "fm.datasetVersion") + 
.setHint("eclipselink.left-join-fetch", "fm.dataFile.releaseUser") + .setHint("eclipselink.left-join-fetch", "fm.dataFile.creator"); + } else { + // @todo: is there really no way to use offset-limit with left join hints? + if (limit != null) { + query = query.setMaxResults(limit); + } + if (offset != null) { + query = query.setFirstResult(offset); } } return query.getResultList(); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 48755d4ea8a..47c249b7c8a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -467,6 +467,9 @@ public Response useDefaultCitationDate(@Context ContainerRequestContext crc, @Pa @AuthRequired @Path("{id}/versions") public Response listVersions(@Context ContainerRequestContext crc, @PathParam("id") String id, @QueryParam("includeFiles") Boolean includeFiles, @QueryParam("limit") Integer limit, @QueryParam("offset") Integer offset) { + // @todo: when full versions list - including files - is requested, consider + // using datasetservice.findDeep() (needs testing on "monstrous" datasets + // with a lot of versions!) return response( req -> ok( execCommand( new ListVersionsCommand(req, findDatasetOrDie(id), offset, limit) ) .stream() @@ -502,6 +505,23 @@ public Response getVersionFiles(@Context ContainerRequestContext crc, @PathParam }, getRequestUser(crc)); } + //@todo: remember to delete this! (for experiments only!) 
+ @GET + @AuthRequired + @Path("{id}/versions/{versionId}/files2") + public Response getVersionFiles2(@Context ContainerRequestContext crc, @PathParam("id") String datasetId, @PathParam("versionId") Long versionId, @QueryParam("limit") Integer limit, @QueryParam("offset") Integer offset, @QueryParam("orderCriteria") String orderCriteria, @Context UriInfo uriInfo, @Context HttpHeaders headers) { + return response( req -> { + //DatasetVersion datasetVersion = getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers); + DatasetVersionServiceBean.FileMetadatasOrderCriteria fileMetadatasOrderCriteria; + try { + fileMetadatasOrderCriteria = orderCriteria != null ? DatasetVersionServiceBean.FileMetadatasOrderCriteria.valueOf(orderCriteria) : DatasetVersionServiceBean.FileMetadatasOrderCriteria.NameAZ; + } catch (IllegalArgumentException e) { + return error(Response.Status.BAD_REQUEST, "Invalid order criteria: " + orderCriteria); + } + return ok(jsonFileMetadatas(datasetversionService.getFileMetadatasByDbId(versionId, limit, offset, fileMetadatasOrderCriteria))); + }, getRequestUser(crc)); + } + @GET @AuthRequired @Path("{id}/dirindex") diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ListVersionsCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ListVersionsCommand.java index 80a5fe9b080..d3675a8f206 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ListVersionsCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ListVersionsCommand.java @@ -47,7 +47,9 @@ public List execute(CommandContext ctxt) throws CommandException if (offset == null && limit == null) { // @todo: this fragment can be dropped, and the service-based method below - // can be used for both cases. + // can be used for both cases. + // @todo: on the other hand, consider using datasetservice.findDeep() + // when a full list of versions is requested. 
List outputList = new LinkedList<>(); for (DatasetVersion dsv : ds.getVersions()) { if (dsv.isReleased() || includeUnpublished) { diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java index e249b81c983..5c5dc8b5171 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java @@ -205,6 +205,7 @@ public String searchRedirect(String dataverseRedirectPage, Dataverse dataverseIn */ dataverse = dataverseIn; + logger.info("redirect page: "+dataverseRedirectPage); dataverseRedirectPage = StringUtils.isBlank(dataverseRedirectPage) ? "dataverse.xhtml" : dataverseRedirectPage; String optionalDataverseScope = "&alias=" + dataverse.getAlias(); diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java index dc8971c9539..68f0be3a067 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java @@ -373,6 +373,9 @@ public static JsonObjectBuilder json(DatasetVersion dsv, boolean includeFiles) { } public static JsonObjectBuilder json(DatasetVersion dsv, List anonymizedFieldTypeNamesList, boolean includeFiles) { + /* return json(dsv, null, includeFiles, null); + } + public static JsonObjectBuilder json(DatasetVersion dsv, List anonymizedFieldTypeNamesList, boolean includeFiles, Long numberOfFiles) {*/ Dataset dataset = dsv.getDataset(); JsonObjectBuilder bld = jsonObjectBuilder() .add("id", dsv.getId()).add("datasetId", dataset.getId()) @@ -388,6 +391,8 @@ public static JsonObjectBuilder json(DatasetVersion dsv, List anonymized .add("alternativePersistentId", dataset.getAlternativePersistentIdentifier()) .add("publicationDate", dataset.getPublicationDateFormattedYYYYMMDD()) .add("citationDate", 
dataset.getCitationDateFormattedYYYYMMDD()); + //.add("numberOfFiles", numberOfFiles); + License license = DatasetUtil.getLicense(dsv); if (license != null) { bld.add("license", jsonLicense(dsv)); From 4c28979b0b62f9244313a90ce74c8a7e22791671 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Tue, 29 Aug 2023 17:48:08 -0400 Subject: [PATCH 04/22] work in progress. --- .../java/edu/harvard/iq/dataverse/api/Datasets.java | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 47c249b7c8a..b1858b9982f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -470,11 +470,18 @@ public Response listVersions(@Context ContainerRequestContext crc, @PathParam("i // @todo: when full versions list - including files - is requested, consider // using datasetservice.findDeep() (needs testing on "monstrous" datasets // with a lot of versions!) - return response( req -> - ok( execCommand( new ListVersionsCommand(req, findDatasetOrDie(id), offset, limit) ) + + return response( req -> { + Dataset dataset = findDatasetOrDie(id); + if (includeFiles == null ? false : includeFiles) { + dataset = datasetService.findDeep(dataset.getId()); + } + //return ok( execCommand( new ListVersionsCommand(req, findDatasetOrDie(id), offset, limit) ) + return ok( execCommand( new ListVersionsCommand(req, dataset, offset, limit) ) .stream() .map( d -> json(d, includeFiles == null ? false : includeFiles) ) - .collect(toJsonArray())), getRequestUser(crc)); + .collect(toJsonArray())); + }, getRequestUser(crc)); } @GET From ccd6b7dfd6ea6bc5ae8ec09b0f34819f4adeda59 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Wed, 30 Aug 2023 16:58:57 -0400 Subject: [PATCH 05/22] made the "includeFiles" option true by default, cleaned up the ".findDeep()" logic. 
#9763 --- .../edu/harvard/iq/dataverse/Dataset.java | 8 ++-- .../dataverse/DatasetVersionServiceBean.java | 1 + .../harvard/iq/dataverse/api/Datasets.java | 40 ++++++------------- .../command/impl/ListVersionsCommand.java | 27 ++++++++++--- 4 files changed, 38 insertions(+), 38 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/Dataset.java b/src/main/java/edu/harvard/iq/dataverse/Dataset.java index 620e66c6c54..a6123a36c9d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/Dataset.java +++ b/src/main/java/edu/harvard/iq/dataverse/Dataset.java @@ -676,11 +676,11 @@ public Timestamp getCitationDate() { Timestamp citationDate = null; //Only calculate if this dataset doesn't use an alternate date field for publication date if (citationDateDatasetFieldType == null) { - List versions = this.versions; + //List versions = this.versions; // TODo - is this ever not version 1.0 (or draft if not published yet) - DatasetVersion oldest = versions.get(versions.size() - 1); + //DatasetVersion oldest = versions.get(versions.size() - 1); citationDate = super.getPublicationDate(); - if (oldest.isPublished()) { + /*if (oldest.isPublished()) { List fms = oldest.getFileMetadatas(); for (FileMetadata fm : fms) { Embargo embargo = fm.getDataFile().getEmbargo(); @@ -691,7 +691,7 @@ public Timestamp getCitationDate() { } } } - } + }*/ } return citationDate; } diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java index fbed7d93cdd..6c514a2405c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java @@ -194,6 +194,7 @@ public DatasetVersion findDeep(Object pk) { .setHint("eclipselink.left-join-fetch", "o.fileMetadatas.datasetVersion") .setHint("eclipselink.left-join-fetch", "o.fileMetadatas.dataFile.releaseUser") .setHint("eclipselink.left-join-fetch", 
"o.fileMetadatas.dataFile.creator") + .setHint("eclipselink.left-join-fetch", "o.fileMetadatas.dataFile.dataFileTags") .getSingleResult(); } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index b1858b9982f..23de46c1324 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -467,19 +467,13 @@ public Response useDefaultCitationDate(@Context ContainerRequestContext crc, @Pa @AuthRequired @Path("{id}/versions") public Response listVersions(@Context ContainerRequestContext crc, @PathParam("id") String id, @QueryParam("includeFiles") Boolean includeFiles, @QueryParam("limit") Integer limit, @QueryParam("offset") Integer offset) { - // @todo: when full versions list - including files - is requested, consider - // using datasetservice.findDeep() (needs testing on "monstrous" datasets - // with a lot of versions!) return response( req -> { Dataset dataset = findDatasetOrDie(id); - if (includeFiles == null ? false : includeFiles) { - dataset = datasetService.findDeep(dataset.getId()); - } - //return ok( execCommand( new ListVersionsCommand(req, findDatasetOrDie(id), offset, limit) ) - return ok( execCommand( new ListVersionsCommand(req, dataset, offset, limit) ) + + return ok( execCommand( new ListVersionsCommand(req, dataset, offset, limit, (includeFiles == null ? true : includeFiles)) ) .stream() - .map( d -> json(d, includeFiles == null ? false : includeFiles) ) + .map( d -> json(d, includeFiles == null ? 
true : includeFiles) ) .collect(toJsonArray())); }, getRequestUser(crc)); } @@ -491,8 +485,15 @@ public Response getVersion(@Context ContainerRequestContext crc, @PathParam("id" return response( req -> { // @todo: consider using DatasetVersionServiceBean.findDeep() here  DatasetVersion dsv = getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers); - return (dsv == null || dsv.getId() == null) ? notFound("Dataset version not found") - : ok(json(dsv, includeFiles == null ? false : includeFiles)); + + if (dsv == null || dsv.getId() == null) { + return notFound("Dataset version not found"); + } + + if (includeFiles == null ? true : includeFiles) { + dsv = datasetversionService.findDeep(dsv.getId()); + } + return ok(json(dsv, includeFiles == null ? true : includeFiles)); }, getRequestUser(crc)); } @@ -512,23 +513,6 @@ public Response getVersionFiles(@Context ContainerRequestContext crc, @PathParam }, getRequestUser(crc)); } - //@todo: remember to delete this! (for experiments only!) - @GET - @AuthRequired - @Path("{id}/versions/{versionId}/files2") - public Response getVersionFiles2(@Context ContainerRequestContext crc, @PathParam("id") String datasetId, @PathParam("versionId") Long versionId, @QueryParam("limit") Integer limit, @QueryParam("offset") Integer offset, @QueryParam("orderCriteria") String orderCriteria, @Context UriInfo uriInfo, @Context HttpHeaders headers) { - return response( req -> { - //DatasetVersion datasetVersion = getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers); - DatasetVersionServiceBean.FileMetadatasOrderCriteria fileMetadatasOrderCriteria; - try { - fileMetadatasOrderCriteria = orderCriteria != null ? 
DatasetVersionServiceBean.FileMetadatasOrderCriteria.valueOf(orderCriteria) : DatasetVersionServiceBean.FileMetadatasOrderCriteria.NameAZ; - } catch (IllegalArgumentException e) { - return error(Response.Status.BAD_REQUEST, "Invalid order criteria: " + orderCriteria); - } - return ok(jsonFileMetadatas(datasetversionService.getFileMetadatasByDbId(versionId, limit, offset, fileMetadatasOrderCriteria))); - }, getRequestUser(crc)); - } - @GET @AuthRequired @Path("{id}/dirindex") diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ListVersionsCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ListVersionsCommand.java index d3675a8f206..b93833ffdf9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ListVersionsCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ListVersionsCommand.java @@ -14,6 +14,7 @@ import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; +import edu.harvard.iq.dataverse.engine.command.exception.CommandExecutionException; import java.util.LinkedList; import java.util.List; @@ -28,16 +29,22 @@ public class ListVersionsCommand extends AbstractCommand> { private final Dataset ds; private final Integer limit; private final Integer offset; + private final Boolean deepLookup; public ListVersionsCommand(DataverseRequest aRequest, Dataset aDataset) { this(aRequest, aDataset, null, null); } - + public ListVersionsCommand(DataverseRequest aRequest, Dataset aDataset, Integer offset, Integer limit) { + this(aRequest, aDataset, null, null, false); + } + + public ListVersionsCommand(DataverseRequest aRequest, Dataset aDataset, Integer offset, Integer limit, boolean deepLookup) { super(aRequest, aDataset); ds = aDataset; this.offset = offset; this.limit = limit; + this.deepLookup = deepLookup; } @Override @@ -45,14 +52,22 @@ 
public List execute(CommandContext ctxt) throws CommandException boolean includeUnpublished = ctxt.permissions().request(getRequest()).on(ds).has(Permission.EditDataset); - if (offset == null && limit == null) { - // @todo: this fragment can be dropped, and the service-based method below - // can be used for both cases. - // @todo: on the other hand, consider using datasetservice.findDeep() - // when a full list of versions is requested. + if (offset == null && limit == null) { + List outputList = new LinkedList<>(); for (DatasetVersion dsv : ds.getVersions()) { if (dsv.isReleased() || includeUnpublished) { + if (deepLookup) { + // @todo: when "deep"/extended lookup is requested, and + // we call .findDeep() to look up each version again, + // there is probably a more economical way to obtain the + // numeric ids of the versions, by a direct single query, + // rather than go through ds.getVersions() like we are now. + dsv = ctxt.datasetVersion().findDeep(dsv.getId()); + if (dsv == null) { + throw new CommandExecutionException("Failed to look up full list of dataset versions", this); + } + } outputList.add(dsv); } } From 2d27c0392a2da21895aa9ff49bc62515ebb5faa1 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Thu, 31 Aug 2023 17:10:45 -0400 Subject: [PATCH 06/22] intermediate changes for the adjusted citation date. 
#9763 --- .../edu/harvard/iq/dataverse/Dataset.java | 21 +++++++++++++++++++ .../dataverse/DatasetVersionServiceBean.java | 1 + 2 files changed, 22 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/Dataset.java b/src/main/java/edu/harvard/iq/dataverse/Dataset.java index a6123a36c9d..f5a2f7cc6fb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/Dataset.java +++ b/src/main/java/edu/harvard/iq/dataverse/Dataset.java @@ -158,6 +158,22 @@ public void setCitationDateDatasetFieldType(DatasetFieldType citationDateDataset this.citationDateDatasetFieldType = citationDateDatasetFieldType; } + // Per DataCite best practices, the citation date of a dataset may need + // to be adjusted to reflect the latest embargo availability date of any + // file within the first published version. + // If any files are embargoed in the first version, we will find calculate + // the date and cache it here. + private Timestamp embargoCitationDate; + + public Timestamp getEmbargoCitationDate() { + return embargoCitationDate; + } + + public void setEmbargoCitationDate(Timestamp embargoCitationDate) { + this.embargoCitationDate = embargoCitationDate; + } + + @ManyToOne @JoinColumn(name="template_id",nullable = true) @@ -680,6 +696,11 @@ public Timestamp getCitationDate() { // TODo - is this ever not version 1.0 (or draft if not published yet) //DatasetVersion oldest = versions.get(versions.size() - 1); citationDate = super.getPublicationDate(); + if (embargoCitationDate != null) { + if (citationDate.compareTo(embargoCitationDate) < 0) { + return embargoCitationDate; + } + } /*if (oldest.isPublished()) { List fms = oldest.getFileMetadatas(); for (FileMetadata fm : fms) { diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java index 6c514a2405c..d1a73358166 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java +++ 
b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java @@ -1309,6 +1309,7 @@ public List getFileMetadatas(DatasetVersion datasetVersion, Intege .setHint("eclipselink.left-join-fetch", "fm.dataFile.embargo") .setHint("eclipselink.left-join-fetch", "fm.datasetVersion") .setHint("eclipselink.left-join-fetch", "fm.dataFile.releaseUser") + .setHint("eclipselink.left-join-fetch", "fm.dataFile.dataFileTags") .setHint("eclipselink.left-join-fetch", "fm.dataFile.creator"); } else { // @todo: is there really no way to use offset-limit with left join hints? From 7b1e799b4eaf3d70328b5237a41dc08622112de0 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Wed, 6 Sep 2023 09:54:32 -0400 Subject: [PATCH 07/22] Additional changes needed for the optimized "embargo publication date" aggregate. #9763 --- .../edu/harvard/iq/dataverse/Dataset.java | 4 +++ .../FinalizeDatasetPublicationCommand.java | 33 +++++++++++++++++-- .../V6.0.0.1__9763-embargocitationdate.sql | 14 ++++++++ 3 files changed, 49 insertions(+), 2 deletions(-) create mode 100644 src/main/resources/db/migration/V6.0.0.1__9763-embargocitationdate.sql diff --git a/src/main/java/edu/harvard/iq/dataverse/Dataset.java b/src/main/java/edu/harvard/iq/dataverse/Dataset.java index f5a2f7cc6fb..258806dad77 100644 --- a/src/main/java/edu/harvard/iq/dataverse/Dataset.java +++ b/src/main/java/edu/harvard/iq/dataverse/Dataset.java @@ -692,15 +692,19 @@ public Timestamp getCitationDate() { Timestamp citationDate = null; //Only calculate if this dataset doesn't use an alternate date field for publication date if (citationDateDatasetFieldType == null) { + // @todo: remove this commented-out code once/if the PR passes review - L.A. //List versions = this.versions; // TODo - is this ever not version 1.0 (or draft if not published yet) //DatasetVersion oldest = versions.get(versions.size() - 1); + // - I believe the answer is yes, the oldest versions will always be + // either 1.0 or draft - L.A. 
citationDate = super.getPublicationDate(); + if (embargoCitationDate != null) { + if (citationDate.compareTo(embargoCitationDate) < 0) { + return embargoCitationDate; + } + } + // @todo: remove this commented-out code once/if the PR passes review - L.A. /*if (oldest.isPublished()) { List fms = oldest.getFileMetadatas(); for (FileMetadata fm : fms) { diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java index f5e70209744..3da087addd9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java @@ -10,6 +10,7 @@ import edu.harvard.iq.dataverse.DatasetVersionUser; import edu.harvard.iq.dataverse.Dataverse; import edu.harvard.iq.dataverse.DvObject; +import edu.harvard.iq.dataverse.Embargo; import edu.harvard.iq.dataverse.UserNotification; import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; @@ -117,9 +118,37 @@ public Dataset execute(CommandContext ctxt) throws CommandException { // is this the first publication of the dataset? if (theDataset.getPublicationDate() == null) { theDataset.setReleaseUser((AuthenticatedUser) getUser()); - } - if ( theDataset.getPublicationDate() == null ) { + theDataset.setPublicationDate(new Timestamp(new Date().getTime())); + + // if there are any embargoed files in this version, we will save + // the latest availability date as the "embargoCitationDate" for future + // reference (if the files are not available yet, as of publishing of + // the dataset, this date will be used as the "citation date" of the dataset, + // instead of the publicationDate, in compliance with the DataCite + best practices). 
+ // the code below replicates the logic that used to be in the method + // Dataset.getCitationDate() that calculated this adjusted date in real time. + + Timestamp latestEmbargoDate = null; + for (DataFile dataFile : theDataset.getFiles()) { + // this is the first version of the dataset that is being published. + // therefore we can iterate through .getFiles() instead of obtaining + // the DataFiles by going through the FileMetadatas in the current version. + Embargo embargo = dataFile.getEmbargo(); + if (embargo != null) { + // "dataAvailable" is not nullable in the Embargo class, no need for a null check + Timestamp embargoDate = Timestamp.valueOf(embargo.getDateAvailable().atStartOfDay()); + if (latestEmbargoDate == null || latestEmbargoDate.compareTo(embargoDate) < 0) { + latestEmbargoDate = embargoDate; + } + } + } + // the above loop could be easily replaced with a database query; + // but we iterate through .getFiles() elsewhere in the command, when + // updating and/or registering the files, so it should not result in + // an extra performance hit. + theDataset.setEmbargoCitationDate(latestEmbargoDate); } //Clear any external status diff --git a/src/main/resources/db/migration/V6.0.0.1__9763-embargocitationdate.sql b/src/main/resources/db/migration/V6.0.0.1__9763-embargocitationdate.sql new file mode 100644 index 00000000000..536798015ba --- /dev/null +++ b/src/main/resources/db/migration/V6.0.0.1__9763-embargocitationdate.sql @@ -0,0 +1,14 @@ +-- An aggregated timestamp which is the latest of the availability dates of any embargoed files in the first published version, if present +ALTER TABLE dataset ADD COLUMN IF NOT EXISTS embargoCitationDate timestamp without time zone; +-- ... 
and an update query that will populate this column for all the published datasets with embargoed files in the first released version: +UPDATE dataset SET embargocitationdate=o.embargocitationdate +FROM (SELECT d.id, MAX(e.dateavailable) AS embargocitationdate +FROM embargo e, dataset d, datafile f, datasetversion v, filemetadata m +WHERE v.dataset_id = d.id +AND v.versionstate = 'RELEASED' +AND v.versionnumber = 1 +AND v.minorversionnumber = 0 +AND f.embargo_id = e.id +AND m.datasetversion_id = v.id +AND m.datafile_id = f.id GROUP BY d.id) o WHERE o.id = dataset.id; +-- (the query follows the logic that used to be in the method Dataset.getCitationDate() that calculated this adjusted date in real time). From fd30fd53e521a786b59f48df786ab4b17366aa6b Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Wed, 6 Sep 2023 10:19:05 -0400 Subject: [PATCH 08/22] removing a comment (#9763) --- src/main/java/edu/harvard/iq/dataverse/api/Datasets.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 23de46c1324..1d7244fd6e7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -483,7 +483,6 @@ public Response listVersions(@Context ContainerRequestContext crc, @PathParam("i @Path("{id}/versions/{versionId}") public Response getVersion(@Context ContainerRequestContext crc, @PathParam("id") String datasetId, @PathParam("versionId") String versionId, @QueryParam("includeFiles") Boolean includeFiles, @Context UriInfo uriInfo, @Context HttpHeaders headers) { return response( req -> { - // @todo: consider using DatasetVersionServiceBean.findDeep() here  DatasetVersion dsv = getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers); if (dsv == null || dsv.getId() == null) { From b74affc942e287329bf2aed0e7900d89fcf8bc5e Mon Sep 17 00:00:00 2001 From: Leonid 
Andreev Date: Wed, 6 Sep 2023 10:49:05 -0400 Subject: [PATCH 09/22] a short release note (#9763) --- doc/release-notes/9763-versions-api-improvements.md | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 doc/release-notes/9763-versions-api-improvements.md diff --git a/doc/release-notes/9763-versions-api-improvements.md b/doc/release-notes/9763-versions-api-improvements.md new file mode 100644 index 00000000000..2c2374dd9b6 --- /dev/null +++ b/doc/release-notes/9763-versions-api-improvements.md @@ -0,0 +1,4 @@ +# Some improvements have been added to the /versions API + +See the [Dataset Versions API](https://guides.dataverse.org/en/9763-lookup-optimizations/api/native-api.html#dataset-versions-api) section of the Guide for more information. + From 2324fe14bdc13c291fdf606ff4187183262e5f0a Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Wed, 6 Sep 2023 11:10:16 -0400 Subject: [PATCH 10/22] changed the guide to reflect the fact that the includeFiles flag defaults to "true". (#9763) --- doc/sphinx-guides/source/api/native-api.rst | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index da3fbfffa73..1234e215f0b 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -830,7 +830,7 @@ The fully expanded example above (without environment variables) looks like this .. code-block:: bash - curl "https://demo.dataverse.org/api/datasets/24/versions?includeFiles=true" + curl "https://demo.dataverse.org/api/datasets/24/versions" It returns a list of versions with their metadata, and file list: @@ -883,7 +883,7 @@ It returns a list of versions with their metadata, and file list: ] } -The optional ``includeFiles`` parameter specifies whether the files should be listed in the output. It defaults to ``false``. 
(Note that for a dataset with a large number of versions and/or files having the files included can dramatically increase the volume of the output). A separate ``/files`` API can be used for listing the files, or a subset thereof in a given version. +The optional ``includeFiles`` parameter specifies whether the files should be listed in the output. It defaults to ``true``, preserving backward compatibility. (Note that for a dataset with a large number of versions and/or files having the files included can dramatically increase the volume of the output). A separate ``/files`` API can be used for listing the files, or a subset thereof in a given version. The optional ``offset`` and ``limit`` parameters can be used to specify the range of the versions list to be shown. This can be used to paginate through the list in a dataset with a large number of versions. @@ -899,15 +899,15 @@ Get Version of a Dataset export ID=24 export VERSION=1.0 - curl "$SERVER_URL/api/datasets/$ID/versions/$VERSION" + curl "$SERVER_URL/api/datasets/$ID/versions/$VERSION?includeFiles=false" The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl "https://demo.dataverse.org/api/datasets/24/versions/1.0" + curl "https://demo.dataverse.org/api/datasets/24/versions/1.0?includeFiles=false" -The optional ``includeFiles`` parameter specifies whether the files should be listed in the output (defaults to ``false``). Note that a separate ``/files`` API can be used for listing the files, or a subset thereof in a given version. +The optional ``includeFiles`` parameter specifies whether the files should be listed in the output (defaults to ``true``). Note that a separate ``/files`` API can be used for listing the files, or a subset thereof in a given version. .. 
_export-dataset-metadata-api: From 35835e40390442cac77fd5d38731b2e50d7b6560 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Wed, 6 Sep 2023 11:32:42 -0400 Subject: [PATCH 11/22] extended the release note. (#9763) --- doc/release-notes/9763-versions-api-improvements.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/doc/release-notes/9763-versions-api-improvements.md b/doc/release-notes/9763-versions-api-improvements.md index 2c2374dd9b6..191afe8176f 100644 --- a/doc/release-notes/9763-versions-api-improvements.md +++ b/doc/release-notes/9763-versions-api-improvements.md @@ -1,4 +1,8 @@ # Some improvements have been added to the /versions API -See the [Dataset Versions API](https://guides.dataverse.org/en/9763-lookup-optimizations/api/native-api.html#dataset-versions-api) section of the Guide for more information. +- optional pagination has been added to `/api/datasets/{id}/versions` that may be useful in datasets with a large number of versions; +- a new flag `includeFiles` is added to both `/api/datasets/{id}/versions` and `/api/datasets/{id}/versions/{vid}` (true by default), providing an option to drop the file information from the output; +- when files are requested to be included, some database lookup optimizations have been added to improve the performance on datasets with large numbers of files. + +This is reflected in the [Dataset Versions API](https://guides.dataverse.org/en/9763-lookup-optimizations/api/native-api.html#dataset-versions-api) section of the Guide. 
From 9a9d7d61e95262be66970b1fda41cdfa15def540 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Wed, 6 Sep 2023 11:39:10 -0400 Subject: [PATCH 12/22] cosmetic change in the release note (#9763) --- doc/release-notes/9763-versions-api-improvements.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/release-notes/9763-versions-api-improvements.md b/doc/release-notes/9763-versions-api-improvements.md index 191afe8176f..8d7f6c7a20a 100644 --- a/doc/release-notes/9763-versions-api-improvements.md +++ b/doc/release-notes/9763-versions-api-improvements.md @@ -1,4 +1,4 @@ -# Some improvements have been added to the /versions API +# Improvements in the /versions API - optional pagination has been added to `/api/datasets/{id}/versions` that may be useful in datasets with a large number of versions; - a new flag `includeFiles` is added to both `/api/datasets/{id}/versions` and `/api/datasets/{id}/versions/{vid}` (true by default), providing an option to drop the file information from the output; From d465b209c7cded84ff8d08799f7f4f42fb489fb2 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Wed, 6 Sep 2023 11:45:58 -0400 Subject: [PATCH 13/22] cosmetic change, comment text (#9763) --- src/main/java/edu/harvard/iq/dataverse/Dataset.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/Dataset.java b/src/main/java/edu/harvard/iq/dataverse/Dataset.java index 258806dad77..ca5a8dd2b81 100644 --- a/src/main/java/edu/harvard/iq/dataverse/Dataset.java +++ b/src/main/java/edu/harvard/iq/dataverse/Dataset.java @@ -161,8 +161,9 @@ public void setCitationDateDatasetFieldType(DatasetFieldType citationDateDataset // Per DataCite best practices, the citation date of a dataset may need // to be adjusted to reflect the latest embargo availability date of any // file within the first published version. - // If any files are embargoed in the first version, we will find calculate - // the date and cache it here. 
+ // If any files are embargoed in the first version, this date will be + // calculated and cached here upon its publication, in the + // FinalizeDatasetPublicationCommand. private Timestamp embargoCitationDate; public Timestamp getEmbargoCitationDate() { From ee36dee64a128942ad4412a5f64e1a1336a3063c Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Wed, 6 Sep 2023 12:53:32 -0400 Subject: [PATCH 14/22] removed a noisy logging line that got checked in by mistake in an earlier PR, as part of a quick fix for #9803 --- .../edu/harvard/iq/dataverse/search/SearchIncludeFragment.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java index 0dfad74bedf..2ce06541afa 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java @@ -205,7 +205,6 @@ public String searchRedirect(String dataverseRedirectPage, Dataverse dataverseIn */ dataverse = dataverseIn; - logger.info("redirect page: "+dataverseRedirectPage); dataverseRedirectPage = StringUtils.isBlank(dataverseRedirectPage) ? "dataverse.xhtml" : dataverseRedirectPage; String optionalDataverseScope = "&alias=" + dataverse.getAlias(); From bfe7f9c3537a89b75fd3190d063433c8f6147f96 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Wed, 13 Sep 2023 10:56:59 -0400 Subject: [PATCH 15/22] RestAssured tests for the new functionality added to the /versions api. 
(#9763) --- .../harvard/iq/dataverse/api/DatasetsIT.java | 85 +++++++++++++++++++ .../edu/harvard/iq/dataverse/api/UtilIT.java | 47 +++++++++- 2 files changed, 130 insertions(+), 2 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index b353b4488d0..d5b3dbca05a 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -555,6 +555,91 @@ public void testCreatePublishDestroyDataset() { } + /** + * The apis (/api/datasets/{id}/versions and /api/datasets/{id}/versions/{vid} + * are called from other RestAssured tests, in this class and also FileIT. + * But this test is dedicated to this api specifically, and focuses on the + * functionality added to it in 6.1. + */ + @Test + public void testDatasetVersionsAPI() { + // Create user + String apiToken = UtilIT.createRandomUserGetToken(); + + // Create user with no permission + String apiTokenNoPerms = UtilIT.createRandomUserGetToken(); + + // Create Collection + String collectionAlias = UtilIT.createRandomCollectionGetAlias(apiToken); + + // Create Dataset + Response createDataset = UtilIT.createRandomDatasetViaNativeApi(collectionAlias, apiToken); + createDataset.then().assertThat() + .statusCode(CREATED.getStatusCode()); + + Integer datasetId = UtilIT.getDatasetIdFromResponse(createDataset); + String datasetPid = JsonPath.from(createDataset.asString()).getString("data.persistentId"); + + // Upload file + String pathToFile = "src/main/webapp/resources/images/dataverseproject.png"; + Response uploadResponse = UtilIT.uploadFileViaNative(datasetId.toString(), pathToFile, apiToken); + uploadResponse.then().assertThat().statusCode(OK.getStatusCode()); + + Integer fileId = JsonPath.from(uploadResponse.body().asString()).getInt("data.files[0].dataFile.id"); + + // Check that the file we just uploaded is shown by the versions api: + Response 
unpublishedDraft = UtilIT.getDatasetVersion(datasetPid, ":draft", apiToken); + unpublishedDraft.prettyPrint(); + unpublishedDraft.then().assertThat() + .body("data.files.size()", equalTo(1)) + .statusCode(OK.getStatusCode()); + + // Now check that the file is NOT shown, when we ask the versions api to + // skip files: + boolean skipFiles = true; + unpublishedDraft = UtilIT.getDatasetVersion(datasetPid, ":draft", apiToken, skipFiles); + unpublishedDraft.prettyPrint(); + unpublishedDraft.then().assertThat() + .body("data.files", equalTo(null)) + .statusCode(OK.getStatusCode()); + + // Publish collection and dataset + UtilIT.publishDataverseViaNativeApi(collectionAlias, apiToken).then().assertThat().statusCode(OK.getStatusCode()); + UtilIT.publishDatasetViaNativeApi(datasetId, "major", apiToken).then().assertThat().statusCode(OK.getStatusCode()); + + // Upload another file: + String pathToFile2 = "src/main/webapp/resources/images/cc0.png"; + Response uploadResponse2 = UtilIT.uploadFileViaNative(datasetId.toString(), pathToFile2, apiToken); + uploadResponse2.then().assertThat().statusCode(OK.getStatusCode()); + + // We should now have a published version, and a draft. 
+ + // Call /versions api, *with the owner api token*, make sure both + // versions are listed + Response versionsResponse = UtilIT.getDatasetVersions(datasetPid, apiToken); + versionsResponse.prettyPrint(); + versionsResponse.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.size()", equalTo(2)); + + // And now call it with an un-privileged token, to make sure only one + // (the published one) version is shown: + + versionsResponse = UtilIT.getDatasetVersions(datasetPid, apiTokenNoPerms); + versionsResponse.prettyPrint(); + versionsResponse.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.size()", equalTo(1)); + + // And now call the "short", no-files version of the same api + versionsResponse = UtilIT.getDatasetVersions(datasetPid, apiTokenNoPerms, skipFiles); + versionsResponse.prettyPrint(); + versionsResponse.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data[0].files", equalTo(null)); + } + + /** * This test requires the root dataverse to be published to pass. */ diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index e47971f9b92..678d4e5523b 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -9,6 +9,7 @@ import jakarta.json.JsonObjectBuilder; import jakarta.json.JsonArrayBuilder; import jakarta.json.JsonObject; +import static jakarta.ws.rs.core.Response.Status.CREATED; import java.io.File; import java.io.IOException; @@ -51,7 +52,6 @@ import java.util.Collections; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.*; -import static org.junit.jupiter.api.Assertions.*; public class UtilIT { @@ -119,6 +119,16 @@ public static Response createRandomUser() { return createRandomUser("user"); } + + /** + * A convenience method for creating a random test user, when all you need + * is the api token. 
+ * @return apiToken + */ + public static String createRandomUserGetToken(){ + Response createUser = createRandomUser(); + return getApiTokenFromResponse(createUser); + } public static Response createUser(String username, String email) { logger.info("Creating user " + username); @@ -369,6 +379,20 @@ static Response createRandomDataverse(String apiToken) { String category = null; return createDataverse(alias, category, apiToken); } + + /** + * A convenience method for creating a random collection and getting its + * alias in one step. + * @param apiToken + * @return alias + */ + static String createRandomCollectionGetAlias(String apiToken){ + + Response createCollectionResponse = createRandomDataverse(apiToken); + //createDataverseResponse.prettyPrint(); + createCollectionResponse.then().assertThat().statusCode(CREATED.getStatusCode()); + return UtilIT.getAliasFromResponse(createCollectionResponse); + } static Response showDataverseContents(String alias, String apiToken) { return given() @@ -1403,9 +1427,17 @@ static Response nativeGetUsingPersistentId(String persistentId, String apiToken) } static Response getDatasetVersion(String persistentId, String versionNumber, String apiToken) { + return getDatasetVersion(persistentId, versionNumber, apiToken, false); + } + + static Response getDatasetVersion(String persistentId, String versionNumber, String apiToken, boolean skipFiles) { return given() .header(API_TOKEN_HTTP_HEADER, apiToken) - .get("/api/datasets/:persistentId/versions/" + versionNumber + "?persistentId=" + persistentId); + .get("/api/datasets/:persistentId/versions/" + + versionNumber + + "?persistentId=" + + persistentId + + (skipFiles ? 
"&includeFiles=false" : "")); } static Response getMetadataBlockFromDatasetVersion(String persistentId, String versionNumber, String metadataBlock, String apiToken) { @@ -1767,6 +1799,10 @@ static Response removeDatasetThumbnail(String datasetPersistentId, String apiTok } static Response getDatasetVersions(String idOrPersistentId, String apiToken) { + return getDatasetVersions(idOrPersistentId, apiToken, false); + } + + static Response getDatasetVersions(String idOrPersistentId, String apiToken, boolean skipFiles) { logger.info("Getting Dataset Versions"); String idInPath = idOrPersistentId; // Assume it's a number. String optionalQueryParam = ""; // If idOrPersistentId is a number we'll just put it in the path. @@ -1774,6 +1810,13 @@ static Response getDatasetVersions(String idOrPersistentId, String apiToken) { idInPath = ":persistentId"; optionalQueryParam = "?persistentId=" + idOrPersistentId; } + if (skipFiles) { + if ("".equals(optionalQueryParam)) { + optionalQueryParam = "?includeFiles=false"; + } else { + optionalQueryParam = optionalQueryParam.concat("&includeFiles=false"); + } + } RequestSpecification requestSpecification = given(); if (apiToken != null) { requestSpecification = given() From 8e894c37a17ce184bb3c59eb027dc03ed0f21274 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Wed, 13 Sep 2023 11:42:25 -0400 Subject: [PATCH 16/22] added another test, for the pagination functionality in the /versions api (also being added in 6.1). 
#9763 --- .../harvard/iq/dataverse/api/DatasetsIT.java | 26 ++++++++++++++----- .../edu/harvard/iq/dataverse/api/UtilIT.java | 22 ++++++++++++++++ 2 files changed, 41 insertions(+), 7 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index d5b3dbca05a..4a0e1c857c7 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -557,7 +557,7 @@ public void testCreatePublishDestroyDataset() { /** * The apis (/api/datasets/{id}/versions and /api/datasets/{id}/versions/{vid} - * are called from other RestAssured tests, in this class and also FileIT. + * are already called from other RestAssured tests, in this class and also FileIT. * But this test is dedicated to this api specifically, and focuses on the * functionality added to it in 6.1. */ @@ -584,8 +584,6 @@ public void testDatasetVersionsAPI() { String pathToFile = "src/main/webapp/resources/images/dataverseproject.png"; Response uploadResponse = UtilIT.uploadFileViaNative(datasetId.toString(), pathToFile, apiToken); uploadResponse.then().assertThat().statusCode(OK.getStatusCode()); - - Integer fileId = JsonPath.from(uploadResponse.body().asString()).getInt("data.files[0].dataFile.id"); // Check that the file we just uploaded is shown by the versions api: Response unpublishedDraft = UtilIT.getDatasetVersion(datasetPid, ":draft", apiToken); @@ -615,13 +613,27 @@ public void testDatasetVersionsAPI() { // We should now have a published version, and a draft. // Call /versions api, *with the owner api token*, make sure both - // versions are listed + // versions are listed; also check that the correct numbers of files + // are shown in each version (2 in the draft, 1 in the published). 
Response versionsResponse = UtilIT.getDatasetVersions(datasetPid, apiToken); versionsResponse.prettyPrint(); versionsResponse.then().assertThat() .statusCode(OK.getStatusCode()) - .body("data.size()", equalTo(2)); - + .body("data.size()", equalTo(2)) + .body("data[0].files.size()", equalTo(2)) + .body("data[1].files.size()", equalTo(1)); + + // Now call the this api with the new (as of 6.1) pagination parameters + Integer offset = 0; + Integer howmany = 1; + versionsResponse = UtilIT.getDatasetVersions(datasetPid, apiToken, offset, howmany); + // (the above should return only one version, the draft) + versionsResponse.prettyPrint(); + versionsResponse.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.size()", equalTo(1)) + .body("data[0].files.size()", equalTo(2)); + // And now call it with an un-privileged token, to make sure only one // (the published one) version is shown: @@ -630,7 +642,7 @@ public void testDatasetVersionsAPI() { versionsResponse.then().assertThat() .statusCode(OK.getStatusCode()) .body("data.size()", equalTo(1)); - + // And now call the "short", no-files version of the same api versionsResponse = UtilIT.getDatasetVersions(datasetPid, apiTokenNoPerms, skipFiles); versionsResponse.prettyPrint(); diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index 678d4e5523b..f94cfa8e400 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -1803,6 +1803,14 @@ static Response getDatasetVersions(String idOrPersistentId, String apiToken) { } static Response getDatasetVersions(String idOrPersistentId, String apiToken, boolean skipFiles) { + return getDatasetVersions(idOrPersistentId, apiToken, null, null, skipFiles); + } + + static Response getDatasetVersions(String idOrPersistentId, String apiToken, Integer offset, Integer limit) { + return getDatasetVersions(idOrPersistentId, apiToken, 
offset, limit, false); + } + + static Response getDatasetVersions(String idOrPersistentId, String apiToken, Integer offset, Integer limit, boolean skipFiles) { logger.info("Getting Dataset Versions"); String idInPath = idOrPersistentId; // Assume it's a number. String optionalQueryParam = ""; // If idOrPersistentId is a number we'll just put it in the path. @@ -1817,6 +1825,20 @@ static Response getDatasetVersions(String idOrPersistentId, String apiToken, boo optionalQueryParam = optionalQueryParam.concat("&includeFiles=false"); } } + if (offset != null) { + if ("".equals(optionalQueryParam)) { + optionalQueryParam = "?offset="+offset; + } else { + optionalQueryParam = optionalQueryParam.concat("&offset="+offset); + } + } + if (limit != null) { + if ("".equals(optionalQueryParam)) { + optionalQueryParam = "?limit="+limit; + } else { + optionalQueryParam = optionalQueryParam.concat("&limit="+limit); + } + } RequestSpecification requestSpecification = given(); if (apiToken != null) { requestSpecification = given() From b9e99f3e7253d836aadebac8b128efa21027eef8 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Wed, 13 Sep 2023 11:43:42 -0400 Subject: [PATCH 17/22] typo in a comment. #9763 --- src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index 4a0e1c857c7..e726337cf8b 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -557,7 +557,7 @@ public void testCreatePublishDestroyDataset() { /** * The apis (/api/datasets/{id}/versions and /api/datasets/{id}/versions/{vid} - * are already called from other RestAssured tests, in this class and also FileIT. + * are already called from other RestAssured tests, in this class and also in FilesIT. 
* But this test is dedicated to this api specifically, and focuses on the * functionality added to it in 6.1. */ From f164a681deaf14d27ee5fb35a344805d86ac631b Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Wed, 13 Sep 2023 11:46:27 -0400 Subject: [PATCH 18/22] more typos in comments. (#9763) --- src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index e726337cf8b..23fc5911ad0 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -623,7 +623,7 @@ public void testDatasetVersionsAPI() { .body("data[0].files.size()", equalTo(2)) .body("data[1].files.size()", equalTo(1)); - // Now call the this api with the new (as of 6.1) pagination parameters + // Now call this api with the new (as of 6.1) pagination parameters Integer offset = 0; Integer howmany = 1; versionsResponse = UtilIT.getDatasetVersions(datasetPid, apiToken, offset, howmany); @@ -635,7 +635,7 @@ public void testDatasetVersionsAPI() { .body("data[0].files.size()", equalTo(2)); // And now call it with an un-privileged token, to make sure only one - // (the published one) version is shown: + // (the published) version is shown: versionsResponse = UtilIT.getDatasetVersions(datasetPid, apiTokenNoPerms); versionsResponse.prettyPrint(); From 18cdf133f49d597da6aea9d21385e45b77844ceb Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Wed, 11 Oct 2023 14:48:51 -0400 Subject: [PATCH 19/22] stripping more dead code in the version service bean (my experimental filemetadatas retrieval method, not directly used in the PR). 
(#9763) --- .../dataverse/DatasetVersionServiceBean.java | 88 ------------------- 1 file changed, 88 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java index 476a306e081..c2f9027a38a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java @@ -49,22 +49,6 @@ public class DatasetVersionServiceBean implements java.io.Serializable { private static final SimpleDateFormat logFormatter = new SimpleDateFormat("yyyy-MM-dd'T'HH-mm-ss"); - private static final String QUERY_STR_FIND_ALL_FILE_METADATAS_ORDER_BY_LABEL = "SELECT fm FROM FileMetadata fm" - + " WHERE fm.datasetVersion.id=:datasetVersionId" - + " ORDER BY fm.label"; - private static final String QUERY_STR_FIND_ALL_FILE_METADATAS_ORDER_BY_DATE = "SELECT fm FROM FileMetadata fm, DvObject dvo" - + " WHERE fm.datasetVersion.id = :datasetVersionId" - + " AND fm.dataFile.id = dvo.id" - + " ORDER BY CASE WHEN dvo.publicationDate IS NOT NULL THEN dvo.publicationDate ELSE dvo.createDate END"; - private static final String QUERY_STR_FIND_ALL_FILE_METADATAS_ORDER_BY_SIZE = "SELECT fm FROM FileMetadata fm, DataFile df" - + " WHERE fm.datasetVersion.id = :datasetVersionId" - + " AND fm.dataFile.id = df.id" - + " ORDER BY df.filesize"; - private static final String QUERY_STR_FIND_ALL_FILE_METADATAS_ORDER_BY_TYPE = "SELECT fm FROM FileMetadata fm, DataFile df" - + " WHERE fm.datasetVersion.id = :datasetVersionId" - + " AND fm.dataFile.id = df.id" - + " ORDER BY df.contentType"; - @EJB DatasetServiceBean datasetService; @@ -166,18 +150,6 @@ public DatasetVersion getDatasetVersion(){ } } // end RetrieveDatasetVersionResponse - /** - * Different criteria to sort the results of FileMetadata queries used in {@link DatasetVersionServiceBean#getFileMetadatas} - */ - public enum FileMetadatasOrderCriteria { - NameAZ, - 
NameZA, - Newest, - Oldest, - Size, - Type - } - public DatasetVersion find(Object pk) { return em.find(DatasetVersion.class, pk); } @@ -1287,64 +1259,4 @@ public List getUnarchivedDatasetVersions(){ return null; } } // end getUnarchivedDatasetVersions - - /** - * Returns a FileMetadata list of files in the specified DatasetVersion - * - * @param datasetVersion the DatasetVersion to access - * @param limit for pagination, can be null - * @param offset for pagination, can be null - * @param orderCriteria a FileMetadatasOrderCriteria to order the results - * @return a FileMetadata list of the specified DatasetVersion - */ - public List getFileMetadatas(DatasetVersion datasetVersion, Integer limit, Integer offset, FileMetadatasOrderCriteria orderCriteria) { - TypedQuery query = em.createQuery(getQueryStringFromFileMetadatasOrderCriteria(orderCriteria), FileMetadata.class) - .setParameter("datasetVersionId", datasetVersion.getId()); - - if (limit == null && offset == null) { - query = query.setHint("eclipselink.left-join-fetch", "fm.dataFile.ingestRequest") - .setHint("eclipselink.left-join-fetch", "fm.dataFile.thumbnailForDataset") - .setHint("eclipselink.left-join-fetch", "fm.dataFile.dataTables") - .setHint("eclipselink.left-join-fetch", "fm.fileCategories") - .setHint("eclipselink.left-join-fetch", "fm.dataFile.embargo") - .setHint("eclipselink.left-join-fetch", "fm.datasetVersion") - .setHint("eclipselink.left-join-fetch", "fm.dataFile.releaseUser") - .setHint("eclipselink.left-join-fetch", "fm.dataFile.dataFileTags") - .setHint("eclipselink.left-join-fetch", "fm.dataFile.creator"); - } else { - // @todo: is there really no way to use offset-limit with left join hints? 
- if (limit != null) { - query = query.setMaxResults(limit); - } - if (offset != null) { - query = query.setFirstResult(offset); - } - } - return query.getResultList(); - } - - private String getQueryStringFromFileMetadatasOrderCriteria(FileMetadatasOrderCriteria orderCriteria) { - String queryString; - switch (orderCriteria) { - case NameZA: - queryString = QUERY_STR_FIND_ALL_FILE_METADATAS_ORDER_BY_LABEL + " DESC"; - break; - case Newest: - queryString = QUERY_STR_FIND_ALL_FILE_METADATAS_ORDER_BY_DATE + " DESC"; - break; - case Oldest: - queryString = QUERY_STR_FIND_ALL_FILE_METADATAS_ORDER_BY_DATE; - break; - case Size: - queryString = QUERY_STR_FIND_ALL_FILE_METADATAS_ORDER_BY_SIZE; - break; - case Type: - queryString = QUERY_STR_FIND_ALL_FILE_METADATAS_ORDER_BY_TYPE; - break; - default: - queryString = QUERY_STR_FIND_ALL_FILE_METADATAS_ORDER_BY_LABEL; - break; - } - return queryString; - } } // end class From 381ddf59088808a536d58498e60514e1ea8557b8 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Wed, 11 Oct 2023 15:22:52 -0400 Subject: [PATCH 20/22] more commented-out code that needed to be removed before finalizing the pr. (#9763) --- .../edu/harvard/iq/dataverse/Dataset.java | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/Dataset.java b/src/main/java/edu/harvard/iq/dataverse/Dataset.java index 692a2ba0245..245bdf0efd2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/Dataset.java +++ b/src/main/java/edu/harvard/iq/dataverse/Dataset.java @@ -693,31 +693,12 @@ public Timestamp getCitationDate() { Timestamp citationDate = null; //Only calculate if this dataset doesn't use an alternate date field for publication date if (citationDateDatasetFieldType == null) { - // @todo: remove this commented-out code once/if the PR passes review - L.A. 
- //List versions = this.versions; - // TODo - is this ever not version 1.0 (or draft if not published yet) - //DatasetVersion oldest = versions.get(versions.size() - 1); - // - I believe the answer is yes, the oldest versions will always be - // either 1.0 or draft - L.A. citationDate = super.getPublicationDate(); if (embargoCitationDate != null) { if (citationDate.compareTo(embargoCitationDate) < 0) { return embargoCitationDate; } } - // @todo: remove this commented-out code once/if the PR passes review - L.A. - /*if (oldest.isPublished()) { - List fms = oldest.getFileMetadatas(); - for (FileMetadata fm : fms) { - Embargo embargo = fm.getDataFile().getEmbargo(); - if (embargo != null) { - Timestamp embDate = Timestamp.valueOf(embargo.getDateAvailable().atStartOfDay()); - if (citationDate.compareTo(embDate) < 0) { - citationDate = embDate; - } - } - } - }*/ } return citationDate; } From 4b5ad8fac1c1733c73ad0e2f5d7e1e47155895bc Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 13 Oct 2023 10:04:25 -0400 Subject: [PATCH 21/22] rename sql script #9763 avoid conflict with V6.0.0.1__9599-guestbook-at-request.sql --- ...rgocitationdate.sql => V6.0.0.2__9763-embargocitationdate.sql} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/main/resources/db/migration/{V6.0.0.1__9763-embargocitationdate.sql => V6.0.0.2__9763-embargocitationdate.sql} (100%) diff --git a/src/main/resources/db/migration/V6.0.0.1__9763-embargocitationdate.sql b/src/main/resources/db/migration/V6.0.0.2__9763-embargocitationdate.sql similarity index 100% rename from src/main/resources/db/migration/V6.0.0.1__9763-embargocitationdate.sql rename to src/main/resources/db/migration/V6.0.0.2__9763-embargocitationdate.sql From f47867ee34e93e14efaca2fba414e202d234c1c6 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Sun, 15 Oct 2023 19:24:09 -0400 Subject: [PATCH 22/22] renaming the flyway script since 6.0.0.1 has already been merged. 
(#9763) --- ...rgocitationdate.sql => V6.0.0.2__9763-embargocitationdate.sql} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/main/resources/db/migration/{V6.0.0.1__9763-embargocitationdate.sql => V6.0.0.2__9763-embargocitationdate.sql} (100%) diff --git a/src/main/resources/db/migration/V6.0.0.1__9763-embargocitationdate.sql b/src/main/resources/db/migration/V6.0.0.2__9763-embargocitationdate.sql similarity index 100% rename from src/main/resources/db/migration/V6.0.0.1__9763-embargocitationdate.sql rename to src/main/resources/db/migration/V6.0.0.2__9763-embargocitationdate.sql