-
Notifications
You must be signed in to change notification settings - Fork 492
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Versions API improvements (9763) #9883
Changes from 16 commits
a835f5d
de35ae7
4cd62eb
1c93978
4c28979
ccd6b7d
2d27c03
e08f26a
7b1e799
fd30fd5
b74affc
2324fe1
35835e4
9a9d7d6
d465b20
ee36dee
77dc0b5
bfe7f9c
8e894c3
b9e99f3
f164a68
66ab0c0
b5be877
18cdf13
381ddf5
4377e12
4b5ad8f
402ccfb
f47867e
a1349ba
037e2d9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
# Improvements in the /versions API | ||
|
||
- optional pagination has been added to `/api/datasets/{id}/versions` that may be useful in datasets with a large number of versions; | ||
- a new flag `includeFiles` has been added to both `/api/datasets/{id}/versions` and `/api/datasets/{id}/versions/{vid}` (true by default), providing an option to drop the file information from the output; | ||
- when files are requested to be included, some database lookup optimizations have been added to improve the performance on datasets with large numbers of files. | ||
|
||
This is reflected in the [Dataset Versions API](https://guides.dataverse.org/en/9763-lookup-optimizations/api/native-api.html#dataset-versions-api) section of the Guide. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -48,7 +48,23 @@ public class DatasetVersionServiceBean implements java.io.Serializable { | |
private static final Logger logger = Logger.getLogger(DatasetVersionServiceBean.class.getCanonicalName()); | ||
|
||
private static final SimpleDateFormat logFormatter = new SimpleDateFormat("yyyy-MM-dd'T'HH-mm-ss"); | ||
|
||
|
||
private static final String QUERY_STR_FIND_ALL_FILE_METADATAS_ORDER_BY_LABEL = "SELECT fm FROM FileMetadata fm" | ||
+ " WHERE fm.datasetVersion.id=:datasetVersionId" | ||
+ " ORDER BY fm.label"; | ||
private static final String QUERY_STR_FIND_ALL_FILE_METADATAS_ORDER_BY_DATE = "SELECT fm FROM FileMetadata fm, DvObject dvo" | ||
+ " WHERE fm.datasetVersion.id = :datasetVersionId" | ||
+ " AND fm.dataFile.id = dvo.id" | ||
+ " ORDER BY CASE WHEN dvo.publicationDate IS NOT NULL THEN dvo.publicationDate ELSE dvo.createDate END"; | ||
private static final String QUERY_STR_FIND_ALL_FILE_METADATAS_ORDER_BY_SIZE = "SELECT fm FROM FileMetadata fm, DataFile df" | ||
+ " WHERE fm.datasetVersion.id = :datasetVersionId" | ||
+ " AND fm.dataFile.id = df.id" | ||
+ " ORDER BY df.filesize"; | ||
private static final String QUERY_STR_FIND_ALL_FILE_METADATAS_ORDER_BY_TYPE = "SELECT fm FROM FileMetadata fm, DataFile df" | ||
+ " WHERE fm.datasetVersion.id = :datasetVersionId" | ||
+ " AND fm.dataFile.id = df.id" | ||
+ " ORDER BY df.contentType"; | ||
|
||
@EJB | ||
DatasetServiceBean datasetService; | ||
|
||
|
@@ -149,7 +165,19 @@ public DatasetVersion getDatasetVersion(){ | |
return this.datasetVersionForResponse; | ||
} | ||
} // end RetrieveDatasetVersionResponse | ||
|
||
|
||
/** | ||
* Different criteria to sort the results of FileMetadata queries used in {@link DatasetVersionServiceBean#getFileMetadatas} | ||
*/ | ||
public enum FileMetadatasOrderCriteria { | ||
NameAZ, | ||
NameZA, | ||
Newest, | ||
Oldest, | ||
Size, | ||
Type | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I recognize this from #9693. Does it matter which PR is merged first? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I would prefer #9693 to be merged first, and then resolve the conflict in this branch (I added a database lookup optimization on top of the code introduced in that PR). |
||
} | ||
|
||
public DatasetVersion find(Object pk) { | ||
return em.find(DatasetVersion.class, pk); | ||
} | ||
|
@@ -166,9 +194,44 @@ public DatasetVersion findDeep(Object pk) { | |
.setHint("eclipselink.left-join-fetch", "o.fileMetadatas.datasetVersion") | ||
.setHint("eclipselink.left-join-fetch", "o.fileMetadatas.dataFile.releaseUser") | ||
.setHint("eclipselink.left-join-fetch", "o.fileMetadatas.dataFile.creator") | ||
.setHint("eclipselink.left-join-fetch", "o.fileMetadatas.dataFile.dataFileTags") | ||
.getSingleResult(); | ||
} | ||
|
||
|
||
/** | ||
* Performs the same database lookup as the one behind Dataset.getVersions(). | ||
* Additionally, provides the arguments for selecting a partial list of | ||
* (length-offset) versions for pagination, plus the ability to pre-select | ||
* only the publicly-viewable versions. | ||
* It is recommended that individual software components utilize the | ||
* ListVersionsCommand, instead of calling this service method directly. | ||
* @param datasetId | ||
* @param offset for pagination through long lists of versions | ||
* @param length for pagination through long lists of versions | ||
* @param includeUnpublished retrieves all the versions, including drafts and deaccessioned. | ||
* @return (partial) list of versions | ||
*/ | ||
public List<DatasetVersion> findVersions(Long datasetId, Integer offset, Integer length, boolean includeUnpublished) { | ||
TypedQuery<DatasetVersion> query; | ||
if (includeUnpublished) { | ||
query = em.createNamedQuery("DatasetVersion.findByDataset", DatasetVersion.class); | ||
} else { | ||
query = em.createNamedQuery("DatasetVersion.findReleasedByDataset", DatasetVersion.class) | ||
.setParameter("datasetId", datasetId); | ||
} | ||
|
||
query.setParameter("datasetId", datasetId); | ||
|
||
if (offset != null) { | ||
query.setFirstResult(offset); | ||
} | ||
if (length != null) { | ||
query.setMaxResults(length); | ||
} | ||
|
||
return query.getResultList(); | ||
} | ||
|
||
public DatasetVersion findByFriendlyVersionNumber(Long datasetId, String friendlyVersionNumber) { | ||
Long majorVersionNumber = null; | ||
Long minorVersionNumber = null; | ||
|
@@ -1224,4 +1287,89 @@ public List<DatasetVersion> getUnarchivedDatasetVersions(){ | |
return null; | ||
} | ||
} // end getUnarchivedDatasetVersions | ||
|
||
/** | ||
* Returns a FileMetadata list of files in the specified DatasetVersion | ||
* | ||
* @param datasetVersion the DatasetVersion to access | ||
* @param limit for pagination, can be null | ||
* @param offset for pagination, can be null | ||
* @param orderCriteria a FileMetadatasOrderCriteria to order the results | ||
* @return a FileMetadata list of the specified DatasetVersion | ||
*/ | ||
public List<FileMetadata> getFileMetadatas(DatasetVersion datasetVersion, Integer limit, Integer offset, FileMetadatasOrderCriteria orderCriteria) { | ||
TypedQuery<FileMetadata> query = em.createQuery(getQueryStringFromFileMetadatasOrderCriteria(orderCriteria), FileMetadata.class) | ||
.setParameter("datasetVersionId", datasetVersion.getId()); | ||
|
||
if (limit == null && offset == null) { | ||
query = query.setHint("eclipselink.left-join-fetch", "fm.dataFile.ingestRequest") | ||
.setHint("eclipselink.left-join-fetch", "fm.dataFile.thumbnailForDataset") | ||
.setHint("eclipselink.left-join-fetch", "fm.dataFile.dataTables") | ||
.setHint("eclipselink.left-join-fetch", "fm.fileCategories") | ||
.setHint("eclipselink.left-join-fetch", "fm.dataFile.embargo") | ||
.setHint("eclipselink.left-join-fetch", "fm.datasetVersion") | ||
.setHint("eclipselink.left-join-fetch", "fm.dataFile.releaseUser") | ||
.setHint("eclipselink.left-join-fetch", "fm.dataFile.dataFileTags") | ||
.setHint("eclipselink.left-join-fetch", "fm.dataFile.creator"); | ||
} else { | ||
// @todo: is there really no way to use offset-limit with left join hints? | ||
if (limit != null) { | ||
query = query.setMaxResults(limit); | ||
} | ||
if (offset != null) { | ||
query = query.setFirstResult(offset); | ||
} | ||
} | ||
return query.getResultList(); | ||
} | ||
|
||
public List<FileMetadata> getFileMetadatasByDbId(Long versionId, Integer limit, Integer offset, FileMetadatasOrderCriteria orderCriteria) { | ||
TypedQuery<FileMetadata> query = em.createQuery(getQueryStringFromFileMetadatasOrderCriteria(orderCriteria), FileMetadata.class) | ||
.setParameter("datasetVersionId", versionId); | ||
|
||
if (limit == null && offset == null) { | ||
query = query.setHint("eclipselink.left-join-fetch", "fm.dataFile.ingestRequest") | ||
.setHint("eclipselink.left-join-fetch", "fm.dataFile.thumbnailForDataset") | ||
.setHint("eclipselink.left-join-fetch", "fm.dataFile.dataTables") | ||
.setHint("eclipselink.left-join-fetch", "fm.fileCategories") | ||
.setHint("eclipselink.left-join-fetch", "fm.dataFile.embargo") | ||
.setHint("eclipselink.left-join-fetch", "fm.datasetVersion") | ||
.setHint("eclipselink.left-join-fetch", "fm.dataFile.releaseUser") | ||
.setHint("eclipselink.left-join-fetch", "fm.dataFile.creator"); | ||
} else { | ||
// @todo: is there really no way to use offset-limit with left join hints? | ||
if (limit != null) { | ||
query = query.setMaxResults(limit); | ||
} | ||
if (offset != null) { | ||
query = query.setFirstResult(offset); | ||
} | ||
} | ||
return query.getResultList(); | ||
} | ||
|
||
private String getQueryStringFromFileMetadatasOrderCriteria(FileMetadatasOrderCriteria orderCriteria) { | ||
String queryString; | ||
switch (orderCriteria) { | ||
case NameZA: | ||
queryString = QUERY_STR_FIND_ALL_FILE_METADATAS_ORDER_BY_LABEL + " DESC"; | ||
break; | ||
case Newest: | ||
queryString = QUERY_STR_FIND_ALL_FILE_METADATAS_ORDER_BY_DATE + " DESC"; | ||
break; | ||
case Oldest: | ||
queryString = QUERY_STR_FIND_ALL_FILE_METADATAS_ORDER_BY_DATE; | ||
break; | ||
case Size: | ||
queryString = QUERY_STR_FIND_ALL_FILE_METADATAS_ORDER_BY_SIZE; | ||
break; | ||
case Type: | ||
queryString = QUERY_STR_FIND_ALL_FILE_METADATAS_ORDER_BY_TYPE; | ||
break; | ||
default: | ||
queryString = QUERY_STR_FIND_ALL_FILE_METADATAS_ORDER_BY_LABEL; | ||
break; | ||
} | ||
return queryString; | ||
} | ||
} // end class |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, good to remove commented out code.