IQSS · kcondon · Oct 18, 2023 · Aug 18, 2023 · Aug 21, 2023 · Aug 22, 2023
diff --git a/doc/release-notes/9763-versions-api-improvements.md b/doc/release-notes/9763-versions-api-improvements.md
@@ -0,0 +1,8 @@
+# Improvements in the /versions API
+
+- optional pagination has been added to `/api/datasets/{id}/versions` that may be useful in datasets with a large number of versions;
+- a new flag `includeFiles` is added to both `/api/datasets/{id}/versions` and `/api/datasets/{id}/versions/{vid}` (true by default), providing an option to drop the file information from the output;
+- when files are requested to be included, some database lookup optimizations have been added to improve the performance on datasets with large numbers of files. 
+
+This is reflected in the [Dataset Versions API](https://guides.dataverse.org/en/9763-lookup-optimizations/api/native-api.html#dataset-versions-api) section of the Guide.
+
diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst
@@ -883,6 +883,10 @@ It returns a list of versions with their metadata, and file list:
     ]
   }
 
+The optional ``includeFiles`` parameter specifies whether the files should be listed in the output. It defaults to ``true``, preserving backward compatibility. (Note that for a dataset with a large number of versions and/or files, including the files can dramatically increase the volume of the output.) A separate ``/files`` API can be used for listing the files, or a subset thereof, in a given version.
+
+The optional ``offset`` and ``limit`` parameters can be used to specify the range of the versions list to be shown. This can be used to paginate through the list in a dataset with a large number of versions. 
+
 
 Get Version of a Dataset
 ~~~~~~~~~~~~~~~~~~~~~~~~
@@ -895,13 +899,16 @@ Get Version of a Dataset
   export ID=24
   export VERSION=1.0
 
-  curl "$SERVER_URL/api/datasets/$ID/versions/$VERSION"
+  curl "$SERVER_URL/api/datasets/$ID/versions/$VERSION?includeFiles=false"
 
 The fully expanded example above (without environment variables) looks like this:
 
 .. code-block:: bash
 
-  curl "https://demo.dataverse.org/api/datasets/24/versions/1.0"
+  curl "https://demo.dataverse.org/api/datasets/24/versions/1.0?includeFiles=false"
+
+The optional ``includeFiles`` parameter specifies whether the files should be listed in the output (defaults to ``true``). Note that a separate ``/files`` API can be used for listing the files, or a subset thereof, in a given version.
+
 
 .. _export-dataset-metadata-api:
 

diff --git a/src/main/java/edu/harvard/iq/dataverse/Dataset.java b/src/main/java/edu/harvard/iq/dataverse/Dataset.java
@@ -158,6 +158,23 @@ public void setCitationDateDatasetFieldType(DatasetFieldType citationDateDataset
         this.citationDateDatasetFieldType = citationDateDatasetFieldType;
     }    
 
+    /**
+     * Cached, embargo-adjusted citation date.
+     * Per DataCite best practices, the citation date of a dataset may need
+     * to be adjusted to reflect the latest embargo availability date of any
+     * file within the first published version.
+     * If any files are embargoed in the first version, this date will be
+     * calculated and cached here upon its publication, in the
+     * FinalizeDatasetPublicationCommand.
+     */
+    private Timestamp embargoCitationDate;
+
+    /**
+     * @return the cached embargo-based citation date; null if none has been set
+     */
+    public Timestamp getEmbargoCitationDate() {
+        return embargoCitationDate;
+    }
+
+    /**
+     * @param embargoCitationDate the embargo-based citation date to cache
+     */
+    public void setEmbargoCitationDate(Timestamp embargoCitationDate) {
+        this.embargoCitationDate = embargoCitationDate;
+    }
+
+
 
     @ManyToOne
     @JoinColumn(name="template_id",nullable = true)
@@ -676,11 +693,20 @@ public Timestamp getCitationDate() {
         Timestamp citationDate = null;
         //Only calculate if this dataset doesn't use an alternate date field for publication date
         if (citationDateDatasetFieldType == null) {
-            List<DatasetVersion> versions = this.versions;
+            // @todo: remove this commented-out code once/if the PR passes review - L.A. 
+            //List<DatasetVersion> versions = this.versions;
             // TODo - is this ever not version 1.0 (or draft if not published yet)
-            DatasetVersion oldest = versions.get(versions.size() - 1);
+            //DatasetVersion oldest = versions.get(versions.size() - 1);
+            // - I believe the answer is yes, the oldest versions will always be 
+            // either 1.0 or draft - L.A. 
             citationDate = super.getPublicationDate();
-            if (oldest.isPublished()) {
+            if (embargoCitationDate != null) {
+                if (citationDate.compareTo(embargoCitationDate) < 0) {
+                    return embargoCitationDate;
+                }
+            }
+            // @todo: remove this commented-out code once/if the PR passes review - L.A. 
+            /*if (oldest.isPublished()) {
                 List<FileMetadata> fms = oldest.getFileMetadatas();
                 for (FileMetadata fm : fms) {
                     Embargo embargo = fm.getDataFile().getEmbargo();
@@ -691,7 +717,7 @@ public Timestamp getCitationDate() {
                         }
                     }
                 }
-            }
+            }*/
         }
         return citationDate;
     }

diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java
@@ -137,7 +137,7 @@ public Dataset findDeep(Object pk) {
             .setHint("eclipselink.left-join-fetch", "o.files.roleAssignments")
             .getSingleResult();
     }
-
+    
     public List<Dataset> findByOwnerId(Long ownerId) {
         return findByOwnerId(ownerId, false);
     }

diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java
@@ -68,7 +68,13 @@
                query = "SELECT OBJECT(o) FROM DatasetVersion AS o WHERE o.dataset.harvestedFrom IS NULL and o.releaseTime IS NOT NULL and o.archivalCopyLocation IS NULL"
     ), 
     @NamedQuery(name = "DatasetVersion.findById", 
-                query = "SELECT o FROM DatasetVersion o LEFT JOIN FETCH o.fileMetadatas WHERE o.id=:id")})
+                query = "SELECT o FROM DatasetVersion o LEFT JOIN FETCH o.fileMetadatas WHERE o.id=:id"), 
+    @NamedQuery(name = "DatasetVersion.findByDataset",
+                query = "SELECT o FROM DatasetVersion o WHERE o.dataset.id=:datasetId ORDER BY o.versionNumber DESC, o.minorVersionNumber DESC"), 
+    @NamedQuery(name = "DatasetVersion.findReleasedByDataset",
+                query = "SELECT o FROM DatasetVersion o WHERE o.dataset.id=:datasetId AND o.versionState=edu.harvard.iq.dataverse.DatasetVersion.VersionState.RELEASED ORDER BY o.versionNumber DESC, o.minorVersionNumber DESC")/*,
+    @NamedQuery(name = "DatasetVersion.findVersionElements",
+                query = "SELECT o.id, o.versionState, o.versionNumber, o.minorVersionNumber FROM DatasetVersion o WHERE o.dataset.id=:datasetId ORDER BY o.versionNumber DESC, o.minorVersionNumber DESC")*/})
 
 
 @Entity

diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java
@@ -48,7 +48,23 @@ public class DatasetVersionServiceBean implements java.io.Serializable {
     private static final Logger logger = Logger.getLogger(DatasetVersionServiceBean.class.getCanonicalName());
 
     private static final SimpleDateFormat logFormatter = new SimpleDateFormat("yyyy-MM-dd'T'HH-mm-ss");
-
+
+    // JPQL used to list the FileMetadatas of one version (bound via the
+    // "datasetVersionId" parameter), one query per supported sort field.
+    // A " DESC" suffix may be appended to reverse the order.
+    private static final String QUERY_STR_FIND_ALL_FILE_METADATAS_ORDER_BY_LABEL = "SELECT fm FROM FileMetadata fm "
+            + "WHERE fm.datasetVersion.id=:datasetVersionId "
+            + "ORDER BY fm.label";
+    private static final String QUERY_STR_FIND_ALL_FILE_METADATAS_ORDER_BY_DATE = "SELECT fm FROM FileMetadata fm, DvObject dvo "
+            + "WHERE fm.datasetVersion.id = :datasetVersionId AND fm.dataFile.id = dvo.id "
+            + "ORDER BY CASE WHEN dvo.publicationDate IS NOT NULL THEN dvo.publicationDate ELSE dvo.createDate END";
+    private static final String QUERY_STR_FIND_ALL_FILE_METADATAS_ORDER_BY_SIZE = "SELECT fm FROM FileMetadata fm, DataFile df "
+            + "WHERE fm.datasetVersion.id = :datasetVersionId AND fm.dataFile.id = df.id "
+            + "ORDER BY df.filesize";
+    private static final String QUERY_STR_FIND_ALL_FILE_METADATAS_ORDER_BY_TYPE = "SELECT fm FROM FileMetadata fm, DataFile df "
+            + "WHERE fm.datasetVersion.id = :datasetVersionId AND fm.dataFile.id = df.id "
+            + "ORDER BY df.contentType";
+
     @EJB
     DatasetServiceBean datasetService;
 
@@ -149,7 +165,19 @@ public DatasetVersion getDatasetVersion(){
             return this.datasetVersionForResponse;
         }                
     } // end RetrieveDatasetVersionResponse
-
+
+    /**
+     *  Different criteria to sort the results of FileMetadata queries used in {@link DatasetVersionServiceBean#getFileMetadatas}
+     */
+    public enum FileMetadatasOrderCriteria {
+        // by file label, no explicit direction (the JPQL default, ascending)
+        NameAZ,
+        // by file label, descending
+        NameZA,
+        // by publication date (creation date when not published), descending
+        Newest,
+        // by publication date (creation date when not published), default direction
+        Oldest,
+        // by the file size of the data file
+        Size,
+        // by the content type of the data file
+        Type
+    }
+
     public DatasetVersion find(Object pk) {
         return em.find(DatasetVersion.class, pk);
     }
@@ -166,9 +194,44 @@ public DatasetVersion findDeep(Object pk) {
             .setHint("eclipselink.left-join-fetch", "o.fileMetadatas.datasetVersion")
             .setHint("eclipselink.left-join-fetch", "o.fileMetadatas.dataFile.releaseUser")
             .setHint("eclipselink.left-join-fetch", "o.fileMetadatas.dataFile.creator")
+            .setHint("eclipselink.left-join-fetch", "o.fileMetadatas.dataFile.dataFileTags")
             .getSingleResult();
     }
-
+
+    /**
+     * Performs the same database lookup as the one behind Dataset.getVersions().
+     * Additionally, provides the arguments for selecting a partial list of
+     * versions (at most {@code length} entries, starting at {@code offset})
+     * for pagination, plus the ability to pre-select only the released
+     * versions. The versions are ordered by major, then minor version number,
+     * both descending.
+     * It is recommended that individual software components utilize the
+     * ListVersionsCommand, instead of calling this service method directly.
+     * @param datasetId database id of the dataset
+     * @param offset for pagination through long lists of versions, can be null
+     * @param length for pagination through long lists of versions, can be null
+     * @param includeUnpublished retrieves all the versions, including drafts and deaccessioned;
+     *                           when false, only versions in the RELEASED state are returned
+     * @return (partial) list of versions
+     */
+    public List<DatasetVersion> findVersions(Long datasetId, Integer offset, Integer length, boolean includeUnpublished) {
+        String queryName = includeUnpublished
+                ? "DatasetVersion.findByDataset"
+                : "DatasetVersion.findReleasedByDataset";
+
+        // Both named queries take the same single parameter; bind it once.
+        TypedQuery<DatasetVersion> query = em.createNamedQuery(queryName, DatasetVersion.class)
+                .setParameter("datasetId", datasetId);
+
+        if (offset != null) {
+            query.setFirstResult(offset);
+        }
+        if (length != null) {
+            query.setMaxResults(length);
+        }
+
+        return query.getResultList();
+    }
+
     public DatasetVersion findByFriendlyVersionNumber(Long datasetId, String friendlyVersionNumber) {
         Long majorVersionNumber = null;
         Long minorVersionNumber = null;
@@ -1224,4 +1287,89 @@ public List<DatasetVersion> getUnarchivedDatasetVersions(){
             return null;
         }
     } // end getUnarchivedDatasetVersions
+
+    /**
+     * Lists the FileMetadatas of the files in a DatasetVersion, sorted by the
+     * requested criteria.
+     * <p>
+     * When neither {@code limit} nor {@code offset} is supplied, the query is
+     * given EclipseLink left-join-fetch hints for the associated objects; when
+     * either one is supplied, only the pagination settings are applied.
+     *
+     * @param datasetVersion the DatasetVersion to access
+     * @param limit for pagination, can be null
+     * @param offset for pagination, can be null
+     * @param orderCriteria a FileMetadatasOrderCriteria to order the results
+     * @return a FileMetadata list of the specified DatasetVersion
+     */
+    public List<FileMetadata> getFileMetadatas(DatasetVersion datasetVersion, Integer limit, Integer offset, FileMetadatasOrderCriteria orderCriteria) {
+        String jpql = getQueryStringFromFileMetadatasOrderCriteria(orderCriteria);
+        TypedQuery<FileMetadata> fileMetadataQuery = em.createQuery(jpql, FileMetadata.class);
+        fileMetadataQuery.setParameter("datasetVersionId", datasetVersion.getId());
+
+        boolean paginated = limit != null || offset != null;
+        if (paginated) {
+            // @todo: is there really no way to use offset-limit with left join hints?
+            if (limit != null) {
+                fileMetadataQuery.setMaxResults(limit);
+            }
+            if (offset != null) {
+                fileMetadataQuery.setFirstResult(offset);
+            }
+            return fileMetadataQuery.getResultList();
+        }
+
+        // Full listing: ask for the related objects to be join-fetched
+        String[] joinFetchPaths = {
+            "fm.dataFile.ingestRequest",
+            "fm.dataFile.thumbnailForDataset",
+            "fm.dataFile.dataTables",
+            "fm.fileCategories",
+            "fm.dataFile.embargo",
+            "fm.datasetVersion",
+            "fm.dataFile.releaseUser",
+            "fm.dataFile.dataFileTags",
+            "fm.dataFile.creator"
+        };
+        for (String joinFetchPath : joinFetchPaths) {
+            fileMetadataQuery.setHint("eclipselink.left-join-fetch", joinFetchPath);
+        }
+        return fileMetadataQuery.getResultList();
+    }
+
+    /**
+     * Returns a FileMetadata list of files in the DatasetVersion with the
+     * specified database id; the id-based counterpart of
+     * {@link DatasetVersionServiceBean#getFileMetadatas}.
+     *
+     * @param versionId database id of the DatasetVersion to access
+     * @param limit for pagination, can be null
+     * @param offset for pagination, can be null
+     * @param orderCriteria a FileMetadatasOrderCriteria to order the results
+     * @return a FileMetadata list of the specified DatasetVersion
+     */
+    public List<FileMetadata> getFileMetadatasByDbId(Long versionId, Integer limit, Integer offset, FileMetadatasOrderCriteria orderCriteria) {
+        TypedQuery<FileMetadata> query = em.createQuery(getQueryStringFromFileMetadatasOrderCriteria(orderCriteria), FileMetadata.class)
+                .setParameter("datasetVersionId", versionId);
+
+        if (limit == null && offset == null) {
+            // Same set of join-fetch hints as in getFileMetadatas(), including
+            // the data file tags, so that both lookups behave consistently.
+            query = query.setHint("eclipselink.left-join-fetch", "fm.dataFile.ingestRequest")
+                    .setHint("eclipselink.left-join-fetch", "fm.dataFile.thumbnailForDataset")
+                    .setHint("eclipselink.left-join-fetch", "fm.dataFile.dataTables")
+                    .setHint("eclipselink.left-join-fetch", "fm.fileCategories")
+                    .setHint("eclipselink.left-join-fetch", "fm.dataFile.embargo")
+                    .setHint("eclipselink.left-join-fetch", "fm.datasetVersion")
+                    .setHint("eclipselink.left-join-fetch", "fm.dataFile.releaseUser")
+                    .setHint("eclipselink.left-join-fetch", "fm.dataFile.dataFileTags")
+                    .setHint("eclipselink.left-join-fetch", "fm.dataFile.creator");
+        } else {
+            // @todo: is there really no way to use offset-limit with left join hints?
+            if (limit != null) {
+                query = query.setMaxResults(limit);
+            }
+            if (offset != null) {
+                query = query.setFirstResult(offset);
+            }
+        }
+        return query.getResultList();
+    }
+
+    /**
+     * Selects the JPQL query string that lists the FileMetadatas of a version
+     * in the requested order.
+     *
+     * @param orderCriteria the requested sort order; {@code null} is treated
+     *                      like {@code NameAZ} (sorted by label)
+     * @return the JPQL query string, which expects a "datasetVersionId" parameter
+     */
+    private String getQueryStringFromFileMetadatasOrderCriteria(FileMetadatasOrderCriteria orderCriteria) {
+        if (orderCriteria == null) {
+            // A switch on a null enum value throws a NullPointerException;
+            // fall back to the default (label) ordering instead.
+            return QUERY_STR_FIND_ALL_FILE_METADATAS_ORDER_BY_LABEL;
+        }
+        switch (orderCriteria) {
+            case NameZA:
+                return QUERY_STR_FIND_ALL_FILE_METADATAS_ORDER_BY_LABEL + " DESC";
+            case Newest:
+                return QUERY_STR_FIND_ALL_FILE_METADATAS_ORDER_BY_DATE + " DESC";
+            case Oldest:
+                return QUERY_STR_FIND_ALL_FILE_METADATAS_ORDER_BY_DATE;
+            case Size:
+                return QUERY_STR_FIND_ALL_FILE_METADATAS_ORDER_BY_SIZE;
+            case Type:
+                return QUERY_STR_FIND_ALL_FILE_METADATAS_ORDER_BY_TYPE;
+            case NameAZ:
+            default:
+                return QUERY_STR_FIND_ALL_FILE_METADATAS_ORDER_BY_LABEL;
+        }
+    }
 } // end class