Skip to content

Commit

Permalink
Additional changes needed for the optimized "embargo publication date…
Browse files Browse the repository at this point in the history
…" aggregate. #9763
  • Loading branch information
landreev committed Sep 6, 2023
1 parent e08f26a commit 7b1e799
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 2 deletions.
4 changes: 4 additions & 0 deletions src/main/java/edu/harvard/iq/dataverse/Dataset.java
Original file line number Diff line number Diff line change
Expand Up @@ -692,15 +692,19 @@ public Timestamp getCitationDate() {
Timestamp citationDate = null;
//Only calculate if this dataset doesn't use an alternate date field for publication date
if (citationDateDatasetFieldType == null) {
// @todo: remove this commented-out code once/if the PR passes review - L.A.
//List<DatasetVersion> versions = this.versions;
// TODO - is this ever not version 1.0 (or draft if not published yet)?
//DatasetVersion oldest = versions.get(versions.size() - 1);
// - I believe the answer is yes, the oldest versions will always be
// either 1.0 or draft - L.A.
citationDate = super.getPublicationDate();
if (embargoCitationDate != null) {
if (citationDate.compareTo(embargoCitationDate) < 0) {
return embargoCitationDate;
}
}
// @todo: remove this commented-out code once/if the PR passes review - L.A.
/*if (oldest.isPublished()) {
List<FileMetadata> fms = oldest.getFileMetadatas();
for (FileMetadata fm : fms) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import edu.harvard.iq.dataverse.DatasetVersionUser;
import edu.harvard.iq.dataverse.Dataverse;
import edu.harvard.iq.dataverse.DvObject;
import edu.harvard.iq.dataverse.Embargo;
import edu.harvard.iq.dataverse.UserNotification;
import edu.harvard.iq.dataverse.authorization.Permission;
import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser;
Expand Down Expand Up @@ -117,9 +118,37 @@ public Dataset execute(CommandContext ctxt) throws CommandException {
// is this the first publication of the dataset?
if (theDataset.getPublicationDate() == null) {
theDataset.setReleaseUser((AuthenticatedUser) getUser());
}
if ( theDataset.getPublicationDate() == null ) {

theDataset.setPublicationDate(new Timestamp(new Date().getTime()));

// if there are any embargoed files in this version, we will save
// the latest availability date as the "embargoCitationDate" for future
// reference (if the files are not available yet, as of publishing of
// the dataset, this date will be used as the "citation date" of the dataset,
// instead of the publicationDate, in compliance with the DataCite
// best practices).
// the code below replicates the logic that used to be in the method
// Dataset.getCitationDate() that calculated this adjusted date in real time.

Timestamp latestEmbargoDate = null;
for (DataFile dataFile : theDataset.getFiles()) {
// this is the first version of the dataset that is being published.
// therefore we can iterate through .getFiles() instead of obtaining
// the DataFiles by going through the FileMetadatas in the current version.
Embargo embargo = dataFile.getEmbargo();
if (embargo != null) {
// "dateAvailable" is not nullable in the Embargo class, no need for a null check
Timestamp embargoDate = Timestamp.valueOf(embargo.getDateAvailable().atStartOfDay());
if (latestEmbargoDate == null || latestEmbargoDate.compareTo(embargoDate) < 0) {
latestEmbargoDate = embargoDate;
}
}
}
// the above loop could be easily replaced with a database query;
// but we iterate through .getFiles() elsewhere in the command, when
// updating and/or registering the files, so it should not result in
// an extra performance hit.
theDataset.setEmbargoCitationDate(latestEmbargoDate);
}

//Clear any external status
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
-- dataset.embargoCitationDate holds one aggregated timestamp per dataset:
-- the latest availability date among the embargoed files (if any) that
-- belong to the dataset's first published version.
ALTER TABLE dataset
    ADD COLUMN IF NOT EXISTS embargoCitationDate TIMESTAMP WITHOUT TIME ZONE;

-- Backfill the new column for every dataset whose first released version
-- (version 1.0) contains at least one embargoed file. Datasets with no such
-- files produce no row in the subquery and are left untouched.
-- This mirrors the calculation that Dataset.getCitationDate() used to
-- perform in real time.
UPDATE dataset
SET embargocitationdate = latest.embargocitationdate
FROM (
    SELECT ds.id, MAX(emb.dateavailable) AS embargocitationdate
    FROM dataset ds
    JOIN datasetversion dv ON dv.dataset_id = ds.id
    JOIN filemetadata fm ON fm.datasetversion_id = dv.id
    JOIN datafile df ON df.id = fm.datafile_id
    JOIN embargo emb ON emb.id = df.embargo_id
    WHERE dv.versionstate = 'RELEASED'
      AND dv.versionnumber = 1
      AND dv.minorversionnumber = 0
    GROUP BY ds.id
) latest
WHERE latest.id = dataset.id;

0 comments on commit 7b1e799

Please sign in to comment.