Skip to content

Commit

Permalink
Changed: using query-based implementation for files download size
Browse files Browse the repository at this point in the history
  • Loading branch information
GPortas committed Oct 2, 2023
1 parent 4aa34ff commit 907fd40
Show file tree
Hide file tree
Showing 4 changed files with 89 additions and 14 deletions.
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package edu.harvard.iq.dataverse;

import edu.harvard.iq.dataverse.QDataFileCategory;
import edu.harvard.iq.dataverse.QDataTable;
import edu.harvard.iq.dataverse.QDvObject;
import edu.harvard.iq.dataverse.QEmbargo;
import edu.harvard.iq.dataverse.QFileMetadata;
Expand Down Expand Up @@ -36,6 +37,7 @@ public class DatasetVersionFilesServiceBean implements Serializable {
private final QFileMetadata fileMetadata = QFileMetadata.fileMetadata;
private final QDvObject dvObject = QDvObject.dvObject;
private final QDataFileCategory dataFileCategory = QDataFileCategory.dataFileCategory;
private final QDataTable dataTable = QDataTable.dataTable;

/**
* Different criteria to sort the results of FileMetadata queries used in {@link DatasetVersionFilesServiceBean#getFileMetadatas}
Expand All @@ -51,6 +53,19 @@ public enum DataFileAccessStatus {
Public, Restricted, EmbargoedThenRestricted, EmbargoedThenPublic
}

/**
 * Selects which file sizes are added up by
 * {@link DatasetVersionFilesServiceBean#getFilesDownloadSize(DatasetVersion, FileDownloadSizeMode)}.
 * <p>
 * Non-tabular files contribute their archival size in every mode; the mode only
 * decides what tabular files contribute:
 * <ul>
 * <li>{@code All}: both the archival and the original size of each tabular file</li>
 * <li>{@code Original}: only the original size of each tabular file</li>
 * <li>{@code Archival}: only the archival size of each tabular file</li>
 * </ul>
 */
public enum FileDownloadSizeMode {
    /** Tabular files count with both their original and their archival sizes. */
    All,
    /** Tabular files count with their original size only. */
    Original,
    /** Tabular files count with their archival size only. */
    Archival
}

/**
* Given a DatasetVersion, returns its total file metadata count
*
Expand Down Expand Up @@ -159,6 +174,23 @@ public List<FileMetadata> getFileMetadatas(DatasetVersion datasetVersion, Intege
return baseQuery.fetch();
}

/**
 * Computes the total download size of the files attached to a DatasetVersion.
 * <p>
 * The result is assembled from two query-backed partial totals: the summed original
 * sizes of tabular files and the summed archival sizes (optionally restricted to
 * non-tabular files). Which partial totals are combined depends on the mode.
 *
 * @param datasetVersion the DatasetVersion whose files are measured
 * @param mode           the FileDownloadSizeMode selecting which sizes are included
 * @return the total download size as a long
 */
public long getFilesDownloadSize(DatasetVersion datasetVersion, FileDownloadSizeMode mode) {
    return switch (mode) {
        case All -> {
            // Original sizes of tabular files plus archival sizes of every file.
            long originalTabularTotal = getOriginalTabularFilesSize(datasetVersion);
            long archivalTotal = getArchivalFilesSize(datasetVersion, false);
            yield originalTabularTotal + archivalTotal;
        }
        case Original -> {
            // Original sizes of tabular files plus archival sizes of non-tabular files only.
            long originalTabularTotal = getOriginalTabularFilesSize(datasetVersion);
            long nonTabularArchivalTotal = getArchivalFilesSize(datasetVersion, true);
            yield originalTabularTotal + nonTabularArchivalTotal;
        }
        case Archival -> getArchivalFilesSize(datasetVersion, false);
    };
}

private void addAccessStatusCountToTotal(DatasetVersion datasetVersion, Map<DataFileAccessStatus, Long> totalCounts, DataFileAccessStatus dataFileAccessStatus) {
long fileMetadataCount = getFileMetadataCountByAccessStatus(datasetVersion, dataFileAccessStatus);
if (fileMetadataCount > 0) {
Expand Down Expand Up @@ -230,4 +262,29 @@ private void applyOrderCriteriaToGetFileMetadatasQuery(JPAQuery<FileMetadata> qu
break;
}
}

/**
 * Sums the original file sizes recorded on the data tables of the files that belong
 * to the given DatasetVersion.
 *
 * @param datasetVersion the DatasetVersion whose file metadata rows are matched by id
 * @return the summed original size, or 0 when the query yields no value
 */
private long getOriginalTabularFilesSize(DatasetVersion datasetVersion) {
    JPAQuery<Long> originalSizeQuery = new JPAQueryFactory(em)
            .from(fileMetadata)
            .from(dataTable)
            // Restrict to the file metadata rows of the requested version.
            .where(fileMetadata.datasetVersion.id.eq(datasetVersion.getId()))
            // Pair each data table with the file metadata row that points at the same data file.
            .where(fileMetadata.dataFile.dataTables.isNotEmpty().and(dataTable.dataFile.eq(fileMetadata.dataFile)))
            .select(dataTable.originalFileSize.sum());
    Long total = originalSizeQuery.fetchFirst();
    if (total == null) {
        return 0;
    }
    return total;
}

/**
 * Sums the {@code filesize} of the data files that belong to the given DatasetVersion.
 *
 * @param datasetVersion the DatasetVersion whose file metadata rows are matched by id
 * @param ignoreTabular  when true, only files whose data table collection is empty are counted
 * @return the summed file size, or 0 when the query yields no value
 */
private long getArchivalFilesSize(DatasetVersion datasetVersion, boolean ignoreTabular) {
    JPAQuery<?> sizeQuery = new JPAQueryFactory(em)
            .from(fileMetadata)
            .where(fileMetadata.datasetVersion.id.eq(datasetVersion.getId()));
    if (ignoreTabular) {
        // Leave out files that have data tables attached.
        sizeQuery = sizeQuery.where(fileMetadata.dataFile.dataTables.isEmpty());
    }
    Long total = sizeQuery.select(fileMetadata.dataFile.filesize.sum()).fetchFirst();
    if (total == null) {
        return 0;
    }
    return total;
}
}
12 changes: 9 additions & 3 deletions src/main/java/edu/harvard/iq/dataverse/api/Datasets.java
Original file line number Diff line number Diff line change
Expand Up @@ -2962,13 +2962,19 @@ public Response getStorageSize(@Context ContainerRequestContext crc, @PathParam(
public Response getDownloadSize(@Context ContainerRequestContext crc,
@PathParam("identifier") String dvIdtf,
@PathParam("versionId") String version,
@QueryParam("ignoreOriginalTabularSize") boolean ignoreOriginalTabularSize,
@QueryParam("mode") String mode,
@Context UriInfo uriInfo,
@Context HttpHeaders headers) {

return response(req -> {
DatasetVersionFilesServiceBean.FileDownloadSizeMode fileDownloadSizeMode;
try {
fileDownloadSizeMode = mode != null ? DatasetVersionFilesServiceBean.FileDownloadSizeMode.valueOf(mode) : DatasetVersionFilesServiceBean.FileDownloadSizeMode.All;
} catch (IllegalArgumentException e) {
return error(Response.Status.BAD_REQUEST, "Invalid mode: " + mode);
}
DatasetVersion datasetVersion = getDatasetVersionOrDie(req, version, findDatasetOrDie(dvIdtf), uriInfo, headers);
Long datasetStorageSize = ignoreOriginalTabularSize ? DatasetUtil.getDownloadSizeNumeric(datasetVersion, false)
: execCommand(new GetDatasetStorageSizeCommand(req, findDatasetOrDie(dvIdtf), false, GetDatasetStorageSizeCommand.Mode.DOWNLOAD, datasetVersion));
long datasetStorageSize = datasetVersionFilesServiceBean.getFilesDownloadSize(datasetVersion, fileDownloadSizeMode);
String message = MessageFormat.format(BundleUtil.getStringFromBundle("datasets.api.datasize.download"), datasetStorageSize);
JsonObjectBuilder jsonObjectBuilder = Json.createObjectBuilder();
jsonObjectBuilder.add("message", message);
Expand Down
30 changes: 21 additions & 9 deletions src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java
Original file line number Diff line number Diff line change
Expand Up @@ -3632,7 +3632,7 @@ public void deaccessionDataset() {
}

@Test
public void getDownloadSize() throws IOException {
public void getDownloadSize() throws IOException, InterruptedException {
Response createUser = UtilIT.createRandomUser();
createUser.then().assertThat().statusCode(OK.getStatusCode());
String apiToken = UtilIT.getApiTokenFromResponse(createUser);
Expand All @@ -3658,7 +3658,8 @@ public void getDownloadSize() throws IOException {

int expectedTextFilesStorageSize = testFileSize1 + testFileSize2;

Response getDownloadSizeResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST, false, apiToken);
// Get the total size when there are no tabular files
Response getDownloadSizeResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST, DatasetVersionFilesServiceBean.FileDownloadSizeMode.All.toString(), apiToken);
getDownloadSizeResponse.then().assertThat().statusCode(OK.getStatusCode())
.body("data.storageSize", equalTo(expectedTextFilesStorageSize));

Expand All @@ -3670,20 +3671,31 @@ public void getDownloadSize() throws IOException {
// Get the original tabular file size
int tabularOriginalSize = Integer.parseInt(uploadTabularFileResponse.getBody().jsonPath().getString("data.files[0].dataFile.filesize"));

// Get the size ignoring the original tabular file sizes
getDownloadSizeResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST, true, apiToken);
// Ensure tabular file is ingested
Thread.sleep(2000);

// Get the total size ignoring the original tabular file sizes
getDownloadSizeResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST, DatasetVersionFilesServiceBean.FileDownloadSizeMode.Archival.toString(), apiToken);
getDownloadSizeResponse.then().assertThat().statusCode(OK.getStatusCode());

int actualSizeIgnoringOriginalTabularSizes = Integer.parseInt(getDownloadSizeResponse.getBody().jsonPath().getString("data.storageSize"));

// Assert that the size has been incremented with the last uploaded file
assertTrue(actualSizeIgnoringOriginalTabularSizes > expectedTextFilesStorageSize);

// Get the size including the original tabular file sizes
int tabularProcessedSize = actualSizeIgnoringOriginalTabularSizes - expectedTextFilesStorageSize;
int expectedSizeIncludingOriginalTabularSizes = tabularOriginalSize + tabularProcessedSize + expectedTextFilesStorageSize;
// Get the total size including only original sizes and ignoring archival sizes for tabular files
int expectedSizeIncludingOnlyOriginalForTabular = tabularOriginalSize + expectedTextFilesStorageSize;

getDownloadSizeResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST, DatasetVersionFilesServiceBean.FileDownloadSizeMode.Original.toString(), apiToken);
getDownloadSizeResponse.then().assertThat().statusCode(OK.getStatusCode())
.body("data.storageSize", equalTo(expectedSizeIncludingOnlyOriginalForTabular));

// Get the total size including both the original and archival tabular file sizes
int tabularArchivalSize = actualSizeIgnoringOriginalTabularSizes - expectedTextFilesStorageSize;
int expectedSizeIncludingAllSizes = tabularArchivalSize + tabularOriginalSize + expectedTextFilesStorageSize;

getDownloadSizeResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST, false, apiToken);
getDownloadSizeResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST, DatasetVersionFilesServiceBean.FileDownloadSizeMode.All.toString(), apiToken);
getDownloadSizeResponse.then().assertThat().statusCode(OK.getStatusCode())
.body("data.storageSize", equalTo(expectedSizeIncludingOriginalTabularSizes));
.body("data.storageSize", equalTo(expectedSizeIncludingAllSizes));
}
}
4 changes: 2 additions & 2 deletions src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java
Original file line number Diff line number Diff line change
Expand Up @@ -3410,10 +3410,10 @@ static Response deaccessionDataset(Integer datasetId, String version, String api
.put("/api/datasets/" + datasetId + "/versions/" + version + "/deaccession");
}

static Response getDownloadSize(Integer datasetId, String version, boolean ignoreOriginalTabularSize, String apiToken) {
static Response getDownloadSize(Integer datasetId, String version, String mode, String apiToken) {
return given()
.header(API_TOKEN_HTTP_HEADER, apiToken)
.queryParam("ignoreOriginalTabularSize", ignoreOriginalTabularSize)
.queryParam("mode", mode)
.get("/api/datasets/" + datasetId + "/versions/" + version + "/downloadsize");
}
}

0 comments on commit 907fd40

Please sign in to comment.