From cea36d6aadcc3a21d70c3029b498a279256d6c07 Mon Sep 17 00:00:00 2001 From: GPortas Date: Mon, 16 Oct 2023 12:19:15 +0100 Subject: [PATCH 1/5] Added: file search criteria to getDownloadSize datasets API endpoint --- .../DatasetVersionFilesServiceBean.java | 23 +++++++++++-------- .../harvard/iq/dataverse/api/Datasets.java | 19 ++++++++++++++- .../harvard/iq/dataverse/api/DatasetsIT.java | 21 +++++++++++------ .../edu/harvard/iq/dataverse/api/UtilIT.java | 22 +++++++++++++++--- 4 files changed, 64 insertions(+), 21 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionFilesServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionFilesServiceBean.java index 2c14498caa9..6ea9262bbc4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionFilesServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionFilesServiceBean.java @@ -53,7 +53,7 @@ public enum FileOrderCriteria { } /** - * Mode to base the search in {@link DatasetVersionFilesServiceBean#getFilesDownloadSize(DatasetVersion, FileDownloadSizeMode)} + * Mode to base the search in {@link DatasetVersionFilesServiceBean#getFilesDownloadSize(DatasetVersion, FileSearchCriteria, FileDownloadSizeMode)} *

* All: Includes both archival and original sizes for tabular files * Archival: Includes only the archival size for tabular files @@ -191,16 +191,17 @@ public List getFileMetadatas(DatasetVersion datasetVersion, Intege * Returns the total download size of all files for a particular DatasetVersion * * @param datasetVersion the DatasetVersion to access + * @param searchCriteria for retrieving only files matching this criteria * @param mode a FileDownloadSizeMode to base the search on * @return long value of total file download size */ - public long getFilesDownloadSize(DatasetVersion datasetVersion, FileDownloadSizeMode mode) { + public long getFilesDownloadSize(DatasetVersion datasetVersion, FileSearchCriteria searchCriteria, FileDownloadSizeMode mode) { return switch (mode) { case All -> - Long.sum(getOriginalTabularFilesSize(datasetVersion), getArchivalFilesSize(datasetVersion, false)); + Long.sum(getOriginalTabularFilesSize(datasetVersion, searchCriteria), getArchivalFilesSize(datasetVersion, false, searchCriteria)); case Original -> - Long.sum(getOriginalTabularFilesSize(datasetVersion), getArchivalFilesSize(datasetVersion, true)); - case Archival -> getArchivalFilesSize(datasetVersion, false); + Long.sum(getOriginalTabularFilesSize(datasetVersion, searchCriteria), getArchivalFilesSize(datasetVersion, true, searchCriteria)); + case Archival -> getArchivalFilesSize(datasetVersion, false, searchCriteria); }; } @@ -301,22 +302,24 @@ private void applyOrderCriteriaToGetFileMetadatasQuery(JPAQuery qu } } - private long getOriginalTabularFilesSize(DatasetVersion datasetVersion) { + private long getOriginalTabularFilesSize(DatasetVersion datasetVersion, FileSearchCriteria searchCriteria) { JPAQueryFactory queryFactory = new JPAQueryFactory(em); - Long result = queryFactory + JPAQuery baseQuery = queryFactory .from(fileMetadata) .where(fileMetadata.datasetVersion.id.eq(datasetVersion.getId())) .from(dataTable) - .where(dataTable.dataFile.eq(fileMetadata.dataFile)) - .select(dataTable.originalFileSize.sum()).fetchFirst(); + .where(dataTable.dataFile.eq(fileMetadata.dataFile)); + applyFileSearchCriteriaToQuery(baseQuery, searchCriteria); + Long result = baseQuery.select(dataTable.originalFileSize.sum()).fetchFirst(); return (result == null) ? 0 : result; } - private long getArchivalFilesSize(DatasetVersion datasetVersion, boolean ignoreTabular) { + private long getArchivalFilesSize(DatasetVersion datasetVersion, boolean ignoreTabular, FileSearchCriteria searchCriteria) { JPAQueryFactory queryFactory = new JPAQueryFactory(em); JPAQuery baseQuery = queryFactory .from(fileMetadata) .where(fileMetadata.datasetVersion.id.eq(datasetVersion.getId())); + applyFileSearchCriteriaToQuery(baseQuery, searchCriteria); Long result; if (ignoreTabular) { result = baseQuery.where(fileMetadata.dataFile.dataTables.isEmpty()).select(fileMetadata.dataFile.filesize.sum()).fetchFirst(); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index f8929c5e8d8..8605b4772f4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -2992,11 +2992,28 @@ public Response getStorageSize(@Context ContainerRequestContext crc, @PathParam( public Response getDownloadSize(@Context ContainerRequestContext crc, @PathParam("identifier") String dvIdtf, @PathParam("versionId") String version, + @QueryParam("contentType") String contentType, + @QueryParam("accessStatus") String accessStatus, + @QueryParam("categoryName") String categoryName, + @QueryParam("tabularTagName") String tabularTagName, + @QueryParam("searchText") String searchText, @QueryParam("mode") String mode, @Context UriInfo uriInfo, @Context HttpHeaders headers) { return response(req -> { + FileSearchCriteria fileSearchCriteria; + try { + fileSearchCriteria = new FileSearchCriteria( + contentType, + accessStatus != null ? FileSearchCriteria.FileAccessStatus.valueOf(accessStatus) : null, + categoryName, + tabularTagName, + searchText + ); + } catch (IllegalArgumentException e) { + return badRequest(BundleUtil.getStringFromBundle("datasets.api.version.files.invalid.access.status", List.of(accessStatus))); + } DatasetVersionFilesServiceBean.FileDownloadSizeMode fileDownloadSizeMode; try { fileDownloadSizeMode = mode != null ? DatasetVersionFilesServiceBean.FileDownloadSizeMode.valueOf(mode) : DatasetVersionFilesServiceBean.FileDownloadSizeMode.All; @@ -3004,7 +3021,7 @@ public Response getDownloadSize(@Context ContainerRequestContext crc, return error(Response.Status.BAD_REQUEST, "Invalid mode: " + mode); } DatasetVersion datasetVersion = getDatasetVersionOrDie(req, version, findDatasetOrDie(dvIdtf), uriInfo, headers); - long datasetStorageSize = datasetVersionFilesServiceBean.getFilesDownloadSize(datasetVersion, fileDownloadSizeMode); + long datasetStorageSize = datasetVersionFilesServiceBean.getFilesDownloadSize(datasetVersion, fileSearchCriteria, fileDownloadSizeMode); String message = MessageFormat.format(BundleUtil.getStringFromBundle("datasets.api.datasize.download"), datasetStorageSize); JsonObjectBuilder jsonObjectBuilder = Json.createObjectBuilder(); jsonObjectBuilder.add("message", message); diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index 34eccd3172a..66a67887405 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -3863,7 +3863,7 @@ public void getDownloadSize() throws IOException, InterruptedException { int expectedTextFilesStorageSize = testFileSize1 + testFileSize2; // Get the total size when there are no tabular files - Response getDownloadSizeResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST, DatasetVersionFilesServiceBean.FileDownloadSizeMode.All.toString(), apiToken); + Response getDownloadSizeResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST, null, null, null, null, null, DatasetVersionFilesServiceBean.FileDownloadSizeMode.All.toString(), apiToken); getDownloadSizeResponse.then().assertThat().statusCode(OK.getStatusCode()) .body("data.storageSize", equalTo(expectedTextFilesStorageSize)); @@ -3878,7 +3878,7 @@ public void getDownloadSize() throws IOException, InterruptedException { Thread.sleep(2000); // Get the total size ignoring the original tabular file sizes - getDownloadSizeResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST, DatasetVersionFilesServiceBean.FileDownloadSizeMode.Archival.toString(), apiToken); + getDownloadSizeResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST, null, null, null, null, null, DatasetVersionFilesServiceBean.FileDownloadSizeMode.Archival.toString(), apiToken); getDownloadSizeResponse.then().assertThat().statusCode(OK.getStatusCode()); int actualSizeIgnoringOriginalTabularSizes = Integer.parseInt(getDownloadSizeResponse.getBody().jsonPath().getString("data.storageSize")); @@ -3889,7 +3889,7 @@ public void getDownloadSize() throws IOException, InterruptedException { // Get the total size including only original sizes and ignoring archival sizes for tabular files int expectedSizeIncludingOnlyOriginalForTabular = tabularOriginalSize + expectedTextFilesStorageSize; - getDownloadSizeResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST, DatasetVersionFilesServiceBean.FileDownloadSizeMode.Original.toString(), apiToken); + getDownloadSizeResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST, null, null, null, null, null, DatasetVersionFilesServiceBean.FileDownloadSizeMode.Original.toString(), apiToken); getDownloadSizeResponse.then().assertThat().statusCode(OK.getStatusCode()) .body("data.storageSize", equalTo(expectedSizeIncludingOnlyOriginalForTabular)); @@ -3897,13 +3897,13 @@ public void getDownloadSize() throws IOException, InterruptedException { int tabularArchivalSize = actualSizeIgnoringOriginalTabularSizes - expectedTextFilesStorageSize; int expectedSizeIncludingAllSizes = tabularArchivalSize + tabularOriginalSize + expectedTextFilesStorageSize; - getDownloadSizeResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST, DatasetVersionFilesServiceBean.FileDownloadSizeMode.All.toString(), apiToken); + getDownloadSizeResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST, null, null, null, null, null, DatasetVersionFilesServiceBean.FileDownloadSizeMode.All.toString(), apiToken); getDownloadSizeResponse.then().assertThat().statusCode(OK.getStatusCode()) .body("data.storageSize", equalTo(expectedSizeIncludingAllSizes)); // Get the total size sending invalid file download size mode String invalidMode = "invalidMode"; - getDownloadSizeResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST, invalidMode, apiToken); + getDownloadSizeResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST, null, null, null, null, null, invalidMode, apiToken); getDownloadSizeResponse.then().assertThat().statusCode(BAD_REQUEST.getStatusCode()) .body("message", equalTo("Invalid mode: " + invalidMode)); @@ -3917,15 +3917,22 @@ public void getDownloadSize() throws IOException, InterruptedException { // Get the total size including only original sizes and ignoring archival sizes for tabular files expectedSizeIncludingOnlyOriginalForTabular = tabularOriginalSize + expectedSizeIncludingOnlyOriginalForTabular; - getDownloadSizeResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST, DatasetVersionFilesServiceBean.FileDownloadSizeMode.Original.toString(), apiToken); + getDownloadSizeResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST, null, null, null, null, null, DatasetVersionFilesServiceBean.FileDownloadSizeMode.Original.toString(), apiToken); getDownloadSizeResponse.then().assertThat().statusCode(OK.getStatusCode()) .body("data.storageSize", equalTo(expectedSizeIncludingOnlyOriginalForTabular)); // Get the total size including both the original and archival tabular file sizes expectedSizeIncludingAllSizes = tabularArchivalSize + tabularOriginalSize + expectedSizeIncludingAllSizes; - getDownloadSizeResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST, DatasetVersionFilesServiceBean.FileDownloadSizeMode.All.toString(), apiToken); + getDownloadSizeResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST, null, null, null, null, null, DatasetVersionFilesServiceBean.FileDownloadSizeMode.All.toString(), apiToken); getDownloadSizeResponse.then().assertThat().statusCode(OK.getStatusCode()) .body("data.storageSize", equalTo(expectedSizeIncludingAllSizes)); + + // Get the total size including both the original and archival tabular file sizes with search criteria + getDownloadSizeResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST, "text/plain", FileSearchCriteria.FileAccessStatus.Public.toString(), null, null, "test_", DatasetVersionFilesServiceBean.FileDownloadSizeMode.All.toString(), apiToken); + // We exclude tabular sizes from the expected result since the search criteria filters by content type "text/plain" and search text "test_" + int expectedSizeIncludingAllSizesAndApplyingCriteria = testFileSize1 + testFileSize2; + getDownloadSizeResponse.then().assertThat().statusCode(OK.getStatusCode()) + .body("data.storageSize", equalTo(expectedSizeIncludingAllSizesAndApplyingCriteria)); } } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index 4421e9280b3..38cc44c8c0d 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -3450,10 +3450,26 @@ static Response deaccessionDataset(Integer datasetId, String version, String dea .post("/api/datasets/" + datasetId + "/versions/" + version + "/deaccession"); } - static Response getDownloadSize(Integer datasetId, String version, String mode, String apiToken) { - return given() + static Response getDownloadSize(Integer datasetId, String version, String contentType, String accessStatus, String categoryName, String tabularTagName, String searchText, String mode, String apiToken) { + RequestSpecification requestSpecification = given() .header(API_TOKEN_HTTP_HEADER, apiToken) - .queryParam("mode", mode) + .queryParam("mode", mode); + if (contentType != null) { + requestSpecification = requestSpecification.queryParam("contentType", contentType); + } + if (accessStatus != null) { + requestSpecification = requestSpecification.queryParam("accessStatus", accessStatus); + } + if (categoryName != null) { + requestSpecification = requestSpecification.queryParam("categoryName", categoryName); + } + if (tabularTagName != null) { + requestSpecification = requestSpecification.queryParam("tabularTagName", tabularTagName); + } + if (searchText != null) { + requestSpecification = requestSpecification.queryParam("searchText", searchText); + } + return requestSpecification .get("/api/datasets/" + datasetId + "/versions/" + version + "/downloadsize"); } } From b6bcbf7cadcf8e7b2f05825836d155f6a589b710 Mon Sep 17 00:00:00 2001 From: GPortas Date: Mon, 16 Oct 2023 12:47:35 +0100 Subject: [PATCH 2/5] Added: getDownloadSize API endpoint deaccessioned dataset support --- .../harvard/iq/dataverse/api/Datasets.java | 3 +- .../harvard/iq/dataverse/api/DatasetsIT.java | 41 +++++++++++++++---- .../edu/harvard/iq/dataverse/api/UtilIT.java | 26 ++++++++++-- 3 files changed, 58 insertions(+), 12 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 8605b4772f4..852dd18ee84 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -2998,6 +2998,7 @@ public Response getDownloadSize(@Context ContainerRequestContext crc, @QueryParam("tabularTagName") String tabularTagName, @QueryParam("searchText") String searchText, @QueryParam("mode") String mode, + @QueryParam("includeDeaccessioned") boolean includeDeaccessioned, @Context UriInfo uriInfo, @Context HttpHeaders headers) { @@ -3020,7 +3021,7 @@ public Response getDownloadSize(@Context ContainerRequestContext crc, } catch (IllegalArgumentException e) { return error(Response.Status.BAD_REQUEST, "Invalid mode: " + mode); } - DatasetVersion datasetVersion = getDatasetVersionOrDie(req, version, findDatasetOrDie(dvIdtf), uriInfo, headers); + DatasetVersion datasetVersion = getDatasetVersionOrDie(req, version, findDatasetOrDie(dvIdtf), uriInfo, headers, includeDeaccessioned); long datasetStorageSize = datasetVersionFilesServiceBean.getFilesDownloadSize(datasetVersion, fileSearchCriteria, fileDownloadSizeMode); String message = MessageFormat.format(BundleUtil.getStringFromBundle("datasets.api.datasize.download"), datasetStorageSize); JsonObjectBuilder jsonObjectBuilder = Json.createObjectBuilder(); diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index 66a67887405..e12de1e23cc 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -3863,7 +3863,7 @@ public void getDownloadSize() throws IOException, InterruptedException { int expectedTextFilesStorageSize = testFileSize1 + testFileSize2; // Get the total size when there are no tabular files - Response getDownloadSizeResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST, null, null, null, null, null, DatasetVersionFilesServiceBean.FileDownloadSizeMode.All.toString(), apiToken); + Response getDownloadSizeResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST, null, null, null, null, null, DatasetVersionFilesServiceBean.FileDownloadSizeMode.All.toString(), false, apiToken); getDownloadSizeResponse.then().assertThat().statusCode(OK.getStatusCode()) .body("data.storageSize", equalTo(expectedTextFilesStorageSize)); @@ -3878,7 +3878,7 @@ public void getDownloadSize() throws IOException, InterruptedException { Thread.sleep(2000); // Get the total size ignoring the original tabular file sizes - getDownloadSizeResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST, null, null, null, null, null, DatasetVersionFilesServiceBean.FileDownloadSizeMode.Archival.toString(), apiToken); + getDownloadSizeResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST, null, null, null, null, null, DatasetVersionFilesServiceBean.FileDownloadSizeMode.Archival.toString(), false, apiToken); getDownloadSizeResponse.then().assertThat().statusCode(OK.getStatusCode()); int actualSizeIgnoringOriginalTabularSizes = Integer.parseInt(getDownloadSizeResponse.getBody().jsonPath().getString("data.storageSize")); @@ -3889,7 +3889,7 @@ public void getDownloadSize() throws IOException, InterruptedException { // Get the total size including only original sizes and ignoring archival sizes for tabular files int expectedSizeIncludingOnlyOriginalForTabular = tabularOriginalSize + expectedTextFilesStorageSize; - getDownloadSizeResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST, null, null, null, null, null, DatasetVersionFilesServiceBean.FileDownloadSizeMode.Original.toString(), apiToken); + getDownloadSizeResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST, null, null, null, null, null, DatasetVersionFilesServiceBean.FileDownloadSizeMode.Original.toString(), false, apiToken); getDownloadSizeResponse.then().assertThat().statusCode(OK.getStatusCode()) .body("data.storageSize", equalTo(expectedSizeIncludingOnlyOriginalForTabular)); @@ -3897,13 +3897,13 @@ public void getDownloadSize() throws IOException, InterruptedException { int tabularArchivalSize = actualSizeIgnoringOriginalTabularSizes - expectedTextFilesStorageSize; int expectedSizeIncludingAllSizes = tabularArchivalSize + tabularOriginalSize + expectedTextFilesStorageSize; - getDownloadSizeResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST, null, null, null, null, null, DatasetVersionFilesServiceBean.FileDownloadSizeMode.All.toString(), apiToken); + getDownloadSizeResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST, null, null, null, null, null, DatasetVersionFilesServiceBean.FileDownloadSizeMode.All.toString(), false, apiToken); getDownloadSizeResponse.then().assertThat().statusCode(OK.getStatusCode()) .body("data.storageSize", equalTo(expectedSizeIncludingAllSizes)); // Get the total size sending invalid file download size mode String invalidMode = "invalidMode"; - getDownloadSizeResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST, null, null, null, null, null, invalidMode, apiToken); + getDownloadSizeResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST, null, null, null, null, null, invalidMode, false, apiToken); getDownloadSizeResponse.then().assertThat().statusCode(BAD_REQUEST.getStatusCode()) .body("message", equalTo("Invalid mode: " + invalidMode)); @@ -3917,22 +3917,47 @@ public void getDownloadSize() throws IOException, InterruptedException { // Get the total size including only original sizes and ignoring archival sizes for tabular files expectedSizeIncludingOnlyOriginalForTabular = tabularOriginalSize + expectedSizeIncludingOnlyOriginalForTabular; - getDownloadSizeResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST, null, null, null, null, null, DatasetVersionFilesServiceBean.FileDownloadSizeMode.Original.toString(), apiToken); + getDownloadSizeResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST, null, null, null, null, null, DatasetVersionFilesServiceBean.FileDownloadSizeMode.Original.toString(), false, apiToken); getDownloadSizeResponse.then().assertThat().statusCode(OK.getStatusCode()) .body("data.storageSize", equalTo(expectedSizeIncludingOnlyOriginalForTabular)); // Get the total size including both the original and archival tabular file sizes expectedSizeIncludingAllSizes = tabularArchivalSize + tabularOriginalSize + expectedSizeIncludingAllSizes; - getDownloadSizeResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST, null, null, null, null, null, DatasetVersionFilesServiceBean.FileDownloadSizeMode.All.toString(), apiToken); + getDownloadSizeResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST, null, null, null, null, null, DatasetVersionFilesServiceBean.FileDownloadSizeMode.All.toString(), false, apiToken); getDownloadSizeResponse.then().assertThat().statusCode(OK.getStatusCode()) .body("data.storageSize", equalTo(expectedSizeIncludingAllSizes)); // Get the total size including both the original and archival tabular file sizes with search criteria - getDownloadSizeResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST, "text/plain", FileSearchCriteria.FileAccessStatus.Public.toString(), null, null, "test_", DatasetVersionFilesServiceBean.FileDownloadSizeMode.All.toString(), apiToken); + getDownloadSizeResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST, "text/plain", FileSearchCriteria.FileAccessStatus.Public.toString(), null, null, "test_", DatasetVersionFilesServiceBean.FileDownloadSizeMode.All.toString(), false, apiToken); // We exclude tabular sizes from the expected result since the search criteria filters by content type "text/plain" and search text "test_" int expectedSizeIncludingAllSizesAndApplyingCriteria = testFileSize1 + testFileSize2; getDownloadSizeResponse.then().assertThat().statusCode(OK.getStatusCode()) .body("data.storageSize", equalTo(expectedSizeIncludingAllSizesAndApplyingCriteria)); + + // Test Deaccessioned + Response publishDataverseResponse = UtilIT.publishDataverseViaNativeApi(dataverseAlias, apiToken); + publishDataverseResponse.then().assertThat().statusCode(OK.getStatusCode()); + Response publishDatasetResponse = UtilIT.publishDatasetViaNativeApi(datasetId, "major", apiToken); + publishDatasetResponse.then().assertThat().statusCode(OK.getStatusCode()); + + Response deaccessionDatasetResponse = UtilIT.deaccessionDataset(datasetId, DS_VERSION_LATEST_PUBLISHED, "Test deaccession reason.", null, apiToken); + deaccessionDatasetResponse.then().assertThat().statusCode(OK.getStatusCode()); + + // includeDeaccessioned false + Response getVersionFileCountsResponseNoDeaccessioned = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST_PUBLISHED, null, null, null, null, null, DatasetVersionFilesServiceBean.FileDownloadSizeMode.All.toString(), false, apiToken); + getVersionFileCountsResponseNoDeaccessioned.then().assertThat().statusCode(NOT_FOUND.getStatusCode()); + + // includeDeaccessioned true + Response getVersionFileCountsResponseDeaccessioned = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST_PUBLISHED, null, null, null, null, null, DatasetVersionFilesServiceBean.FileDownloadSizeMode.All.toString(), true, apiToken); + getVersionFileCountsResponseDeaccessioned.then().assertThat().statusCode(OK.getStatusCode()); + + // Test that the dataset file counts for a deaccessioned dataset cannot be accessed by a guest + // By latest published version + Response getVersionFileCountsGuestUserResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST_PUBLISHED, null, null, null, null, null, DatasetVersionFilesServiceBean.FileDownloadSizeMode.All.toString(), true, null); + getVersionFileCountsGuestUserResponse.then().assertThat().statusCode(NOT_FOUND.getStatusCode()); + // By specific version 1.0 + getVersionFileCountsGuestUserResponse = UtilIT.getDownloadSize(datasetId, "1.0", null, null, null, null, null, DatasetVersionFilesServiceBean.FileDownloadSizeMode.All.toString(), true, null); + getVersionFileCountsGuestUserResponse.then().assertThat().statusCode(NOT_FOUND.getStatusCode()); } } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index 38cc44c8c0d..15350782fa1 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -3373,7 +3373,15 @@ static Response createFileEmbargo(Integer datasetId, Integer fileId, String date .post("/api/datasets/" + datasetId + "/files/actions/:set-embargo"); } - static Response getVersionFileCounts(Integer datasetId, String version, String contentType, String accessStatus, String categoryName, String tabularTagName, String searchText, boolean includeDeaccessioned, String apiToken) { + static Response getVersionFileCounts(Integer datasetId, + String version, + String contentType, + String accessStatus, + String categoryName, + String tabularTagName, + String searchText, + boolean includeDeaccessioned, + String apiToken) { RequestSpecification requestSpecification = given() .queryParam("includeDeaccessioned", includeDeaccessioned); if (apiToken != null) { @@ -3450,10 +3458,22 @@ static Response deaccessionDataset(Integer datasetId, String version, String dea .post("/api/datasets/" + datasetId + "/versions/" + version + "/deaccession"); } - static Response getDownloadSize(Integer datasetId, String version, String contentType, String accessStatus, String categoryName, String tabularTagName, String searchText, String mode, String apiToken) { + static Response getDownloadSize(Integer datasetId, + String version, + String contentType, + String accessStatus, + String categoryName, + String tabularTagName, + String searchText, + String mode, + boolean includeDeaccessioned, + String apiToken) { RequestSpecification requestSpecification = given() - .header(API_TOKEN_HTTP_HEADER, apiToken) + .queryParam("includeDeaccessioned", includeDeaccessioned) .queryParam("mode", mode); + if (apiToken != null) { + requestSpecification.header(API_TOKEN_HTTP_HEADER, apiToken); + } if (contentType != null) { requestSpecification = requestSpecification.queryParam("contentType", contentType); } From 794c5b64e6daa52e1d84fb2cb58468b5104d3161 Mon Sep 17 00:00:00 2001 From: GPortas Date: Mon, 16 Oct 2023 13:03:08 +0100 Subject: [PATCH 3/5] Added: extended docs for getDownloadSize API endpoint --- doc/sphinx-guides/source/api/native-api.rst | 61 ++++++++++++++++++++- 1 file changed, 58 insertions(+), 3 deletions(-) diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 2e1a878dce8..98e2722ac5e 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -964,7 +964,7 @@ The fully expanded example above (without environment variables) looks like this curl "https://demo.dataverse.org/api/datasets/24/versions/1.0/files" -This endpoint supports optional pagination, through the ``limit`` and ``offset`` query params: +This endpoint supports optional pagination, through the ``limit`` and ``offset`` query parameters: .. code-block:: bash @@ -1044,7 +1044,7 @@ Usage example: curl "https://demo.dataverse.org/api/datasets/24/versions/1.0/files?includeDeaccessioned=true" -.. note:: Keep in mind that you can combine all of the above query params depending on the results you are looking for. +.. note:: Keep in mind that you can combine all of the above query parameters depending on the results you are looking for. Get File Counts in a Dataset ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -1132,7 +1132,7 @@ Usage example: Please note that filtering values are case sensitive and must be correctly typed for the endpoint to recognize them. -Keep in mind that you can combine all of the above query params depending on the results you are looking for. +Keep in mind that you can combine all of the above query parameters depending on the results you are looking for. View Dataset Files and Folders as a Directory Index ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -1888,6 +1888,61 @@ Usage example: curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/datasets/24/versions/1.0/downloadsize?mode=Archival" +Category name filtering is also optionally supported. To return the size of all files available for download matching the requested category name. + +Usage example: + +.. code-block:: bash + + curl "https://demo.dataverse.org/api/datasets/24/versions/1.0/downloadsize?categoryName=Data" + +Tabular tag name filtering is also optionally supported. To return the size of all files available for download for which the requested tabular tag has been added. + +Usage example: + +.. code-block:: bash + + curl "https://demo.dataverse.org/api/datasets/24/versions/1.0/downloadsize?tabularTagName=Survey" + +Content type filtering is also optionally supported. To return the size of all files available for download matching the requested content type. + +Usage example: + +.. code-block:: bash + + curl "https://demo.dataverse.org/api/datasets/24/versions/1.0/downloadsize?contentType=image/png" + +Filtering by search text is also optionally supported. The search will be applied to the labels and descriptions of the dataset files, to return the size of all files available for download that contain the text searched in one of such fields. + +Usage example: + +.. code-block:: bash + + curl "https://demo.dataverse.org/api/datasets/24/versions/1.0/downloadsize?searchText=word" + +File access filtering is also optionally supported. In particular, by the following possible values: + +* ``Public`` +* ``Restricted`` +* ``EmbargoedThenRestricted`` +* ``EmbargoedThenPublic`` + +If no filter is specified, the files will match all of the above categories. + +Please note that filtering query parameters are case sensitive and must be correctly typed for the endpoint to recognize them. + +By default, deaccessioned dataset versions are not included in the search when applying the :latest or :latest-published identifiers. Additionally, when filtering by a specific version tag, you will get a "not found" error if the version is deaccessioned and you do not enable the ``includeDeaccessioned`` option described below. + +If you want to include deaccessioned dataset versions, you must set ``includeDeaccessioned`` query parameter to ``true``. + +Usage example: + +.. code-block:: bash + + curl "https://demo.dataverse.org/api/datasets/24/versions/1.0/downloadsize?includeDeaccessioned=true" + +.. note:: Keep in mind that you can combine all of the above query parameters depending on the results you are looking for. + Submit a Dataset for Review ~~~~~~~~~~~~~~~~~~~~~~~~~~~ From 28cd109303ec22cbb898f32f3141cc281f4c7c62 Mon Sep 17 00:00:00 2001 From: GPortas Date: Mon, 16 Oct 2023 13:10:38 +0100 Subject: [PATCH 4/5] Added: release notes for #9995 --- ...adsize-with-criteria-and-deaccessioned-support.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 doc/release-notes/9995-files-api-downloadsize-with-criteria-and-deaccessioned-support.md diff --git a/doc/release-notes/9995-files-api-downloadsize-with-criteria-and-deaccessioned-support.md b/doc/release-notes/9995-files-api-downloadsize-with-criteria-and-deaccessioned-support.md new file mode 100644 index 00000000000..71c7aa3b516 --- /dev/null +++ b/doc/release-notes/9995-files-api-downloadsize-with-criteria-and-deaccessioned-support.md @@ -0,0 +1,12 @@ +Extended the getDownloadSize endpoint (/api/datasets/{id}/versions/{versionId}/files/downloadsize), including the following new features: + +- The endpoint now accepts a new boolean optional query parameter "includeDeaccessioned", which, if enabled, causes the endpoint to consider deaccessioned dataset versions when searching for versions to obtain the file total download size. + + +- The endpoint now supports filtering by criteria. In particular, it accepts the following optional criteria query parameters: + + - contentType + - accessStatus + - categoryName + - tabularTagName + - searchText From ab237777309b90e299e584cff6995618bc378ebd Mon Sep 17 00:00:00 2001 From: GPortas Date: Mon, 16 Oct 2023 13:22:30 +0100 Subject: [PATCH 5/5] Fixed: release notes --- ...-api-downloadsize-with-criteria-and-deaccessioned-support.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/release-notes/9995-files-api-downloadsize-with-criteria-and-deaccessioned-support.md b/doc/release-notes/9995-files-api-downloadsize-with-criteria-and-deaccessioned-support.md index 71c7aa3b516..020224b2094 100644 --- a/doc/release-notes/9995-files-api-downloadsize-with-criteria-and-deaccessioned-support.md +++ b/doc/release-notes/9995-files-api-downloadsize-with-criteria-and-deaccessioned-support.md @@ -1,4 +1,4 @@ -Extended the getDownloadSize endpoint (/api/datasets/{id}/versions/{versionId}/files/downloadsize), including the following new features: +Extended the getDownloadSize endpoint (/api/datasets/{id}/versions/{versionId}/downloadsize), including the following new features: - The endpoint now accepts a new boolean optional query parameter "includeDeaccessioned", which, if enabled, causes the endpoint to consider deaccessioned dataset versions when searching for versions to obtain the file total download size.