diff --git a/doc/release-notes/9995-files-api-downloadsize-with-criteria-and-deaccessioned-support.md b/doc/release-notes/9995-files-api-downloadsize-with-criteria-and-deaccessioned-support.md new file mode 100644 index 00000000000..020224b2094 --- /dev/null +++ b/doc/release-notes/9995-files-api-downloadsize-with-criteria-and-deaccessioned-support.md @@ -0,0 +1,12 @@ +Extended the getDownloadSize endpoint (/api/datasets/{id}/versions/{versionId}/downloadsize), including the following new features: + +- The endpoint now accepts a new boolean optional query parameter "includeDeaccessioned", which, if enabled, causes the endpoint to consider deaccessioned dataset versions when searching for versions to obtain the file total download size. + + +- The endpoint now supports filtering by criteria. In particular, it accepts the following optional criteria query parameters: + + - contentType + - accessStatus + - categoryName + - tabularTagName + - searchText diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index eedf23fd04e..c103b88e682 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -971,7 +971,7 @@ The fully expanded example above (without environment variables) looks like this curl "https://demo.dataverse.org/api/datasets/24/versions/1.0/files" -This endpoint supports optional pagination, through the ``limit`` and ``offset`` query params: +This endpoint supports optional pagination, through the ``limit`` and ``offset`` query parameters: .. code-block:: bash @@ -1051,7 +1051,7 @@ Usage example: curl "https://demo.dataverse.org/api/datasets/24/versions/1.0/files?includeDeaccessioned=true" -.. note:: Keep in mind that you can combine all of the above query params depending on the results you are looking for. +.. note:: Keep in mind that you can combine all of the above query parameters depending on the results you are looking for. 
Get File Counts in a Dataset ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -1139,7 +1139,7 @@ Usage example: Please note that filtering values are case sensitive and must be correctly typed for the endpoint to recognize them. -Keep in mind that you can combine all of the above query params depending on the results you are looking for. +Keep in mind that you can combine all of the above query parameters depending on the results you are looking for. View Dataset Files and Folders as a Directory Index ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -1895,6 +1895,61 @@ Usage example: curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/datasets/24/versions/1.0/downloadsize?mode=Archival" +Category name filtering is also optionally supported, to return the size of all files available for download matching the requested category name. + +Usage example: + +.. code-block:: bash + + curl "https://demo.dataverse.org/api/datasets/24/versions/1.0/downloadsize?categoryName=Data" + +Tabular tag name filtering is also optionally supported, to return the size of all files available for download for which the requested tabular tag has been added. + +Usage example: + +.. code-block:: bash + + curl "https://demo.dataverse.org/api/datasets/24/versions/1.0/downloadsize?tabularTagName=Survey" + +Content type filtering is also optionally supported, to return the size of all files available for download matching the requested content type. + +Usage example: + +.. code-block:: bash + + curl "https://demo.dataverse.org/api/datasets/24/versions/1.0/downloadsize?contentType=image/png" + +Filtering by search text is also optionally supported. The search will be applied to the labels and descriptions of the dataset files, to return the size of all files available for download that contain the searched text in one of those fields. + +Usage example: + +.. 
code-block:: bash + + curl "https://demo.dataverse.org/api/datasets/24/versions/1.0/downloadsize?searchText=word" + +File access filtering is also optionally supported, using one of the following possible values: + +* ``Public`` +* ``Restricted`` +* ``EmbargoedThenRestricted`` +* ``EmbargoedThenPublic`` + +If no filter is specified, the files will match all of the above categories. + +Please note that filtering query parameters are case sensitive and must be correctly typed for the endpoint to recognize them. + +By default, deaccessioned dataset versions are not included in the search when applying the :latest or :latest-published identifiers. Additionally, when filtering by a specific version tag, you will get a "not found" error if the version is deaccessioned and you do not enable the ``includeDeaccessioned`` option described below. + +If you want to include deaccessioned dataset versions, you must set the ``includeDeaccessioned`` query parameter to ``true``. + +Usage example: + +.. code-block:: bash + + curl "https://demo.dataverse.org/api/datasets/24/versions/1.0/downloadsize?includeDeaccessioned=true" + +.. note:: Keep in mind that you can combine all of the above query parameters depending on the results you are looking for. 
+ Submit a Dataset for Review ~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionFilesServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionFilesServiceBean.java index 701ff4474ea..fc662ee80bb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionFilesServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionFilesServiceBean.java @@ -53,7 +53,7 @@ public enum FileOrderCriteria { } /** - * Mode to base the search in {@link DatasetVersionFilesServiceBean#getFilesDownloadSize(DatasetVersion, FileDownloadSizeMode)} + * Mode to base the search in {@link DatasetVersionFilesServiceBean#getFilesDownloadSize(DatasetVersion, FileSearchCriteria, FileDownloadSizeMode)} *

* All: Includes both archival and original sizes for tabular files * Archival: Includes only the archival size for tabular files @@ -191,16 +191,17 @@ public List getFileMetadatas(DatasetVersion datasetVersion, Intege * Returns the total download size of all files for a particular DatasetVersion * * @param datasetVersion the DatasetVersion to access + * @param searchCriteria for retrieving only files matching this criteria * @param mode a FileDownloadSizeMode to base the search on * @return long value of total file download size */ - public long getFilesDownloadSize(DatasetVersion datasetVersion, FileDownloadSizeMode mode) { + public long getFilesDownloadSize(DatasetVersion datasetVersion, FileSearchCriteria searchCriteria, FileDownloadSizeMode mode) { return switch (mode) { case All -> - Long.sum(getOriginalTabularFilesSize(datasetVersion), getArchivalFilesSize(datasetVersion, false)); + Long.sum(getOriginalTabularFilesSize(datasetVersion, searchCriteria), getArchivalFilesSize(datasetVersion, false, searchCriteria)); case Original -> - Long.sum(getOriginalTabularFilesSize(datasetVersion), getArchivalFilesSize(datasetVersion, true)); - case Archival -> getArchivalFilesSize(datasetVersion, false); + Long.sum(getOriginalTabularFilesSize(datasetVersion, searchCriteria), getArchivalFilesSize(datasetVersion, true, searchCriteria)); + case Archival -> getArchivalFilesSize(datasetVersion, false, searchCriteria); }; } @@ -301,22 +302,24 @@ private void applyOrderCriteriaToGetFileMetadatasQuery(JPAQuery qu } } - private long getOriginalTabularFilesSize(DatasetVersion datasetVersion) { + private long getOriginalTabularFilesSize(DatasetVersion datasetVersion, FileSearchCriteria searchCriteria) { JPAQueryFactory queryFactory = new JPAQueryFactory(em); - Long result = queryFactory + JPAQuery baseQuery = queryFactory .from(fileMetadata) .where(fileMetadata.datasetVersion.id.eq(datasetVersion.getId())) .from(dataTable) - .where(dataTable.dataFile.eq(fileMetadata.dataFile)) - 
.select(dataTable.originalFileSize.sum()).fetchFirst(); + .where(dataTable.dataFile.eq(fileMetadata.dataFile)); + applyFileSearchCriteriaToQuery(baseQuery, searchCriteria); + Long result = baseQuery.select(dataTable.originalFileSize.sum()).fetchFirst(); return (result == null) ? 0 : result; } - private long getArchivalFilesSize(DatasetVersion datasetVersion, boolean ignoreTabular) { + private long getArchivalFilesSize(DatasetVersion datasetVersion, boolean ignoreTabular, FileSearchCriteria searchCriteria) { JPAQueryFactory queryFactory = new JPAQueryFactory(em); JPAQuery baseQuery = queryFactory .from(fileMetadata) .where(fileMetadata.datasetVersion.id.eq(datasetVersion.getId())); + applyFileSearchCriteriaToQuery(baseQuery, searchCriteria); Long result; if (ignoreTabular) { result = baseQuery.where(fileMetadata.dataFile.dataTables.isEmpty()).select(fileMetadata.dataFile.filesize.sum()).fetchFirst(); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 5b12e0a372f..9f9826ca53b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -3005,19 +3005,37 @@ public Response getStorageSize(@Context ContainerRequestContext crc, @PathParam( public Response getDownloadSize(@Context ContainerRequestContext crc, @PathParam("identifier") String dvIdtf, @PathParam("versionId") String version, + @QueryParam("contentType") String contentType, + @QueryParam("accessStatus") String accessStatus, + @QueryParam("categoryName") String categoryName, + @QueryParam("tabularTagName") String tabularTagName, + @QueryParam("searchText") String searchText, @QueryParam("mode") String mode, + @QueryParam("includeDeaccessioned") boolean includeDeaccessioned, @Context UriInfo uriInfo, @Context HttpHeaders headers) { return response(req -> { + FileSearchCriteria fileSearchCriteria; + try { + fileSearchCriteria = new FileSearchCriteria( + 
contentType, + accessStatus != null ? FileSearchCriteria.FileAccessStatus.valueOf(accessStatus) : null, + categoryName, + tabularTagName, + searchText + ); + } catch (IllegalArgumentException e) { + return badRequest(BundleUtil.getStringFromBundle("datasets.api.version.files.invalid.access.status", List.of(accessStatus))); + } DatasetVersionFilesServiceBean.FileDownloadSizeMode fileDownloadSizeMode; try { fileDownloadSizeMode = mode != null ? DatasetVersionFilesServiceBean.FileDownloadSizeMode.valueOf(mode) : DatasetVersionFilesServiceBean.FileDownloadSizeMode.All; } catch (IllegalArgumentException e) { return error(Response.Status.BAD_REQUEST, "Invalid mode: " + mode); } - DatasetVersion datasetVersion = getDatasetVersionOrDie(req, version, findDatasetOrDie(dvIdtf), uriInfo, headers); - long datasetStorageSize = datasetVersionFilesServiceBean.getFilesDownloadSize(datasetVersion, fileDownloadSizeMode); + DatasetVersion datasetVersion = getDatasetVersionOrDie(req, version, findDatasetOrDie(dvIdtf), uriInfo, headers, includeDeaccessioned); + long datasetStorageSize = datasetVersionFilesServiceBean.getFilesDownloadSize(datasetVersion, fileSearchCriteria, fileDownloadSizeMode); String message = MessageFormat.format(BundleUtil.getStringFromBundle("datasets.api.datasize.download"), datasetStorageSize); JsonObjectBuilder jsonObjectBuilder = Json.createObjectBuilder(); jsonObjectBuilder.add("message", message); diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index d246fc3e62a..e4e832f81d4 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -3960,7 +3960,7 @@ public void getDownloadSize() throws IOException, InterruptedException { int expectedTextFilesStorageSize = testFileSize1 + testFileSize2; // Get the total size when there are no tabular files - Response getDownloadSizeResponse = 
UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST, DatasetVersionFilesServiceBean.FileDownloadSizeMode.All.toString(), apiToken); + Response getDownloadSizeResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST, null, null, null, null, null, DatasetVersionFilesServiceBean.FileDownloadSizeMode.All.toString(), false, apiToken); getDownloadSizeResponse.then().assertThat().statusCode(OK.getStatusCode()) .body("data.storageSize", equalTo(expectedTextFilesStorageSize)); @@ -3975,7 +3975,7 @@ public void getDownloadSize() throws IOException, InterruptedException { Thread.sleep(2000); // Get the total size ignoring the original tabular file sizes - getDownloadSizeResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST, DatasetVersionFilesServiceBean.FileDownloadSizeMode.Archival.toString(), apiToken); + getDownloadSizeResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST, null, null, null, null, null, DatasetVersionFilesServiceBean.FileDownloadSizeMode.Archival.toString(), false, apiToken); getDownloadSizeResponse.then().assertThat().statusCode(OK.getStatusCode()); int actualSizeIgnoringOriginalTabularSizes = Integer.parseInt(getDownloadSizeResponse.getBody().jsonPath().getString("data.storageSize")); @@ -3986,7 +3986,7 @@ public void getDownloadSize() throws IOException, InterruptedException { // Get the total size including only original sizes and ignoring archival sizes for tabular files int expectedSizeIncludingOnlyOriginalForTabular = tabularOriginalSize + expectedTextFilesStorageSize; - getDownloadSizeResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST, DatasetVersionFilesServiceBean.FileDownloadSizeMode.Original.toString(), apiToken); + getDownloadSizeResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST, null, null, null, null, null, DatasetVersionFilesServiceBean.FileDownloadSizeMode.Original.toString(), false, apiToken); getDownloadSizeResponse.then().assertThat().statusCode(OK.getStatusCode()) 
.body("data.storageSize", equalTo(expectedSizeIncludingOnlyOriginalForTabular)); @@ -3994,13 +3994,13 @@ public void getDownloadSize() throws IOException, InterruptedException { int tabularArchivalSize = actualSizeIgnoringOriginalTabularSizes - expectedTextFilesStorageSize; int expectedSizeIncludingAllSizes = tabularArchivalSize + tabularOriginalSize + expectedTextFilesStorageSize; - getDownloadSizeResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST, DatasetVersionFilesServiceBean.FileDownloadSizeMode.All.toString(), apiToken); + getDownloadSizeResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST, null, null, null, null, null, DatasetVersionFilesServiceBean.FileDownloadSizeMode.All.toString(), false, apiToken); getDownloadSizeResponse.then().assertThat().statusCode(OK.getStatusCode()) .body("data.storageSize", equalTo(expectedSizeIncludingAllSizes)); // Get the total size sending invalid file download size mode String invalidMode = "invalidMode"; - getDownloadSizeResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST, invalidMode, apiToken); + getDownloadSizeResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST, null, null, null, null, null, invalidMode, false, apiToken); getDownloadSizeResponse.then().assertThat().statusCode(BAD_REQUEST.getStatusCode()) .body("message", equalTo("Invalid mode: " + invalidMode)); @@ -4014,15 +4014,47 @@ public void getDownloadSize() throws IOException, InterruptedException { // Get the total size including only original sizes and ignoring archival sizes for tabular files expectedSizeIncludingOnlyOriginalForTabular = tabularOriginalSize + expectedSizeIncludingOnlyOriginalForTabular; - getDownloadSizeResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST, DatasetVersionFilesServiceBean.FileDownloadSizeMode.Original.toString(), apiToken); + getDownloadSizeResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST, null, null, null, null, null, 
DatasetVersionFilesServiceBean.FileDownloadSizeMode.Original.toString(), false, apiToken); getDownloadSizeResponse.then().assertThat().statusCode(OK.getStatusCode()) .body("data.storageSize", equalTo(expectedSizeIncludingOnlyOriginalForTabular)); // Get the total size including both the original and archival tabular file sizes expectedSizeIncludingAllSizes = tabularArchivalSize + tabularOriginalSize + expectedSizeIncludingAllSizes; - getDownloadSizeResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST, DatasetVersionFilesServiceBean.FileDownloadSizeMode.All.toString(), apiToken); + getDownloadSizeResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST, null, null, null, null, null, DatasetVersionFilesServiceBean.FileDownloadSizeMode.All.toString(), false, apiToken); getDownloadSizeResponse.then().assertThat().statusCode(OK.getStatusCode()) .body("data.storageSize", equalTo(expectedSizeIncludingAllSizes)); + + // Get the total size including both the original and archival tabular file sizes with search criteria + getDownloadSizeResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST, "text/plain", FileSearchCriteria.FileAccessStatus.Public.toString(), null, null, "test_", DatasetVersionFilesServiceBean.FileDownloadSizeMode.All.toString(), false, apiToken); + // We exclude tabular sizes from the expected result since the search criteria filters by content type "text/plain" and search text "test_" + int expectedSizeIncludingAllSizesAndApplyingCriteria = testFileSize1 + testFileSize2; + getDownloadSizeResponse.then().assertThat().statusCode(OK.getStatusCode()) + .body("data.storageSize", equalTo(expectedSizeIncludingAllSizesAndApplyingCriteria)); + + // Test Deaccessioned + Response publishDataverseResponse = UtilIT.publishDataverseViaNativeApi(dataverseAlias, apiToken); + publishDataverseResponse.then().assertThat().statusCode(OK.getStatusCode()); + Response publishDatasetResponse = UtilIT.publishDatasetViaNativeApi(datasetId, "major", 
apiToken); + publishDatasetResponse.then().assertThat().statusCode(OK.getStatusCode()); + + Response deaccessionDatasetResponse = UtilIT.deaccessionDataset(datasetId, DS_VERSION_LATEST_PUBLISHED, "Test deaccession reason.", null, apiToken); + deaccessionDatasetResponse.then().assertThat().statusCode(OK.getStatusCode()); + + // includeDeaccessioned false + Response getVersionFileCountsResponseNoDeaccessioned = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST_PUBLISHED, null, null, null, null, null, DatasetVersionFilesServiceBean.FileDownloadSizeMode.All.toString(), false, apiToken); + getVersionFileCountsResponseNoDeaccessioned.then().assertThat().statusCode(NOT_FOUND.getStatusCode()); + + // includeDeaccessioned true + Response getVersionFileCountsResponseDeaccessioned = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST_PUBLISHED, null, null, null, null, null, DatasetVersionFilesServiceBean.FileDownloadSizeMode.All.toString(), true, apiToken); + getVersionFileCountsResponseDeaccessioned.then().assertThat().statusCode(OK.getStatusCode()); + + // Test that the dataset file counts for a deaccessioned dataset cannot be accessed by a guest + // By latest published version + Response getVersionFileCountsGuestUserResponse = UtilIT.getDownloadSize(datasetId, DS_VERSION_LATEST_PUBLISHED, null, null, null, null, null, DatasetVersionFilesServiceBean.FileDownloadSizeMode.All.toString(), true, null); + getVersionFileCountsGuestUserResponse.then().assertThat().statusCode(NOT_FOUND.getStatusCode()); + // By specific version 1.0 + getVersionFileCountsGuestUserResponse = UtilIT.getDownloadSize(datasetId, "1.0", null, null, null, null, null, DatasetVersionFilesServiceBean.FileDownloadSizeMode.All.toString(), true, null); + getVersionFileCountsGuestUserResponse.then().assertThat().statusCode(NOT_FOUND.getStatusCode()); } } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index e8311a4fc77..9ad151d0ee6 
100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -3448,7 +3448,15 @@ static Response createFileEmbargo(Integer datasetId, Integer fileId, String date .post("/api/datasets/" + datasetId + "/files/actions/:set-embargo"); } - static Response getVersionFileCounts(Integer datasetId, String version, String contentType, String accessStatus, String categoryName, String tabularTagName, String searchText, boolean includeDeaccessioned, String apiToken) { + static Response getVersionFileCounts(Integer datasetId, + String version, + String contentType, + String accessStatus, + String categoryName, + String tabularTagName, + String searchText, + boolean includeDeaccessioned, + String apiToken) { RequestSpecification requestSpecification = given() .queryParam("includeDeaccessioned", includeDeaccessioned); if (apiToken != null) { @@ -3525,10 +3533,38 @@ static Response deaccessionDataset(Integer datasetId, String version, String dea .post("/api/datasets/" + datasetId + "/versions/" + version + "/deaccession"); } - static Response getDownloadSize(Integer datasetId, String version, String mode, String apiToken) { - return given() - .header(API_TOKEN_HTTP_HEADER, apiToken) - .queryParam("mode", mode) + static Response getDownloadSize(Integer datasetId, + String version, + String contentType, + String accessStatus, + String categoryName, + String tabularTagName, + String searchText, + String mode, + boolean includeDeaccessioned, + String apiToken) { + RequestSpecification requestSpecification = given() + .queryParam("includeDeaccessioned", includeDeaccessioned) + .queryParam("mode", mode); + if (apiToken != null) { + requestSpecification.header(API_TOKEN_HTTP_HEADER, apiToken); + } + if (contentType != null) { + requestSpecification = requestSpecification.queryParam("contentType", contentType); + } + if (accessStatus != null) { + requestSpecification = requestSpecification.queryParam("accessStatus", 
accessStatus); + } + if (categoryName != null) { + requestSpecification = requestSpecification.queryParam("categoryName", categoryName); + } + if (tabularTagName != null) { + requestSpecification = requestSpecification.queryParam("tabularTagName", tabularTagName); + } + if (searchText != null) { + requestSpecification = requestSpecification.queryParam("searchText", searchText); + } + return requestSpecification .get("/api/datasets/" + datasetId + "/versions/" + version + "/downloadsize"); } }