affectedFileIds = fileService.selectFilesWithMissingOriginalTypes();
+
+ if (affectedFileIds.isEmpty()) {
+ info.add("message",
+ "All the tabular files in the database already have the original types set correctly; exiting.");
+ } else {
+ for (Long fileid : affectedFileIds) {
+ logger.fine("found file id: " + fileid);
+ }
+ info.add("message", "Found " + affectedFileIds.size()
+ + " tabular files with missing original types. Kicking off an async job that will repair the files in the background.");
+ }
+
+ ingestService.fixMissingOriginalTypes(affectedFileIds);
+
+ return ok(info);
+ }
@Path("datafiles/integrity/fixmissingoriginalsizes")
@GET
@@ -1441,60 +1421,60 @@ public Response fixMissingOriginalSizes(@QueryParam("limit") Integer limit) {
return ok(info);
}
- /**
- * This method is used in API tests, called from UtilIt.java.
- */
- @GET
- @Path("datasets/thumbnailMetadata/{id}")
- public Response getDatasetThumbnailMetadata(@PathParam("id") Long idSupplied) {
- Dataset dataset = datasetSvc.find(idSupplied);
- if (dataset == null) {
- return error(Response.Status.NOT_FOUND, "Could not find dataset based on id supplied: " + idSupplied + ".");
- }
- JsonObjectBuilder data = Json.createObjectBuilder();
- DatasetThumbnail datasetThumbnail = dataset.getDatasetThumbnail(ImageThumbConverter.DEFAULT_CARDIMAGE_SIZE);
- data.add("isUseGenericThumbnail", dataset.isUseGenericThumbnail());
- data.add("datasetLogoPresent", DatasetUtil.isDatasetLogoPresent(dataset, ImageThumbConverter.DEFAULT_CARDIMAGE_SIZE));
- if (datasetThumbnail != null) {
- data.add("datasetThumbnailBase64image", datasetThumbnail.getBase64image());
- DataFile dataFile = datasetThumbnail.getDataFile();
- if (dataFile != null) {
- /**
- * @todo Change this from a String to a long.
- */
- data.add("dataFileId", dataFile.getId().toString());
- }
- }
- return ok(data);
- }
-
- /**
- * validatePassword
- *
- * Validate a password with an API call
- *
- * @param password
- * The password
- * @return A response with the validation result.
- */
- @Path("validatePassword")
- @POST
- public Response validatePassword(String password) {
-
- final List errors = passwordValidatorService.validate(password, new Date(), false);
- final JsonArrayBuilder errorArray = Json.createArrayBuilder();
- errors.forEach(errorArray::add);
- return ok(Json.createObjectBuilder().add("password", password).add("errors", errorArray));
- }
-
- @GET
- @Path("/isOrcid")
- public Response isOrcidEnabled() {
- return authSvc.isOrcidEnabled() ? ok("Orcid is enabled") : ok("no orcid for you.");
- }
+ /**
+ * Returns thumbnail-related metadata for a dataset as a JSON object: the
+ * {@code isUseGenericThumbnail} and {@code datasetLogoPresent} flags and, when
+ * the dataset has a thumbnail, {@code datasetThumbnailBase64image} plus the
+ * {@code dataFileId} of the data file the thumbnail comes from (if any).
+ * This method is used in API tests, called from UtilIt.java.
+ *
+ * @param idSupplied id used to look the dataset up via {@code datasetSvc.find}
+ * @return a 404 error response when no dataset is found; otherwise an OK
+ * response wrapping the JSON object described above
+ */
+ @GET
+ @Path("datasets/thumbnailMetadata/{id}")
+ public Response getDatasetThumbnailMetadata(@PathParam("id") Long idSupplied) {
+ Dataset dataset = datasetSvc.find(idSupplied);
+ if (dataset == null) {
+ return error(Response.Status.NOT_FOUND, "Could not find dataset based on id supplied: " + idSupplied + ".");
+ }
+ JsonObjectBuilder data = Json.createObjectBuilder();
+ DatasetThumbnail datasetThumbnail = dataset.getDatasetThumbnail(ImageThumbConverter.DEFAULT_CARDIMAGE_SIZE);
+ data.add("isUseGenericThumbnail", dataset.isUseGenericThumbnail());
+ data.add("datasetLogoPresent", DatasetUtil.isDatasetLogoPresent(dataset, ImageThumbConverter.DEFAULT_CARDIMAGE_SIZE));
+ // The thumbnail-specific keys are only present when the dataset has a thumbnail.
+ if (datasetThumbnail != null) {
+ data.add("datasetThumbnailBase64image", datasetThumbnail.getBase64image());
+ DataFile dataFile = datasetThumbnail.getDataFile();
+ if (dataFile != null) {
+ /**
+ * @todo Change this from a String to a long.
+ */
+ data.add("dataFileId", dataFile.getId().toString());
+ }
+ }
+ return ok(data);
+ }
+
+    /**
+     * Validates a password through {@code passwordValidatorService} and reports
+     * the outcome as JSON.
+     *
+     * @param password the password to validate, taken from the request body
+     * @return an OK response whose JSON object holds the submitted
+     *         {@code password} and an {@code errors} array with one entry per
+     *         validation error reported by the validator
+     */
+    @Path("validatePassword")
+    @POST
+    public Response validatePassword(String password) {
+        // Typed list so that errorArray::add resolves to JsonArrayBuilder.add(String).
+        final List<String> errors = passwordValidatorService.validate(password, new Date(), false);
+        final JsonArrayBuilder errorArray = Json.createArrayBuilder();
+        errors.forEach(errorArray::add);
+        // NOTE(review): the submitted password is echoed back in the response body;
+        // kept for backward compatibility, but confirm this is intended.
+        return ok(Json.createObjectBuilder().add("password", password).add("errors", errorArray));
+    }
+
+    /**
+     * Reports whether ORCID is enabled according to {@code authSvc.isOrcidEnabled()}.
+     *
+     * @return an OK response whose message says whether ORCID is enabled
+     */
+    @GET
+    @Path("/isOrcid")
+    public Response isOrcidEnabled() {
+        if (authSvc.isOrcidEnabled()) {
+            return ok("Orcid is enabled");
+        }
+        return ok("no orcid for you.");
+    }
@POST
- @AuthRequired
+ @AuthRequired
@Path("{id}/reregisterHDLToPID")
public Response reregisterHdlToPID(@Context ContainerRequestContext crc, @PathParam("id") String id) {
logger.info("Starting to reregister " + id + " Dataset Id. (from hdl to doi)" + new Date());
@@ -1825,7 +1805,7 @@ public Response updateHashValues(@Context ContainerRequestContext crc, @PathPara
}
@POST
- @AuthRequired
+ @AuthRequired
@Path("/computeDataFileHashValue/{fileId}/algorithm/{alg}")
public Response computeDataFileHashValue(@Context ContainerRequestContext crc, @PathParam("fileId") String fileId, @PathParam("alg") String alg) {
@@ -1887,7 +1867,7 @@ public Response computeDataFileHashValue(@Context ContainerRequestContext crc, @
}
@POST
- @AuthRequired
+ @AuthRequired
@Path("/validateDataFileHashValue/{fileId}")
public Response validateDataFileHashValue(@Context ContainerRequestContext crc, @PathParam("fileId") String fileId) {
@@ -1954,7 +1934,7 @@ public Response validateDataFileHashValue(@Context ContainerRequestContext crc,
}
@POST
- @AuthRequired
+ @AuthRequired
@Path("/submitDatasetVersionToArchive/{id}/{version}")
public Response submitDatasetVersionToArchive(@Context ContainerRequestContext crc, @PathParam("id") String dsid,
@PathParam("version") String versionNumber) {
@@ -2027,7 +2007,7 @@ public void run() {
* @return
*/
@POST
- @AuthRequired
+ @AuthRequired
@Path("/archiveAllUnarchivedDatasetVersions")
public Response archiveAllUnarchivedDatasetVersions(@Context ContainerRequestContext crc, @QueryParam("listonly") boolean listonly, @QueryParam("limit") Integer limit, @QueryParam("latestonly") boolean latestonly) {
@@ -2126,7 +2106,7 @@ public Response clearMetricsCacheByName(@PathParam("name") String name) {
}
@GET
- @AuthRequired
+ @AuthRequired
@Path("/dataverse/{alias}/addRoleAssignmentsToChildren")
public Response addRoleAssignementsToChildren(@Context ContainerRequestContext crc, @PathParam("alias") String alias) throws WrappedResponse {
Dataverse owner = dataverseSvc.findByAlias(alias);
@@ -2157,90 +2137,90 @@ public Response addRoleAssignementsToChildren(@Context ContainerRequestContext c
}
@GET
- @AuthRequired
+ @AuthRequired
@Path("/dataverse/{alias}/storageDriver")
public Response getStorageDriver(@Context ContainerRequestContext crc, @PathParam("alias") String alias) throws WrappedResponse {
- Dataverse dataverse = dataverseSvc.findByAlias(alias);
- if (dataverse == null) {
- return error(Response.Status.NOT_FOUND, "Could not find dataverse based on alias supplied: " + alias + ".");
- }
- try {
- AuthenticatedUser user = getRequestAuthenticatedUserOrDie(crc);
- if (!user.isSuperuser()) {
- return error(Response.Status.FORBIDDEN, "Superusers only.");
- }
- } catch (WrappedResponse wr) {
- return wr.getResponse();
- }
- //Note that this returns what's set directly on this dataverse. If null/DataAccess.UNDEFINED_STORAGE_DRIVER_IDENTIFIER, the user would have to recurse the chain of parents to find the effective storageDriver
- return ok(dataverse.getStorageDriverId());
+ Dataverse dataverse = dataverseSvc.findByAlias(alias);
+ if (dataverse == null) {
+ return error(Response.Status.NOT_FOUND, "Could not find dataverse based on alias supplied: " + alias + ".");
+ }
+ try {
+ AuthenticatedUser user = getRequestAuthenticatedUserOrDie(crc);
+ if (!user.isSuperuser()) {
+ return error(Response.Status.FORBIDDEN, "Superusers only.");
+ }
+ } catch (WrappedResponse wr) {
+ return wr.getResponse();
+ }
+ //Note that this returns what's set directly on this dataverse. If null/DataAccess.UNDEFINED_STORAGE_DRIVER_IDENTIFIER, the user would have to recurse the chain of parents to find the effective storageDriver
+ return ok(dataverse.getStorageDriverId());
}
@PUT
- @AuthRequired
+ @AuthRequired
@Path("/dataverse/{alias}/storageDriver")
public Response setStorageDriver(@Context ContainerRequestContext crc, @PathParam("alias") String alias, String label) throws WrappedResponse {
- Dataverse dataverse = dataverseSvc.findByAlias(alias);
- if (dataverse == null) {
- return error(Response.Status.NOT_FOUND, "Could not find dataverse based on alias supplied: " + alias + ".");
- }
- try {
- AuthenticatedUser user = getRequestAuthenticatedUserOrDie(crc);
- if (!user.isSuperuser()) {
- return error(Response.Status.FORBIDDEN, "Superusers only.");
- }
- } catch (WrappedResponse wr) {
- return wr.getResponse();
- }
- for (Entry store: DataAccess.getStorageDriverLabels().entrySet()) {
- if(store.getKey().equals(label)) {
- dataverse.setStorageDriverId(store.getValue());
- return ok("Storage set to: " + store.getKey() + "/" + store.getValue());
- }
- }
- return error(Response.Status.BAD_REQUEST,
- "No Storage Driver found for : " + label);
+ Dataverse dataverse = dataverseSvc.findByAlias(alias);
+ if (dataverse == null) {
+ return error(Response.Status.NOT_FOUND, "Could not find dataverse based on alias supplied: " + alias + ".");
+ }
+ try {
+ AuthenticatedUser user = getRequestAuthenticatedUserOrDie(crc);
+ if (!user.isSuperuser()) {
+ return error(Response.Status.FORBIDDEN, "Superusers only.");
+ }
+ } catch (WrappedResponse wr) {
+ return wr.getResponse();
+ }
+ for (Entry store: DataAccess.getStorageDriverLabels().entrySet()) {
+ if(store.getKey().equals(label)) {
+ dataverse.setStorageDriverId(store.getValue());
+ return ok("Storage set to: " + store.getKey() + "/" + store.getValue());
+ }
+ }
+ return error(Response.Status.BAD_REQUEST,
+ "No Storage Driver found for : " + label);
}
@DELETE
- @AuthRequired
+ @AuthRequired
@Path("/dataverse/{alias}/storageDriver")
public Response resetStorageDriver(@Context ContainerRequestContext crc, @PathParam("alias") String alias) throws WrappedResponse {
- Dataverse dataverse = dataverseSvc.findByAlias(alias);
- if (dataverse == null) {
- return error(Response.Status.NOT_FOUND, "Could not find dataverse based on alias supplied: " + alias + ".");
- }
- try {
- AuthenticatedUser user = getRequestAuthenticatedUserOrDie(crc);
- if (!user.isSuperuser()) {
- return error(Response.Status.FORBIDDEN, "Superusers only.");
- }
- } catch (WrappedResponse wr) {
- return wr.getResponse();
- }
- dataverse.setStorageDriverId("");
- return ok("Storage reset to default: " + DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER);
+ Dataverse dataverse = dataverseSvc.findByAlias(alias);
+ if (dataverse == null) {
+ return error(Response.Status.NOT_FOUND, "Could not find dataverse based on alias supplied: " + alias + ".");
+ }
+ try {
+ AuthenticatedUser user = getRequestAuthenticatedUserOrDie(crc);
+ if (!user.isSuperuser()) {
+ return error(Response.Status.FORBIDDEN, "Superusers only.");
+ }
+ } catch (WrappedResponse wr) {
+ return wr.getResponse();
+ }
+ dataverse.setStorageDriverId("");
+ return ok("Storage reset to default: " + DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER);
}
@GET
- @AuthRequired
+ @AuthRequired
@Path("/dataverse/storageDrivers")
public Response listStorageDrivers(@Context ContainerRequestContext crc) throws WrappedResponse {
- try {
- AuthenticatedUser user = getRequestAuthenticatedUserOrDie(crc);
- if (!user.isSuperuser()) {
- return error(Response.Status.FORBIDDEN, "Superusers only.");
- }
- } catch (WrappedResponse wr) {
- return wr.getResponse();
- }
- JsonObjectBuilder bld = jsonObjectBuilder();
- DataAccess.getStorageDriverLabels().entrySet().forEach(s -> bld.add(s.getKey(), s.getValue()));
- return ok(bld);
+ try {
+ AuthenticatedUser user = getRequestAuthenticatedUserOrDie(crc);
+ if (!user.isSuperuser()) {
+ return error(Response.Status.FORBIDDEN, "Superusers only.");
+ }
+ } catch (WrappedResponse wr) {
+ return wr.getResponse();
+ }
+ JsonObjectBuilder bld = jsonObjectBuilder();
+ DataAccess.getStorageDriverLabels().entrySet().forEach(s -> bld.add(s.getKey(), s.getValue()));
+ return ok(bld);
}
@GET
- @AuthRequired
+ @AuthRequired
@Path("/dataverse/{alias}/curationLabelSet")
public Response getCurationLabelSet(@Context ContainerRequestContext crc, @PathParam("alias") String alias) throws WrappedResponse {
Dataverse dataverse = dataverseSvc.findByAlias(alias);
@@ -2262,7 +2242,7 @@ public Response getCurationLabelSet(@Context ContainerRequestContext crc, @PathP
}
@PUT
- @AuthRequired
+ @AuthRequired
@Path("/dataverse/{alias}/curationLabelSet")
public Response setCurationLabelSet(@Context ContainerRequestContext crc, @PathParam("alias") String alias, @QueryParam("name") String name) throws WrappedResponse {
Dataverse dataverse = dataverseSvc.findByAlias(alias);
@@ -2293,7 +2273,7 @@ public Response setCurationLabelSet(@Context ContainerRequestContext crc, @PathP
}
@DELETE
- @AuthRequired
+ @AuthRequired
@Path("/dataverse/{alias}/curationLabelSet")
public Response resetCurationLabelSet(@Context ContainerRequestContext crc, @PathParam("alias") String alias) throws WrappedResponse {
Dataverse dataverse = dataverseSvc.findByAlias(alias);
@@ -2313,7 +2293,7 @@ public Response resetCurationLabelSet(@Context ContainerRequestContext crc, @Pat
}
@GET
- @AuthRequired
+ @AuthRequired
@Path("/dataverse/curationLabelSets")
public Response listCurationLabelSets(@Context ContainerRequestContext crc) throws WrappedResponse {
try {
@@ -2423,7 +2403,7 @@ public Response getBannerMessages(@PathParam("id") Long id) throws WrappedRespon
}
@POST
- @AuthRequired
+ @AuthRequired
@Consumes("application/json")
@Path("/requestSignedUrl")
public Response getSignedUrl(@Context ContainerRequestContext crc, JsonObject urlInfo) {
@@ -2541,4 +2521,160 @@ public Response getFeatureFlag(@PathParam("flag") String flagIn) {
}
}
+    /**
+     * Audits datasets for data files whose physical file does not exist in
+     * storage and for data files that have no {@code FileMetadata}. Superusers only.
+     *
+     * <p>Without {@code datasetIdentifierList}, the ids returned by
+     * {@code datasetService.findAllLocalDatasetIds()} that fall within
+     * {@code [firstId, lastId]} are checked. With it, exactly the listed datasets
+     * are checked and the id range is ignored.
+     *
+     * <p>The response object contains {@code datasetsChecked}, a {@code datasets}
+     * array (one entry per dataset with findings, carrying {@code id}, {@code pid},
+     * {@code persistentURL} and the {@code missingFiles} and/or
+     * {@code missingFileMetadata} arrays) and a {@code failures} array for
+     * identifiers, datasets or files that could not be examined. {@code firstId},
+     * {@code lastId} and {@code datasetIdentifierList} are echoed back when they
+     * constrain the run.
+     *
+     * @param crc request context used to authenticate the caller
+     * @param firstId lowest dataset id to audit; defaults to 0 when absent
+     * @param lastId highest dataset id to audit; defaults to {@code Long.MAX_VALUE} when absent
+     * @param datasetIdentifierList optional comma-separated list of dataset global ids
+     * @return 403 for non-superusers, 400 when {@code lastId} is smaller than
+     *         {@code firstId}, otherwise an OK response with the audit report
+     * @throws WrappedResponse declared by the signature; the lookup and
+     *         authentication failures handled here are converted into responses
+     *         or {@code failures} entries instead
+     */
+    @GET
+    @AuthRequired
+    @Path("/datafiles/auditFiles")
+    public Response getAuditFiles(@Context ContainerRequestContext crc,
+            @QueryParam("firstId") Long firstId, @QueryParam("lastId") Long lastId,
+            @QueryParam("datasetIdentifierList") String datasetIdentifierList) throws WrappedResponse {
+        try {
+            AuthenticatedUser user = getRequestAuthenticatedUserOrDie(crc);
+            if (!user.isSuperuser()) {
+                return error(Response.Status.FORBIDDEN, "Superusers only.");
+            }
+        } catch (WrappedResponse wr) {
+            return wr.getResponse();
+        }
+
+        int datasetsChecked = 0;
+        long startId = (firstId == null ? 0 : firstId);
+        long endId = (lastId == null ? Long.MAX_VALUE : lastId);
+
+        List<String> datasetIdentifiers;
+        if (datasetIdentifierList == null || datasetIdentifierList.isEmpty()) {
+            datasetIdentifiers = Collections.emptyList();
+        } else {
+            // An explicit identifier list overrides any firstId/lastId range.
+            startId = 0;
+            endId = Long.MAX_VALUE;
+            datasetIdentifiers = List.of(datasetIdentifierList.split(","));
+        }
+        if (endId < startId) {
+            return badRequest("Invalid Parameters: lastId must be equal to or greater than firstId");
+        }
+
+        NullSafeJsonBuilder jsonObjectBuilder = NullSafeJsonBuilder.jsonObjectBuilder();
+        JsonArrayBuilder jsonDatasetsArrayBuilder = Json.createArrayBuilder();
+        JsonArrayBuilder jsonFailuresArrayBuilder = Json.createArrayBuilder();
+
+        // Only echo the range bounds that actually restrict the run.
+        if (startId > 0) {
+            jsonObjectBuilder.add("firstId", startId);
+        }
+        if (endId < Long.MAX_VALUE) {
+            jsonObjectBuilder.add("lastId", endId);
+        }
+
+        // compile the list of ids to process
+        List<Long> datasetIds;
+        if (datasetIdentifiers.isEmpty()) {
+            datasetIds = datasetService.findAllLocalDatasetIds();
+        } else {
+            datasetIds = new ArrayList<>(datasetIdentifiers.size());
+            JsonArrayBuilder jab = Json.createArrayBuilder();
+            for (String id : datasetIdentifiers) {
+                String dId = id.trim();
+                jab.add(dId);
+                Dataset d = datasetService.findByGlobalId(dId);
+                if (d != null) {
+                    datasetIds.add(d.getId());
+                } else {
+                    NullSafeJsonBuilder job = NullSafeJsonBuilder.jsonObjectBuilder();
+                    job.add("datasetIdentifier", dId);
+                    job.add("reason", "Not Found");
+                    jsonFailuresArrayBuilder.add(job);
+                }
+            }
+            jsonObjectBuilder.add("datasetIdentifierList", jab);
+        }
+
+        for (Long datasetId : datasetIds) {
+            if (datasetId < startId) {
+                continue;
+            } else if (datasetId > endId) {
+                // NOTE(review): stopping here assumes the ids arrive in ascending order — confirm
+                // against findAllLocalDatasetIds(). With an identifier list endId is
+                // Long.MAX_VALUE, so this branch is never taken.
+                break;
+            }
+            Dataset dataset;
+            try {
+                dataset = findDatasetOrDie(String.valueOf(datasetId));
+                datasetsChecked++;
+            } catch (WrappedResponse e) {
+                NullSafeJsonBuilder job = NullSafeJsonBuilder.jsonObjectBuilder();
+                job.add("datasetId", datasetId);
+                job.add("reason", e.getMessage());
+                jsonFailuresArrayBuilder.add(job);
+                continue;
+            }
+
+            // Findings are collected as JSON objects right away. Encoding them as
+            // comma-joined strings and splitting them again later breaks as soon as a
+            // label or directory contains a comma.
+            List<NullSafeJsonBuilder> missingFiles = new ArrayList<>();
+            List<NullSafeJsonBuilder> missingFileMetadata = new ArrayList<>();
+            try {
+                StorageIO<?> datasetIO = DataAccess.getStorageIO(dataset);
+                // NOTE(review): the returned list is not used by this audit; the call is kept
+                // because an IOException raised here is reported as a dataset-level failure below.
+                datasetIO.cleanUp(s -> true, true);
+                // add files that are in dataset files but not in storage, or DataFiles with missing FileMetadata
+                for (DataFile df : dataset.getFiles()) {
+                    try {
+                        StorageIO<?> datafileIO = df.getStorageIO();
+                        String storageId = df.getStorageIdentifier();
+                        FileMetadata fm = df.getFileMetadata();
+                        if (!datafileIO.exists()) {
+                            NullSafeJsonBuilder missingFile = NullSafeJsonBuilder.jsonObjectBuilder()
+                                    .add("storageIdentifier", storageId);
+                            if (fm != null) {
+                                // Both conditions must hold: the previous "||" dereferenced a null
+                                // directory label and threw a NullPointerException.
+                                String directoryLabel = fm.getDirectoryLabel();
+                                if (directoryLabel != null && !directoryLabel.isEmpty()) {
+                                    missingFile.add("directoryLabel", directoryLabel);
+                                }
+                                missingFile.add("label", fm.getLabel());
+                            } else {
+                                missingFile.add("type", df.getContentType());
+                            }
+                            missingFiles.add(missingFile);
+                        }
+                        if (fm == null) {
+                            // dataFileId stays a JSON string, as in the previous output format.
+                            missingFileMetadata.add(NullSafeJsonBuilder.jsonObjectBuilder()
+                                    .add("storageIdentifier", storageId)
+                                    .add("dataFileId", String.valueOf(df.getId())));
+                        }
+                    } catch (IOException e) {
+                        NullSafeJsonBuilder job = NullSafeJsonBuilder.jsonObjectBuilder();
+                        job.add("dataFileId", df.getId());
+                        job.add("reason", e.getMessage());
+                        jsonFailuresArrayBuilder.add(job);
+                    }
+                }
+            } catch (IOException e) {
+                NullSafeJsonBuilder job = NullSafeJsonBuilder.jsonObjectBuilder();
+                job.add("datasetId", datasetId);
+                job.add("reason", e.getMessage());
+                jsonFailuresArrayBuilder.add(job);
+            }
+
+            // Only datasets with at least one finding are listed in the report.
+            if (!missingFiles.isEmpty() || !missingFileMetadata.isEmpty()) {
+                JsonObjectBuilder job = Json.createObjectBuilder();
+                job.add("id", dataset.getId());
+                job.add("pid", dataset.getProtocol() + ":" + dataset.getAuthority() + "/" + dataset.getIdentifier());
+                job.add("persistentURL", dataset.getPersistentURL());
+                if (!missingFileMetadata.isEmpty()) {
+                    JsonArrayBuilder jabMissingFileMetadata = Json.createArrayBuilder();
+                    for (NullSafeJsonBuilder entry : missingFileMetadata) {
+                        jabMissingFileMetadata.add(entry);
+                    }
+                    job.add("missingFileMetadata", jabMissingFileMetadata);
+                }
+                if (!missingFiles.isEmpty()) {
+                    JsonArrayBuilder jabMissingFiles = Json.createArrayBuilder();
+                    for (NullSafeJsonBuilder entry : missingFiles) {
+                        jabMissingFiles.add(entry);
+                    }
+                    job.add("missingFiles", jabMissingFiles);
+                }
+                jsonDatasetsArrayBuilder.add(job);
+            }
+        }
+
+        jsonObjectBuilder.add("datasetsChecked", datasetsChecked);
+        jsonObjectBuilder.add("datasets", jsonDatasetsArrayBuilder);
+        jsonObjectBuilder.add("failures", jsonFailuresArrayBuilder);
+
+        return ok(jsonObjectBuilder);
+    }
}
diff --git a/src/main/java/edu/harvard/iq/dataverse/api/DatasetFieldServiceApi.java b/src/main/java/edu/harvard/iq/dataverse/api/DatasetFieldServiceApi.java
index 01c51dc2b4c..907295ad848 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/DatasetFieldServiceApi.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/DatasetFieldServiceApi.java
@@ -126,7 +126,7 @@ public Response getByName(@PathParam("name") String name) {
String solrFieldSearchable = dsf.getSolrField().getNameSearchable();
String solrFieldFacetable = dsf.getSolrField().getNameFacetable();
String metadataBlock = dsf.getMetadataBlock().getName();
- String uri=dsf.getUri();
+ String uri = dsf.getUri();
boolean hasParent = dsf.isHasParent();
boolean allowsMultiples = dsf.isAllowMultiples();
boolean isRequired = dsf.isRequired();
@@ -243,7 +243,9 @@ public Response loadDatasetFields(File file) {
br = new BufferedReader(new FileReader("/" + file));
while ((line = br.readLine()) != null) {
lineNumber++;
- values = line.split(splitBy);
+ values = Arrays.stream(line.split(splitBy))
+ .map(String::trim)
+ .toArray(String[]::new);
if (values[0].startsWith("#")) { // Header row
switch (values[0]) {
case "#metadataBlock":
@@ -326,7 +328,7 @@ public Response loadDatasetFields(File file) {
*/
public String getGeneralErrorMessage(HeaderType header, int lineNumber, String message) {
List arguments = new ArrayList<>();
- arguments.add(header.name());
+ arguments.add(header != null ? header.name() : "unknown");
arguments.add(String.valueOf(lineNumber));
arguments.add(message);
return BundleUtil.getStringFromBundle("api.admin.datasetfield.load.GeneralErrorMessage", arguments);
@@ -334,9 +336,9 @@ public String getGeneralErrorMessage(HeaderType header, int lineNumber, String m
/**
* Turn ArrayIndexOutOfBoundsException into an informative error message
- * @param lineNumber
* @param header
- * @param e
+ * @param lineNumber
+ * @param wrongIndex
* @return
*/
public String getArrayIndexOutOfBoundMessage(HeaderType header,
diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java
index 369a22fe8d7..2ec10816acc 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java
@@ -2171,8 +2171,32 @@ public Response getAssignments(@Context ContainerRequestContext crc, @PathParam(
+ /**
+ * Legacy GET endpoint under the {@code privateUrl} path; it only delegates to
+ * {@link #getPreviewUrlData}.
+ *
+ * @deprecated use the {@code previewUrl} endpoint, {@link #getPreviewUrlData}, instead.
+ */
@GET
@AuthRequired
+ @Deprecated(forRemoval = true, since = "2024-10-17")
@Path("{id}/privateUrl")
public Response getPrivateUrlData(@Context ContainerRequestContext crc, @PathParam("id") String idSupplied) {
+ return getPreviewUrlData(crc, idSupplied);
+ }
+
+ /**
+ * Legacy POST endpoint under the {@code privateUrl} path; it only delegates to
+ * {@link #createPreviewUrl}, passing {@code anonymizedAccess} through unchanged.
+ *
+ * @deprecated use the {@code previewUrl} endpoint, {@link #createPreviewUrl}, instead.
+ */
+ @POST
+ @AuthRequired
+ @Deprecated(forRemoval = true, since = "2024-10-17")
+ @Path("{id}/privateUrl")
+ public Response createPrivateUrl(@Context ContainerRequestContext crc, @PathParam("id") String idSupplied, @DefaultValue("false") @QueryParam("anonymizedAccess") boolean anonymizedAccess) {
+ return createPreviewUrl(crc, idSupplied, anonymizedAccess);
+ }
+
+ /**
+ * Legacy DELETE endpoint under the {@code privateUrl} path; it only delegates to
+ * {@link #deletePreviewUrl}.
+ *
+ * @deprecated use the {@code previewUrl} endpoint, {@link #deletePreviewUrl}, instead.
+ */
+ @DELETE
+ @AuthRequired
+ @Deprecated(forRemoval = true, since = "2024-10-17")
+ @Path("{id}/privateUrl")
+ public Response deletePrivateUrl(@Context ContainerRequestContext crc, @PathParam("id") String idSupplied) {
+ return deletePreviewUrl(crc, idSupplied);
+ }
+
+ @GET
+ @AuthRequired
+ @Path("{id}/previewUrl")
+ public Response getPreviewUrlData(@Context ContainerRequestContext crc, @PathParam("id") String idSupplied) {
return response( req -> {
PrivateUrl privateUrl = execCommand(new GetPrivateUrlCommand(req, findDatasetOrDie(idSupplied)));
return (privateUrl != null) ? ok(json(privateUrl))
@@ -2182,8 +2206,8 @@ public Response getPrivateUrlData(@Context ContainerRequestContext crc, @PathPar
@POST
@AuthRequired
- @Path("{id}/privateUrl")
- public Response createPrivateUrl(@Context ContainerRequestContext crc, @PathParam("id") String idSupplied,@DefaultValue("false") @QueryParam ("anonymizedAccess") boolean anonymizedAccess) {
+ @Path("{id}/previewUrl")
+ public Response createPreviewUrl(@Context ContainerRequestContext crc, @PathParam("id") String idSupplied,@DefaultValue("false") @QueryParam ("anonymizedAccess") boolean anonymizedAccess) {
if(anonymizedAccess && settingsSvc.getValueForKey(SettingsServiceBean.Key.AnonymizedFieldTypeNames)==null) {
throw new NotAcceptableException("Anonymized Access not enabled");
}
@@ -2194,8 +2218,8 @@ public Response createPrivateUrl(@Context ContainerRequestContext crc, @PathPara
@DELETE
@AuthRequired
- @Path("{id}/privateUrl")
- public Response deletePrivateUrl(@Context ContainerRequestContext crc, @PathParam("id") String idSupplied) {
+ @Path("{id}/previewUrl")
+ public Response deletePreviewUrl(@Context ContainerRequestContext crc, @PathParam("id") String idSupplied) {
return response( req -> {
Dataset dataset = findDatasetOrDie(idSupplied);
PrivateUrl privateUrl = execCommand(new GetPrivateUrlCommand(req, dataset));
@@ -2208,6 +2232,7 @@ public Response deletePrivateUrl(@Context ContainerRequestContext crc, @PathPara
}, getRequestUser(crc));
}
+
@GET
@AuthRequired
@Path("{id}/thumbnail/candidates")
@@ -2992,6 +3017,26 @@ public Response cleanStorage(@Context ContainerRequestContext crc, @PathParam("i
}
+    /**
+     * Compares two versions of the same dataset and returns the result of
+     * {@code DatasetVersion.compareVersions}.
+     *
+     * @param crc request context used to resolve the requesting user
+     * @param id identifier of the dataset, as accepted by {@code findDatasetOrDie}
+     * @param versionId1 the version expected to be the older one
+     * @param versionId2 the version expected to be the newer one
+     * @param uriInfo request URI information forwarded to the version lookup
+     * @param headers request headers forwarded to the version lookup
+     * @return a 400 error response when {@code versionId1} was created after
+     *         {@code versionId2}; the response carried by a {@code WrappedResponse}
+     *         when a lookup fails; otherwise an OK response with the comparison
+     */
+    @GET
+    @AuthRequired
+    @Path("{id}/versions/{versionId1}/compare/{versionId2}")
+    public Response getCompareVersions(@Context ContainerRequestContext crc, @PathParam("id") String id,
+            @PathParam("versionId1") String versionId1,
+            @PathParam("versionId2") String versionId2,
+            @Context UriInfo uriInfo, @Context HttpHeaders headers) {
+        try {
+            DataverseRequest req = createDataverseRequest(getRequestUser(crc));
+            // Resolve the dataset once and reuse it for both version lookups.
+            Dataset dataset = findDatasetOrDie(id);
+            DatasetVersion dsv1 = getDatasetVersionOrDie(req, versionId1, dataset, uriInfo, headers);
+            DatasetVersion dsv2 = getDatasetVersionOrDie(req, versionId2, dataset, uriInfo, headers);
+            // The first version must not be newer than the second one.
+            if (dsv1.getCreateTime().getTime() > dsv2.getCreateTime().getTime()) {
+                return error(BAD_REQUEST, BundleUtil.getStringFromBundle("dataset.version.compare.incorrect.order"));
+            }
+            return ok(DatasetVersion.compareVersions(dsv1, dsv2));
+        } catch (WrappedResponse wr) {
+            return wr.getResponse();
+        }
+    }
+
+
private static Set getDatasetFilenames(Dataset dataset) {
Set files = new HashSet<>();
for (DataFile dataFile: dataset.getFiles()) {
@@ -4833,6 +4878,33 @@ public Response getPrivateUrlDatasetVersion(@PathParam("privateUrlToken") String
}
return ok(responseJson);
}
+
+    /**
+     * Returns, as JSON, the dataset version that
+     * {@code privateUrlService.getDraftDatasetVersionFromToken} resolves for a
+     * preview URL token. For tokens with anonymized access, the field type names
+     * configured in the {@code AnonymizedFieldTypeNames} setting are passed to the
+     * JSON printer.
+     *
+     * @param previewUrlToken the preview URL token from the request path
+     * @param returnOwners forwarded to the JSON printer
+     * @return a 404 response when no user or no dataset version matches the
+     *         token; otherwise an OK response with the version JSON
+     * @throws NotAcceptableException when the token grants anonymized access but
+     *         the {@code AnonymizedFieldTypeNames} setting is not set
+     */
+    @GET
+    @Path("previewUrlDatasetVersion/{previewUrlToken}")
+    public Response getPreviewUrlDatasetVersion(@PathParam("previewUrlToken") String previewUrlToken, @QueryParam("returnOwners") boolean returnOwners) {
+        PrivateUrlUser privateUrlUser = privateUrlService.getPrivateUrlUserFromToken(previewUrlToken);
+        if (privateUrlUser == null) {
+            return notFound("Private URL user not found");
+        }
+        boolean isAnonymizedAccess = privateUrlUser.hasAnonymizedAccess();
+        String anonymizedFieldTypeNames = settingsSvc.getValueForKey(SettingsServiceBean.Key.AnonymizedFieldTypeNames);
+        if (isAnonymizedAccess && anonymizedFieldTypeNames == null) {
+            throw new NotAcceptableException("Anonymized Access not enabled");
+        }
+        DatasetVersion dsv = privateUrlService.getDraftDatasetVersionFromToken(previewUrlToken);
+        if (dsv == null || dsv.getId() == null) {
+            return notFound("Dataset version not found");
+        }
+        JsonObjectBuilder responseJson;
+        if (isAnonymizedAccess) {
+            // Accept "a,b", "a, b" and "a ,  b" alike. Splitting on a comma followed by
+            // exactly one whitespace character left "a,b" as a single name, so none of
+            // the intended fields would have been anonymized.
+            // NOTE(review): other code that parses this setting may need the same tolerance.
+            List<String> anonymizedFieldTypeNamesList = new ArrayList<>(
+                    Arrays.asList(anonymizedFieldTypeNames.trim().split("\\s*,\\s*")));
+            responseJson = json(dsv, anonymizedFieldTypeNamesList, true, returnOwners);
+        } else {
+            responseJson = json(dsv, null, true, returnOwners);
+        }
+        return ok(responseJson);
+    }
+
+
@GET
@Path("privateUrlDatasetVersion/{privateUrlToken}/citation")
@@ -4845,6 +4917,18 @@ public Response getPrivateUrlDatasetVersionCitation(@PathParam("privateUrlToken"
return (dsv == null || dsv.getId() == null) ? notFound("Dataset version not found")
: ok(dsv.getCitation(true, privateUrlUser.hasAnonymizedAccess()));
}
+
+    /**
+     * Returns the citation of the dataset version that
+     * {@code privateUrlService.getDraftDatasetVersionFromToken} resolves for a
+     * preview URL token.
+     *
+     * @param previewUrlToken the preview URL token from the request path
+     * @return a 404 response when no user or no dataset version matches the
+     *         token; otherwise an OK response with the citation
+     */
+    @GET
+    @Path("previewUrlDatasetVersion/{previewUrlToken}/citation")
+    public Response getPreviewUrlDatasetVersionCitation(@PathParam("previewUrlToken") String previewUrlToken) {
+        PrivateUrlUser tokenUser = privateUrlService.getPrivateUrlUserFromToken(previewUrlToken);
+        if (tokenUser == null) {
+            return notFound("Private URL user not found");
+        }
+        DatasetVersion version = privateUrlService.getDraftDatasetVersionFromToken(previewUrlToken);
+        if (version == null || version.getId() == null) {
+            return notFound("Dataset version not found");
+        }
+        return ok(version.getCitation(true, tokenUser.hasAnonymizedAccess()));
+    }
@GET
@AuthRequired
diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java b/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java
index 0ee146ed99b..f864a5a9d1c 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java
@@ -3,12 +3,9 @@
import edu.harvard.iq.dataverse.*;
import edu.harvard.iq.dataverse.api.auth.AuthRequired;
import edu.harvard.iq.dataverse.api.datadeposit.SwordServiceBean;
-import edu.harvard.iq.dataverse.api.dto.DataverseMetadataBlockFacetDTO;
+import edu.harvard.iq.dataverse.api.dto.*;
import edu.harvard.iq.dataverse.authorization.DataverseRole;
-import edu.harvard.iq.dataverse.api.dto.ExplicitGroupDTO;
-import edu.harvard.iq.dataverse.api.dto.RoleAssignmentDTO;
-import edu.harvard.iq.dataverse.api.dto.RoleDTO;
import edu.harvard.iq.dataverse.api.imports.ImportException;
import edu.harvard.iq.dataverse.api.imports.ImportServiceBean;
import edu.harvard.iq.dataverse.authorization.Permission;
@@ -127,75 +124,158 @@ public Response addRoot(@Context ContainerRequestContext crc, String body) {
@Path("{identifier}")
public Response addDataverse(@Context ContainerRequestContext crc, String body, @PathParam("identifier") String parentIdtf) {
Dataverse newDataverse;
- JsonObject newDataverseJson;
try {
- newDataverseJson = JsonUtil.getJsonObject(body);
- newDataverse = jsonParser().parseDataverse(newDataverseJson);
+ newDataverse = parseAndValidateAddDataverseRequestBody(body);
} catch (JsonParsingException jpe) {
- logger.log(Level.SEVERE, "Json: {0}", body);
return error(Status.BAD_REQUEST, MessageFormat.format(BundleUtil.getStringFromBundle("dataverse.create.error.jsonparse"), jpe.getMessage()));
} catch (JsonParseException ex) {
- logger.log(Level.SEVERE, "Error parsing dataverse from json: " + ex.getMessage(), ex);
return error(Status.BAD_REQUEST, MessageFormat.format(BundleUtil.getStringFromBundle("dataverse.create.error.jsonparsetodataverse"), ex.getMessage()));
}
try {
- JsonObject metadataBlocksJson = newDataverseJson.getJsonObject("metadataBlocks");
- List inputLevels = null;
- List metadataBlocks = null;
- List facetList = null;
- if (metadataBlocksJson != null) {
- JsonArray inputLevelsArray = metadataBlocksJson.getJsonArray("inputLevels");
- inputLevels = inputLevelsArray != null ? parseInputLevels(inputLevelsArray, newDataverse) : null;
-
- JsonArray metadataBlockNamesArray = metadataBlocksJson.getJsonArray("metadataBlockNames");
- metadataBlocks = metadataBlockNamesArray != null ? parseNewDataverseMetadataBlocks(metadataBlockNamesArray) : null;
-
- JsonArray facetIdsArray = metadataBlocksJson.getJsonArray("facetIds");
- facetList = facetIdsArray != null ? parseFacets(facetIdsArray) : null;
- }
+ List inputLevels = parseInputLevels(body, newDataverse);
+ List metadataBlocks = parseMetadataBlocks(body);
+ List facets = parseFacets(body);
if (!parentIdtf.isEmpty()) {
Dataverse owner = findDataverseOrDie(parentIdtf);
newDataverse.setOwner(owner);
}
- // set the dataverse - contact relationship in the contacts
- for (DataverseContact dc : newDataverse.getDataverseContacts()) {
- dc.setDataverse(newDataverse);
- }
-
AuthenticatedUser u = getRequestAuthenticatedUserOrDie(crc);
- newDataverse = execCommand(new CreateDataverseCommand(newDataverse, createDataverseRequest(u), facetList, inputLevels, metadataBlocks));
+ newDataverse = execCommand(new CreateDataverseCommand(newDataverse, createDataverseRequest(u), facets, inputLevels, metadataBlocks));
return created("/dataverses/" + newDataverse.getAlias(), json(newDataverse));
- } catch (WrappedResponse ww) {
-
- String error = ConstraintViolationUtil.getErrorStringForConstraintViolations(ww.getCause());
- if (!error.isEmpty()) {
- logger.log(Level.INFO, error);
- return ww.refineResponse(error);
- }
- return ww.getResponse();
+ } catch (WrappedResponse ww) {
+ return handleWrappedResponse(ww);
} catch (EJBException ex) {
- Throwable cause = ex;
- StringBuilder sb = new StringBuilder();
- sb.append("Error creating dataverse.");
- while (cause.getCause() != null) {
- cause = cause.getCause();
- if (cause instanceof ConstraintViolationException) {
- sb.append(ConstraintViolationUtil.getErrorStringForConstraintViolations(cause));
- }
- }
- logger.log(Level.SEVERE, sb.toString());
- return error(Response.Status.INTERNAL_SERVER_ERROR, "Error creating dataverse: " + sb.toString());
+ return handleEJBException(ex, "Error creating dataverse.");
} catch (Exception ex) {
logger.log(Level.SEVERE, "Error creating dataverse", ex);
return error(Response.Status.INTERNAL_SERVER_ERROR, "Error creating dataverse: " + ex.getMessage());
+ }
+ }
+
+ private Dataverse parseAndValidateAddDataverseRequestBody(String body) throws JsonParsingException, JsonParseException {
+ try {
+ JsonObject addDataverseJson = JsonUtil.getJsonObject(body);
+ return jsonParser().parseDataverse(addDataverseJson);
+ } catch (JsonParsingException jpe) {
+ logger.log(Level.SEVERE, "Json: {0}", body);
+ throw jpe;
+ } catch (JsonParseException ex) {
+ logger.log(Level.SEVERE, "Error parsing dataverse from json: " + ex.getMessage(), ex);
+ throw ex;
+ }
+ }
+
+ @PUT
+ @AuthRequired
+ @Path("{identifier}")
+ public Response updateDataverse(@Context ContainerRequestContext crc, String body, @PathParam("identifier") String identifier) {
+ Dataverse dataverse;
+ try {
+ dataverse = findDataverseOrDie(identifier);
+ } catch (WrappedResponse e) {
+ return e.getResponse();
+ }
+
+ DataverseDTO updatedDataverseDTO;
+ try {
+ updatedDataverseDTO = parseAndValidateUpdateDataverseRequestBody(body);
+ } catch (JsonParsingException jpe) {
+ return error(Status.BAD_REQUEST, MessageFormat.format(BundleUtil.getStringFromBundle("dataverse.create.error.jsonparse"), jpe.getMessage()));
+ } catch (JsonParseException ex) {
+ return error(Status.BAD_REQUEST, MessageFormat.format(BundleUtil.getStringFromBundle("dataverse.create.error.jsonparsetodataverse"), ex.getMessage()));
+ }
+
+ try {
+ List inputLevels = parseInputLevels(body, dataverse);
+ List metadataBlocks = parseMetadataBlocks(body);
+ List facets = parseFacets(body);
+
+ AuthenticatedUser u = getRequestAuthenticatedUserOrDie(crc);
+ dataverse = execCommand(new UpdateDataverseCommand(dataverse, facets, null, createDataverseRequest(u), inputLevels, metadataBlocks, updatedDataverseDTO, true));
+ return ok(json(dataverse));
+ } catch (WrappedResponse ww) {
+ return handleWrappedResponse(ww);
+ } catch (EJBException ex) {
+ return handleEJBException(ex, "Error updating dataverse.");
+ } catch (Exception ex) {
+ logger.log(Level.SEVERE, "Error updating dataverse", ex);
+ return error(Response.Status.INTERNAL_SERVER_ERROR, "Error updating dataverse: " + ex.getMessage());
}
}
+ private DataverseDTO parseAndValidateUpdateDataverseRequestBody(String body) throws JsonParsingException, JsonParseException {
+ try {
+ JsonObject updateDataverseJson = JsonUtil.getJsonObject(body);
+ return jsonParser().parseDataverseDTO(updateDataverseJson);
+ } catch (JsonParsingException jpe) {
+ logger.log(Level.SEVERE, "Json: {0}", body);
+ throw jpe;
+ } catch (JsonParseException ex) {
+ logger.log(Level.SEVERE, "Error parsing DataverseDTO from json: " + ex.getMessage(), ex);
+ throw ex;
+ }
+ }
+
+ private List parseInputLevels(String body, Dataverse dataverse) throws WrappedResponse {
+ JsonObject metadataBlocksJson = getMetadataBlocksJson(body);
+ if (metadataBlocksJson == null) {
+ return null;
+ }
+ JsonArray inputLevelsArray = metadataBlocksJson.getJsonArray("inputLevels");
+ return inputLevelsArray != null ? parseInputLevels(inputLevelsArray, dataverse) : null;
+ }
+
+ private List parseMetadataBlocks(String body) throws WrappedResponse {
+ JsonObject metadataBlocksJson = getMetadataBlocksJson(body);
+ if (metadataBlocksJson == null) {
+ return null;
+ }
+ JsonArray metadataBlocksArray = metadataBlocksJson.getJsonArray("metadataBlockNames");
+ return metadataBlocksArray != null ? parseNewDataverseMetadataBlocks(metadataBlocksArray) : null;
+ }
+
+ private List parseFacets(String body) throws WrappedResponse {
+ JsonObject metadataBlocksJson = getMetadataBlocksJson(body);
+ if (metadataBlocksJson == null) {
+ return null;
+ }
+ JsonArray facetsArray = metadataBlocksJson.getJsonArray("facetIds");
+ return facetsArray != null ? parseFacets(facetsArray) : null;
+ }
+
+ private JsonObject getMetadataBlocksJson(String body) {
+ JsonObject dataverseJson = JsonUtil.getJsonObject(body);
+ return dataverseJson.getJsonObject("metadataBlocks");
+ }
+
+ private Response handleWrappedResponse(WrappedResponse ww) {
+ String error = ConstraintViolationUtil.getErrorStringForConstraintViolations(ww.getCause());
+ if (!error.isEmpty()) {
+ logger.log(Level.INFO, error);
+ return ww.refineResponse(error);
+ }
+ return ww.getResponse();
+ }
+
+ private Response handleEJBException(EJBException ex, String action) {
+ Throwable cause = ex;
+ StringBuilder sb = new StringBuilder();
+ sb.append(action);
+ while (cause.getCause() != null) {
+ cause = cause.getCause();
+ if (cause instanceof ConstraintViolationException) {
+ sb.append(ConstraintViolationUtil.getErrorStringForConstraintViolations(cause));
+ }
+ }
+ logger.log(Level.SEVERE, sb.toString());
+ return error(Response.Status.INTERNAL_SERVER_ERROR, sb.toString());
+ }
+
private List parseNewDataverseMetadataBlocks(JsonArray metadataBlockNamesArray) throws WrappedResponse {
List selectedMetadataBlocks = new ArrayList<>();
for (JsonString metadataBlockName : metadataBlockNamesArray.getValuesAs(JsonString.class)) {
@@ -621,62 +701,22 @@ public Response deleteDataverse(@Context ContainerRequestContext crc, @PathParam
public Response updateAttribute(@Context ContainerRequestContext crc, @PathParam("identifier") String identifier,
@PathParam("attribute") String attribute, @QueryParam("value") String value) {
try {
- Dataverse collection = findDataverseOrDie(identifier);
- User user = getRequestUser(crc);
- DataverseRequest dvRequest = createDataverseRequest(user);
-
- // TODO: The cases below use hard coded strings, because we have no place for definitions of those!
- // They are taken from util.json.JsonParser / util.json.JsonPrinter. This shall be changed.
- // This also should be extended to more attributes, like the type, theme, contacts, some booleans, etc.
- switch (attribute) {
- case "alias":
- collection.setAlias(value);
- break;
- case "name":
- collection.setName(value);
- break;
- case "description":
- collection.setDescription(value);
- break;
- case "affiliation":
- collection.setAffiliation(value);
- break;
- /* commenting out the code from the draft pr #9462:
- case "versionPidsConduct":
- CollectionConduct conduct = CollectionConduct.findBy(value);
- if (conduct == null) {
- return badRequest("'" + value + "' is not one of [" +
- String.join(",", CollectionConduct.asList()) + "]");
- }
- collection.setDatasetVersionPidConduct(conduct);
- break;
- */
- case "filePIDsEnabled":
- if(!user.isSuperuser()) {
- return forbidden("You must be a superuser to change this setting");
- }
- if(!settingsService.isTrueForKey(SettingsServiceBean.Key.AllowEnablingFilePIDsPerCollection, false)) {
- return forbidden("Changing File PID policy per collection is not enabled on this server");
- }
- collection.setFilePIDsEnabled(parseBooleanOrDie(value));
- break;
- default:
- return badRequest("'" + attribute + "' is not a supported attribute");
- }
-
- // Off to persistence layer
- execCommand(new UpdateDataverseCommand(collection, null, null, dvRequest, null));
-
- // Also return modified collection to user
- return ok("Update successful", JsonPrinter.json(collection));
-
- // TODO: This is an anti-pattern, necessary due to this bean being an EJB, causing very noisy and unnecessary
- // logging by the EJB container for bubbling exceptions. (It would be handled by the error handlers.)
+ Dataverse dataverse = findDataverseOrDie(identifier);
+ Object formattedValue = formatAttributeValue(attribute, value);
+ dataverse = execCommand(new UpdateDataverseAttributeCommand(createDataverseRequest(getRequestUser(crc)), dataverse, attribute, formattedValue));
+ return ok("Update successful", JsonPrinter.json(dataverse));
} catch (WrappedResponse e) {
return e.getResponse();
}
}
+ private Object formatAttributeValue(String attribute, String value) throws WrappedResponse {
+ if (attribute.equals("filePIDsEnabled")) {
+ return parseBooleanOrDie(value);
+ }
+ return value;
+ }
+
@GET
@AuthRequired
@Path("{identifier}/inputLevels")
diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Files.java b/src/main/java/edu/harvard/iq/dataverse/api/Files.java
index d786aab35a8..633d420c527 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/Files.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/Files.java
@@ -343,10 +343,10 @@ public Response deleteFileInDataset(@Context ContainerRequestContext crc, @PathP
DataFile dataFile = findDataFileOrDie(fileIdOrPersistentId);
FileMetadata fileToDelete = dataFile.getLatestFileMetadata();
Dataset dataset = dataFile.getOwner();
- DatasetVersion v = dataset.getOrCreateEditVersion();
+ dataset.getOrCreateEditVersion();
deletePhysicalFile = !dataFile.isReleased();
- UpdateDatasetVersionCommand update_cmd = new UpdateDatasetVersionCommand(dataset, dvRequest, Arrays.asList(fileToDelete), v);
+ UpdateDatasetVersionCommand update_cmd = new UpdateDatasetVersionCommand(dataset, dvRequest, Arrays.asList(fileToDelete));
update_cmd.setValidateLenient(true);
try {
diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Info.java b/src/main/java/edu/harvard/iq/dataverse/api/Info.java
index 257519677d3..2439c996816 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/Info.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/Info.java
@@ -12,12 +12,17 @@
import jakarta.ws.rs.Produces;
import org.apache.commons.io.IOUtils;
+import edu.harvard.iq.dataverse.export.ExportService;
import edu.harvard.iq.dataverse.settings.JvmSettings;
import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
import edu.harvard.iq.dataverse.util.BundleUtil;
import edu.harvard.iq.dataverse.util.SystemConfig;
+import io.gdcc.spi.export.Exporter;
+import io.gdcc.spi.export.ExportException;
+import io.gdcc.spi.export.XMLExporter;
import jakarta.ejb.EJB;
import jakarta.json.Json;
+import jakarta.json.JsonObjectBuilder;
import jakarta.json.JsonValue;
import jakarta.ws.rs.GET;
import jakarta.ws.rs.Path;
@@ -92,6 +97,32 @@ public Response getZipDownloadLimit() {
return ok(zipDownloadLimit);
}
+ @GET
+ @Path("exportFormats")
+ public Response getExportFormats() {
+ JsonObjectBuilder responseModel = Json.createObjectBuilder();
+ ExportService instance = ExportService.getInstance();
+ for (String[] labels : instance.getExportersLabels()) {
+ try {
+ Exporter exporter = instance.getExporter(labels[1]);
+ JsonObjectBuilder exporterObject = Json.createObjectBuilder().add("displayName", labels[0])
+ .add("mediaType", exporter.getMediaType()).add("isHarvestable", exporter.isHarvestable())
+ .add("isVisibleInUserInterface", exporter.isAvailableToUsers());
+ if (exporter instanceof XMLExporter xmlExporter) {
+ exporterObject.add("XMLNameSpace", xmlExporter.getXMLNameSpace())
+ .add("XMLSchemaLocation", xmlExporter.getXMLSchemaLocation())
+ .add("XMLSchemaVersion", xmlExporter.getXMLSchemaVersion());
+ }
+ responseModel.add(labels[1], exporterObject);
+ }
+ catch (ExportException ex){
+ logger.warning("Failed to get: " + labels[1]);
+ logger.warning(ex.getLocalizedMessage());
+ }
+ }
+ return ok(responseModel);
+ }
+
private Response getSettingResponseByKey(SettingsServiceBean.Key key) {
String setting = settingsService.getValueForKey(key);
if (setting != null) {
diff --git a/src/main/java/edu/harvard/iq/dataverse/api/MakeDataCountApi.java b/src/main/java/edu/harvard/iq/dataverse/api/MakeDataCountApi.java
index 1f2f1039327..306b863c9e4 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/MakeDataCountApi.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/MakeDataCountApi.java
@@ -19,6 +19,9 @@
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
+import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
import java.net.URL;
import java.util.Iterator;
import java.util.List;
@@ -152,10 +155,17 @@ public Response updateCitationsForDataset(@PathParam("id") String id) throws IOE
// DataCite wants "doi=", not "doi:".
String authorityPlusIdentifier = persistentId.replaceFirst("doi:", "");
// Request max page size and then loop to handle multiple pages
- URL url = new URL(JvmSettings.DATACITE_REST_API_URL.lookup() +
+ URL url = null;
+ try {
+ url = new URI(JvmSettings.DATACITE_REST_API_URL.lookup(pidProvider.getId()) +
"/events?doi=" +
authorityPlusIdentifier +
- "&source=crossref&page[size]=1000");
+ "&source=crossref&page[size]=1000").toURL();
+ } catch (URISyntaxException e) {
+ //Nominally this means a config error/ bad DATACITE_REST_API_URL for this provider
+ logger.warning("Unable to create URL for " + persistentId + ", pidProvider " + pidProvider.getId());
+ return error(Status.INTERNAL_SERVER_ERROR, "Unable to create DataCite URL to retrieve citations.");
+ }
logger.fine("Retrieving Citations from " + url.toString());
boolean nextPage = true;
JsonArrayBuilder dataBuilder = Json.createArrayBuilder();
@@ -178,7 +188,12 @@ public Response updateCitationsForDataset(@PathParam("id") String id) throws IOE
dataBuilder.add(iter.next());
}
if (links.containsKey("next")) {
- url = new URL(links.getString("next"));
+ try {
+ url = new URI(links.getString("next")).toURL();
+ } catch (URISyntaxException e) {
+ logger.warning("Unable to create URL from DataCite response: " + links.getString("next"));
+ return error(Status.INTERNAL_SERVER_ERROR, "Unable to retrieve all results from DataCite");
+ }
} else {
nextPage = false;
}
@@ -187,7 +202,7 @@ public Response updateCitationsForDataset(@PathParam("id") String id) throws IOE
JsonArray allData = dataBuilder.build();
List datasetExternalCitations = datasetExternalCitationsService.parseCitations(allData);
/*
- * ToDo: If this is the only source of citations, we should remove all the existing ones for the dataset and repopuate them.
+ * ToDo: If this is the only source of citations, we should remove all the existing ones for the dataset and repopulate them.
* As is, this call doesn't remove old citations if there are now none (legacy issue if we decide to stop counting certain types of citation
* as we've done for 'hasPart').
* If there are some, this call individually checks each one and if a matching item exists, it removes it and adds it back. Faster and better to delete all and
diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Metrics.java b/src/main/java/edu/harvard/iq/dataverse/api/Metrics.java
index 452e5df9f9a..f36c514859e 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/Metrics.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/Metrics.java
@@ -206,12 +206,13 @@ public Response getDatasetsTimeSeriest(@Context Request req, @Context UriInfo ur
return error(BAD_REQUEST, ia.getLocalizedMessage());
}
String metricName = "datasets";
- JsonArray jsonArray = MetricsUtil.stringToJsonArray(metricsSvc.returnUnexpiredCacheAllTime(metricName, null, d));
+ String validDataLocation = MetricsUtil.validateDataLocationStringType(dataLocation);
+ JsonArray jsonArray = MetricsUtil.stringToJsonArray(metricsSvc.returnUnexpiredCacheAllTime(metricName, validDataLocation, d));
if (null == jsonArray) { // run query and save
- jsonArray = metricsSvc.getDatasetsTimeSeries(uriInfo, dataLocation, d);
- metricsSvc.save(new Metric(metricName, null, null, d, jsonArray.toString()));
+ jsonArray = metricsSvc.getDatasetsTimeSeries(uriInfo, validDataLocation, d);
+ metricsSvc.save(new Metric(metricName, null, validDataLocation, d, jsonArray.toString()));
}
MediaType requestedType = getVariant(req, MediaType.valueOf(FileUtil.MIME_TYPE_CSV), MediaType.APPLICATION_JSON_TYPE);
if ((requestedType != null) && (requestedType.equals(MediaType.APPLICATION_JSON_TYPE))) {
diff --git a/src/main/java/edu/harvard/iq/dataverse/api/SavedSearches.java b/src/main/java/edu/harvard/iq/dataverse/api/SavedSearches.java
index 33a11a2df23..e6519c9ff36 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/SavedSearches.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/SavedSearches.java
@@ -181,7 +181,7 @@ public Response delete(@PathParam("id") long doomedId, @QueryParam("unlink") boo
try {
wasDeleted = savedSearchSvc.delete(doomedId, unlink);
} catch (Exception e) {
- return error(INTERNAL_SERVER_ERROR, "Problem while trying to unlink links of saved search id " + doomedId);
+ return error(INTERNAL_SERVER_ERROR, "Problem while trying to unlink links of saved search id " + doomedId + ". Exception: " + e.getLocalizedMessage());
}
if (wasDeleted) {
diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Search.java b/src/main/java/edu/harvard/iq/dataverse/api/Search.java
index 6b9fcb38305..f86f9f446fa 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/Search.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/Search.java
@@ -175,7 +175,7 @@ public Response search(
JsonArrayBuilder itemsArrayBuilder = Json.createArrayBuilder();
List solrSearchResults = solrQueryResponse.getSolrSearchResults();
for (SolrSearchResult solrSearchResult : solrSearchResults) {
- itemsArrayBuilder.add(solrSearchResult.json(showRelevance, showEntityIds, showApiUrls, metadataFields, getDatasetFileCount(solrSearchResult)));
+ itemsArrayBuilder.add(solrSearchResult.json(showRelevance, showEntityIds, showApiUrls, metadataFields));
}
JsonObjectBuilder spelling_alternatives = Json.createObjectBuilder();
@@ -229,15 +229,6 @@ public Response search(
}
}
- private Long getDatasetFileCount(SolrSearchResult solrSearchResult) {
- DvObject dvObject = solrSearchResult.getEntity();
- if (dvObject.isInstanceofDataset()) {
- DatasetVersion datasetVersion = ((Dataset) dvObject).getVersionFromId(solrSearchResult.getDatasetVersionId());
- return datasetVersionFilesServiceBean.getFileMetadataCount(datasetVersion);
- }
- return null;
- }
-
private User getUser(ContainerRequestContext crc) throws WrappedResponse {
User userToExecuteSearchAs = GuestUser.get();
try {
diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Users.java b/src/main/java/edu/harvard/iq/dataverse/api/Users.java
index c1a7c95dbff..ecf7839e616 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/Users.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/Users.java
@@ -137,15 +137,20 @@ public Response deleteToken(@Context ContainerRequestContext crc) {
@Path("token")
@AuthRequired
@GET
- public Response getTokenExpirationDate() {
- ApiToken token = authSvc.findApiToken(getRequestApiKey());
-
- if (token == null) {
- return notFound("Token " + getRequestApiKey() + " not found.");
+ public Response getTokenExpirationDate(@Context ContainerRequestContext crc) {
+ try {
+ AuthenticatedUser user = getRequestAuthenticatedUserOrDie(crc);
+ ApiToken token = authSvc.findApiTokenByUser(user);
+
+ if (token == null) {
+ return notFound("Token not found.");
+ }
+
+ return ok(String.format("Token %s expires on %s", token.getTokenString(), token.getExpireTime()));
+
+ } catch (WrappedResponse wr) {
+ return wr.getResponse();
}
-
- return ok("Token " + getRequestApiKey() + " expires on " + token.getExpireTime());
-
}
@Path("token/recreate")
diff --git a/src/main/java/edu/harvard/iq/dataverse/api/dto/DataverseDTO.java b/src/main/java/edu/harvard/iq/dataverse/api/dto/DataverseDTO.java
new file mode 100644
index 00000000000..4f2f1032c07
--- /dev/null
+++ b/src/main/java/edu/harvard/iq/dataverse/api/dto/DataverseDTO.java
@@ -0,0 +1,63 @@
+package edu.harvard.iq.dataverse.api.dto;
+
+import edu.harvard.iq.dataverse.Dataverse;
+import edu.harvard.iq.dataverse.DataverseContact;
+
+import java.util.List;
+
+public class DataverseDTO {
+ private String alias;
+ private String name;
+ private String description;
+ private String affiliation;
+ private List dataverseContacts;
+ private Dataverse.DataverseType dataverseType;
+
+ public String getAlias() {
+ return alias;
+ }
+
+ public void setAlias(String alias) {
+ this.alias = alias;
+ }
+
+ public String getName() {
+ return name;
+ }
+
+ public void setName(String name) {
+ this.name = name;
+ }
+
+ public String getDescription() {
+ return description;
+ }
+
+ public void setDescription(String description) {
+ this.description = description;
+ }
+
+ public String getAffiliation() {
+ return affiliation;
+ }
+
+ public void setAffiliation(String affiliation) {
+ this.affiliation = affiliation;
+ }
+
+ public List getDataverseContacts() {
+ return dataverseContacts;
+ }
+
+ public void setDataverseContacts(List dataverseContacts) {
+ this.dataverseContacts = dataverseContacts;
+ }
+
+ public Dataverse.DataverseType getDataverseType() {
+ return dataverseType;
+ }
+
+ public void setDataverseType(Dataverse.DataverseType dataverseType) {
+ this.dataverseType = dataverseType;
+ }
+}
diff --git a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportDDIServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportDDIServiceBean.java
index 85d4868605d..35d35316f73 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportDDIServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportDDIServiceBean.java
@@ -210,7 +210,7 @@ private void processDDI(ImportType importType, XMLStreamReader xmlr, DatasetDTO
// study description section. we'll use the one we found in
// the codeBook entry:
FieldDTO otherIdValue = FieldDTO.createPrimitiveFieldDTO("otherIdValue", codeBookLevelId);
- FieldDTO otherId = FieldDTO.createCompoundFieldDTO("otherId", otherIdValue);
+ FieldDTO otherId = FieldDTO.createMultipleCompoundFieldDTO("otherId", otherIdValue);
citationBlock.getFields().add(otherId);
}
diff --git a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportGenericServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportGenericServiceBean.java
index d32a548c8bf..aa5b25e3967 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportGenericServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportGenericServiceBean.java
@@ -150,12 +150,16 @@ public DatasetDTO processXML( XMLStreamReader xmlr, ForeignMetadataFormatMapping
}
- // Helper method for importing harvested Dublin Core xml.
+ // Helper methods for importing harvested Dublin Core xml.
// Dublin Core is considered a mandatory, built in metadata format mapping.
// It is distributed as required content, in reference_data.sql.
// Note that arbitrary formatting tags are supported for the outer xml
// wrapper. -- L.A. 4.5
public DatasetDTO processOAIDCxml(String DcXmlToParse) throws XMLStreamException {
+ return processOAIDCxml(DcXmlToParse, null, false);
+ }
+
+ public DatasetDTO processOAIDCxml(String DcXmlToParse, String oaiIdentifier, boolean preferSuppliedIdentifier) throws XMLStreamException {
// look up DC metadata mapping:
ForeignMetadataFormatMapping dublinCoreMapping = findFormatMappingByName(DCTERMS);
@@ -185,18 +189,37 @@ public DatasetDTO processOAIDCxml(String DcXmlToParse) throws XMLStreamException
datasetDTO.getDatasetVersion().setVersionState(DatasetVersion.VersionState.RELEASED);
- // Our DC import handles the contents of the dc:identifier field
- // as an "other id". In the context of OAI harvesting, we expect
- // the identifier to be a global id, so we need to rearrange that:
+ // In some cases, the identifier that we want to use for the dataset is
+ // already supplied to the method explicitly. For example, in some
+ // harvesting cases we'll want to use the OAI identifier (the identifier
+ // from the <header> section of the OAI record) for that purpose, without
+ // expecting to find a valid persistent id in the body of the DC record:
- String identifier = getOtherIdFromDTO(datasetDTO.getDatasetVersion());
- logger.fine("Imported identifier: "+identifier);
+ String globalIdentifier;
- String globalIdentifier = reassignIdentifierAsGlobalId(identifier, datasetDTO);
- logger.fine("Detected global identifier: "+globalIdentifier);
+ if (oaiIdentifier != null) {
+ logger.fine("Attempting to use " + oaiIdentifier + " as the persistentId of the imported dataset");
+
+ globalIdentifier = reassignIdentifierAsGlobalId(oaiIdentifier, datasetDTO);
+ } else {
+ // Our DC import handles the contents of the dc:identifier field
+ // as an "other id". Unless we are using an externally supplied
+ // global id, we will be using the first such "other id" that we
+ // can parse and recognize as the global id for the imported dataset
+ // (note that this is the default behavior during harvesting),
+ // so we need to reassign it accordingly:
+ String identifier = selectIdentifier(datasetDTO.getDatasetVersion(), oaiIdentifier, preferSuppliedIdentifier);
+ logger.fine("Imported identifier: " + identifier);
+
+ globalIdentifier = reassignIdentifierAsGlobalId(identifier, datasetDTO);
+ logger.fine("Detected global identifier: " + globalIdentifier);
+ }
if (globalIdentifier == null) {
- throw new EJBException("Failed to find a global identifier in the OAI_DC XML record.");
+ String exceptionMsg = oaiIdentifier == null ?
+ "Failed to find a global identifier in the OAI_DC XML record." :
+ "Failed to parse the supplied identifier as a valid Persistent Id";
+ throw new EJBException(exceptionMsg);
}
return datasetDTO;
@@ -205,8 +228,17 @@ public DatasetDTO processOAIDCxml(String DcXmlToParse) throws XMLStreamException
private void processXMLElement(XMLStreamReader xmlr, String currentPath, String openingTag, ForeignMetadataFormatMapping foreignFormatMapping, DatasetDTO datasetDTO) throws XMLStreamException {
logger.fine("entering processXMLElement; ("+currentPath+")");
-
- for (int event = xmlr.next(); event != XMLStreamConstants.END_DOCUMENT; event = xmlr.next()) {
+
+ while (xmlr.hasNext()) {
+
+ int event;
+ try {
+ event = xmlr.next();
+ } catch (XMLStreamException ex) {
+ logger.warning("Error occurred in the XML parsing : " + ex.getMessage());
+ continue; // Skip Undeclared namespace prefix and Unexpected close tag related to com.ctc.wstx.exc.WstxParsingException
+ }
+
if (event == XMLStreamConstants.START_ELEMENT) {
String currentElement = xmlr.getLocalName();
@@ -335,8 +367,20 @@ private FieldDTO makeDTO(DatasetFieldType dataverseFieldType, FieldDTO value, St
return value;
}
- private String getOtherIdFromDTO(DatasetVersionDTO datasetVersionDTO) {
+ public String selectIdentifier(DatasetVersionDTO datasetVersionDTO, String suppliedIdentifier) {
+ return selectIdentifier(datasetVersionDTO, suppliedIdentifier, false);
+ }
+
+ private String selectIdentifier(DatasetVersionDTO datasetVersionDTO, String suppliedIdentifier, boolean preferSuppliedIdentifier) {
List otherIds = new ArrayList<>();
+
+ if (suppliedIdentifier != null && preferSuppliedIdentifier) {
+ // This supplied identifier (in practice, this is likely the OAI-PMH
+ // identifier from the <header> section) will be our first
+ // choice candidate for the pid of the imported dataset:
+ otherIds.add(suppliedIdentifier);
+ }
+
for (Map.Entry entry : datasetVersionDTO.getMetadataBlocks().entrySet()) {
String key = entry.getKey();
MetadataBlockDTO value = entry.getValue();
@@ -354,6 +398,16 @@ private String getOtherIdFromDTO(DatasetVersionDTO datasetVersionDTO) {
}
}
}
+
+ if (suppliedIdentifier != null && !preferSuppliedIdentifier) {
+ // Unless specifically instructed to prefer this extra identifier
+ // (in practice, this is likely the OAI-PMH identifier from the
+ // <header> section), we will try to use it as the *last*
+ // possible candidate for the pid, so, adding it to the end of the
+ // list:
+ otherIds.add(suppliedIdentifier);
+ }
+
if (!otherIds.isEmpty()) {
// We prefer doi or hdl identifiers like "doi:10.7910/DVN/1HE30F"
for (String otherId : otherIds) {
diff --git a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java
index d2bba56f884..7dc2aed799e 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java
@@ -7,7 +7,6 @@
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
-import edu.harvard.iq.dataverse.DataFile;
import edu.harvard.iq.dataverse.Dataset;
import edu.harvard.iq.dataverse.DatasetField;
import edu.harvard.iq.dataverse.DatasetFieldConstant;
@@ -20,6 +19,7 @@
import edu.harvard.iq.dataverse.DataverseContact;
import edu.harvard.iq.dataverse.DataverseServiceBean;
import edu.harvard.iq.dataverse.EjbDataverseEngine;
+import edu.harvard.iq.dataverse.GlobalId;
import edu.harvard.iq.dataverse.MetadataBlockServiceBean;
import edu.harvard.iq.dataverse.api.dto.DatasetDTO;
import edu.harvard.iq.dataverse.api.imports.ImportUtil.ImportType;
@@ -31,6 +31,7 @@
import edu.harvard.iq.dataverse.engine.command.impl.CreateHarvestedDatasetCommand;
import edu.harvard.iq.dataverse.engine.command.impl.CreateNewDatasetCommand;
import edu.harvard.iq.dataverse.engine.command.impl.DestroyDatasetCommand;
+import edu.harvard.iq.dataverse.engine.command.impl.UpdateHarvestedDatasetCommand;
import edu.harvard.iq.dataverse.harvest.client.HarvestingClient;
import edu.harvard.iq.dataverse.search.IndexServiceBean;
import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
@@ -40,6 +41,7 @@
import edu.harvard.iq.dataverse.util.json.JsonUtil;
import edu.harvard.iq.dataverse.license.LicenseServiceBean;
import edu.harvard.iq.dataverse.pidproviders.PidUtil;
+import edu.harvard.iq.dataverse.util.DatasetFieldUtil;
import java.io.File;
import java.io.FileOutputStream;
@@ -206,9 +208,15 @@ public JsonObjectBuilder handleFile(DataverseRequest dataverseRequest, Dataverse
}
@TransactionAttribute(TransactionAttributeType.REQUIRES_NEW)
- public Dataset doImportHarvestedDataset(DataverseRequest dataverseRequest, HarvestingClient harvestingClient, String harvestIdentifier, String metadataFormat, File metadataFile, Date oaiDateStamp, PrintWriter cleanupLog) throws ImportException, IOException {
+ public Dataset doImportHarvestedDataset(DataverseRequest dataverseRequest,
+ HarvestingClient harvestingClient,
+ String harvestIdentifier,
+ String metadataFormat,
+ File metadataFile,
+ Date oaiDateStamp,
+ PrintWriter cleanupLog) throws ImportException, IOException {
if (harvestingClient == null || harvestingClient.getDataverse() == null) {
- throw new ImportException("importHarvestedDataset called wiht a null harvestingClient, or an invalid harvestingClient.");
+ throw new ImportException("importHarvestedDataset called with a null harvestingClient, or an invalid harvestingClient.");
}
Dataverse owner = harvestingClient.getDataverse();
Dataset importedDataset = null;
@@ -242,8 +250,8 @@ public Dataset doImportHarvestedDataset(DataverseRequest dataverseRequest, Harve
} else if ("dc".equalsIgnoreCase(metadataFormat) || "oai_dc".equals(metadataFormat)) {
logger.fine("importing DC "+metadataFile.getAbsolutePath());
try {
- String xmlToParse = new String(Files.readAllBytes(metadataFile.toPath()));
- dsDTO = importGenericService.processOAIDCxml(xmlToParse);
+ String xmlToParse = new String(Files.readAllBytes(metadataFile.toPath()));
+ dsDTO = importGenericService.processOAIDCxml(xmlToParse, harvestIdentifier, harvestingClient.isUseOaiIdentifiersAsPids());
} catch (IOException | XMLStreamException e) {
throw new ImportException("Failed to process Dublin Core XML record: "+ e.getClass() + " (" + e.getMessage() + ")");
}
@@ -268,116 +276,121 @@ public Dataset doImportHarvestedDataset(DataverseRequest dataverseRequest, Harve
}
JsonObject obj = JsonUtil.getJsonObject(json);
- //and call parse Json to read it into a dataset
+
+ String protocol = obj.getString("protocol", null);
+ String authority = obj.getString("authority", null);
+ String identifier = obj.getString("identifier",null);
+
+ GlobalId globalId;
+
+ // A Global ID is required:
+ // (meaning, we will fail with an exception if the imports above have
+ // not managed to find an acceptable global identifier in the harvested
+ // metadata)
+
+ try {
+ globalId = PidUtil.parseAsGlobalID(protocol, authority, identifier);
+ } catch (IllegalArgumentException iax) {
+ throw new ImportException("The harvested metadata record with the OAI server identifier " + harvestIdentifier + " does not contain a global identifier this Dataverse can parse, skipping.");
+ }
+
+ if (globalId == null) {
+ throw new ImportException("The harvested metadata record with the OAI server identifier " + harvestIdentifier + " does not contain a global identifier this Dataverse recognizes, skipping.");
+ }
+
+ String globalIdString = globalId.asString();
+
+ if (StringUtils.isEmpty(globalIdString)) {
+ // @todo this check may not be necessary, now that there's a null check above
+ throw new ImportException("The harvested metadata record with the OAI server identifier " + harvestIdentifier + " does not contain a global identifier this Dataverse recognizes, skipping.");
+ }
+
+ DatasetVersion harvestedVersion;
+
+ Dataset existingDataset = datasetService.findByGlobalId(globalIdString);
+
try {
+ Dataset harvestedDataset;
+
JsonParser parser = new JsonParser(datasetfieldService, metadataBlockService, settingsService, licenseService, datasetTypeService, harvestingClient);
parser.setLenient(true);
- Dataset ds = parser.parseDataset(obj);
- // For ImportType.NEW, if the metadata contains a global identifier, and it's not a protocol
- // we support, it should be rejected.
- // (TODO: ! - add some way of keeping track of supported protocols!)
- //if (ds.getGlobalId() != null && !ds.getProtocol().equals(settingsService.getValueForKey(SettingsServiceBean.Key.Protocol, ""))) {
- // throw new ImportException("Could not register id " + ds.getGlobalId() + ", protocol not supported");
- //}
- ds.setOwner(owner);
- ds.getLatestVersion().setDatasetFields(ds.getLatestVersion().initDatasetFields());
+ if (existingDataset == null) {
+ // Creating a new dataset from scratch:
+
+ harvestedDataset = parser.parseDataset(obj);
- if (ds.getVersions().get(0).getReleaseTime() == null) {
- ds.getVersions().get(0).setReleaseTime(oaiDateStamp);
- }
-
- // Check data against required contraints
- List> violations = ds.getVersions().get(0).validateRequired();
- if (!violations.isEmpty()) {
- // For migration and harvest, add NA for missing required values
- for (ConstraintViolation v : violations) {
- DatasetField f = v.getRootBean();
- f.setSingleValue(DatasetField.NA_VALUE);
+ harvestedDataset.setHarvestedFrom(harvestingClient);
+ harvestedDataset.setHarvestIdentifier(harvestIdentifier);
+
+ harvestedVersion = harvestedDataset.getVersions().get(0);
+ } else {
+ // We already have a dataset with this id in the database.
+ // Let's check a few things before we go any further with it:
+
+ // If this dataset already exists IN ANOTHER COLLECTION
+ // we are just going to skip it!
+ if (existingDataset.getOwner() != null && !owner.getId().equals(existingDataset.getOwner().getId())) {
+ throw new ImportException("The dataset with the global id " + globalIdString + " already exists, in the dataverse " + existingDataset.getOwner().getAlias() + ", skipping.");
}
- }
-
- // Check data against validation constraints
- // If we are migrating and "scrub migration data" is true we attempt to fix invalid data
- // if the fix fails stop processing of this file by throwing exception
- Set invalidViolations = ds.getVersions().get(0).validate();
- ValidatorFactory factory = Validation.buildDefaultValidatorFactory();
- Validator validator = factory.getValidator();
- if (!invalidViolations.isEmpty()) {
- for (ConstraintViolation v : invalidViolations) {
- DatasetFieldValue f = v.getRootBean();
- boolean fixed = false;
- boolean converted = false;
- // TODO: Is this scrubbing something we want to continue doing?
- if (settingsService.isTrueForKey(SettingsServiceBean.Key.ScrubMigrationData, false)) {
- fixed = processMigrationValidationError(f, cleanupLog, metadataFile.getName());
- converted = true;
- if (fixed) {
- Set> scrubbedViolations = validator.validate(f);
- if (!scrubbedViolations.isEmpty()) {
- fixed = false;
- }
- }
- }
- if (!fixed) {
- String msg = "Data modified - File: " + metadataFile.getName() + "; Field: " + f.getDatasetField().getDatasetFieldType().getDisplayName() + "; "
- + "Invalid value: '" + f.getValue() + "'" + " Converted Value:'" + DatasetField.NA_VALUE + "'";
- cleanupLog.println(msg);
- f.setValue(DatasetField.NA_VALUE);
-
- }
+ // And if we already have a dataset with this same global id at
+ // this Dataverse instance, but it is a LOCAL dataset (can happen!),
+ // we're going to skip it also:
+ if (!existingDataset.isHarvested()) {
+ throw new ImportException("A LOCAL dataset with the global id " + globalIdString + " already exists in this dataverse; skipping.");
}
+ // For harvested datasets, there should always only be one version.
+ if (existingDataset.getVersions().size() != 1) {
+ throw new ImportException("Error importing Harvested Dataset, existing dataset has " + existingDataset.getVersions().size() + " versions");
+ }
+
+ // We will attempt to import the new version, and replace the
+ // current, already existing version with it.
+ harvestedVersion = parser.parseDatasetVersion(obj.getJsonObject("datasetVersion"));
+
+ // For the purposes of validation, the version needs to be attached
+ // to a non-null dataset. We will create a throwaway temporary
+ // dataset for this:
+ harvestedDataset = createTemporaryHarvestedDataset(harvestedVersion);
}
+
+ harvestedDataset.setOwner(owner);
- // A Global ID is required, in order for us to be able to harvest and import
- // this dataset:
- if (StringUtils.isEmpty(ds.getGlobalId().asString())) {
- throw new ImportException("The harvested metadata record with the OAI server identifier "+harvestIdentifier+" does not contain a global unique identifier that we could recognize, skipping.");
- }
-
- ds.setHarvestedFrom(harvestingClient);
- ds.setHarvestIdentifier(harvestIdentifier);
+ // Either a full new import, or an update of an existing harvested
+ // Dataset, perform some cleanup on the new version imported from the
+ // parsed metadata:
- Dataset existingDs = datasetService.findByGlobalId(ds.getGlobalId().asString());
+ harvestedVersion.setDatasetFields(harvestedVersion.initDatasetFields());
- if (existingDs != null) {
- // If this dataset already exists IN ANOTHER DATAVERSE
- // we are just going to skip it!
- if (existingDs.getOwner() != null && !owner.getId().equals(existingDs.getOwner().getId())) {
- throw new ImportException("The dataset with the global id "+ds.getGlobalId().asString()+" already exists, in the dataverse "+existingDs.getOwner().getAlias()+", skipping.");
- }
- // And if we already have a dataset with this same id, in this same
- // dataverse, but it is LOCAL dataset (can happen!), we're going to
- // skip it also:
- if (!existingDs.isHarvested()) {
- throw new ImportException("A LOCAL dataset with the global id "+ds.getGlobalId().asString()+" already exists in this dataverse; skipping.");
- }
- // For harvested datasets, there should always only be one version.
- // We will replace the current version with the imported version.
- if (existingDs.getVersions().size() != 1) {
- throw new ImportException("Error importing Harvested Dataset, existing dataset has " + existingDs.getVersions().size() + " versions");
- }
- // Purge all the SOLR documents associated with this client from the
- // index server:
- indexService.deleteHarvestedDocuments(existingDs);
- // files from harvested datasets are removed unceremoniously,
- // directly in the database. no need to bother calling the
- // DeleteFileCommand on them.
- for (DataFile harvestedFile : existingDs.getFiles()) {
- DataFile merged = em.merge(harvestedFile);
- em.remove(merged);
- harvestedFile = null;
- }
- // TODO:
- // Verify what happens with the indexed files in SOLR?
- // are they going to be overwritten by the reindexing of the dataset?
- existingDs.setFiles(null);
- Dataset merged = em.merge(existingDs);
- // harvested datasets don't have physical files - so no need to worry about that.
- engineSvc.submit(new DestroyDatasetCommand(merged, dataverseRequest));
+ if (harvestedVersion.getReleaseTime() == null) {
+ harvestedVersion.setReleaseTime(oaiDateStamp);
}
+
+ // Check data against validation constraints.
+ // Make an attempt to sanitize any invalid fields encountered -
+ // missing required fields or invalid values, by filling the values
+ // with NAs.
+
+ boolean sanitized = validateAndSanitizeVersionMetadata(harvestedVersion, cleanupLog);
+
+ // Note: this sanitizing approach of replacing invalid values with
+ // "NA" does not work with certain fields. For example, using it to
+ // populate a GeoBox coordinate value will result in an invalid
+ // field. So we will attempt to re-validate the sanitized version.
+ // This time around, it will throw an exception if still invalid, so
+ // that we'll stop before proceeding any further:
- importedDataset = engineSvc.submit(new CreateHarvestedDatasetCommand(ds, dataverseRequest));
+ if (sanitized) {
+ validateVersionMetadata(harvestedVersion, cleanupLog);
+ }
+
+ DatasetFieldUtil.tidyUpFields(harvestedVersion.getDatasetFields(), true);
+
+ if (existingDataset != null) {
+ importedDataset = engineSvc.submit(new UpdateHarvestedDatasetCommand(existingDataset, harvestedVersion, dataverseRequest));
+ } else {
+ importedDataset = engineSvc.submit(new CreateHarvestedDatasetCommand(harvestedDataset, dataverseRequest));
+ }
} catch (JsonParseException | ImportException | CommandException ex) {
logger.fine("Failed to import harvested dataset: " + ex.getClass() + ": " + ex.getMessage());
@@ -439,7 +452,7 @@ public JsonObjectBuilder doImport(DataverseRequest dataverseRequest, Dataverse o
ds.setOwner(owner);
ds.getLatestVersion().setDatasetFields(ds.getLatestVersion().initDatasetFields());
- // Check data against required contraints
+ // Check data against required constraints
List> violations = ds.getVersions().get(0).validateRequired();
if (!violations.isEmpty()) {
if ( importType.equals(ImportType.HARVEST) ) {
@@ -696,6 +709,104 @@ private String convertInvalidDateString(String inString){
return null;
}
+ /**
+ * A shortcut method for validating AND attempting to sanitize a DatasetVersion
+ * @param version
+ * @param cleanupLog - any invalid values and their replacements are logged there
+ * @return true if any invalid values were encountered and sanitized
+ * @throws ImportException (although it should never happen in this mode)
+ */
+ private boolean validateAndSanitizeVersionMetadata(DatasetVersion version, PrintWriter cleanupLog) throws ImportException {
+ return validateVersionMetadata(version, true, cleanupLog);
+ }
+
+ /**
+ * A shortcut method for validating a DatasetVersion; will throw an exception
+ * if invalid, without attempting to sanitize the invalid values.
+ * @param version
+ * @param log - will log the invalid fields encountered there
+ * @throws ImportException
+ */
+ private void validateVersionMetadata(DatasetVersion version, PrintWriter log) throws ImportException {
+ validateVersionMetadata(version, false, log);
+ }
+
+ /**
+ * Validate the metadata fields of a newly-created version, and depending on
+ * the "sanitize" flag supplied, may or may not attempt to sanitize the supplied
+ * values by replacing them with "NA"s.
+ * @param version
+ * @param sanitize - boolean indicating whether to attempt to fix invalid values
+ * @param cleanupLog - any invalid values encountered will be logged there
+ * @return - true if any invalid values have been replaced
+ * @throws ImportException
+ */
+ private boolean validateVersionMetadata(DatasetVersion version, boolean sanitize, PrintWriter cleanupLog) throws ImportException {
+ boolean fixed = false;
+ Set invalidViolations = version.validate();
+ if (!invalidViolations.isEmpty()) {
+ for (ConstraintViolation v : invalidViolations) {
+ Object invalid = v.getRootBean();
+ String msg = "";
+ if (invalid instanceof DatasetField) {
+ DatasetField f = (DatasetField) invalid;
+
+ msg += "Missing required field: " + f.getDatasetFieldType().getDisplayName() + ";";
+ if (sanitize) {
+ msg += " populated with '" + DatasetField.NA_VALUE + "'";
+ f.setSingleValue(DatasetField.NA_VALUE);
+ fixed = true;
+ }
+ } else if (invalid instanceof DatasetFieldValue) {
+ DatasetFieldValue fv = (DatasetFieldValue) invalid;
+
+ msg += "Invalid metadata field: " + fv.getDatasetField().getDatasetFieldType().getDisplayName() + "; "
+ + "Invalid value: '" + fv.getValue() + "'";
+ if (sanitize) {
+ msg += ", replaced with '" + DatasetField.NA_VALUE + "'";
+ fv.setValue(DatasetField.NA_VALUE);
+ fixed = true;
+ }
+ } else {
+ // DatasetVersion.validate() can also produce constraint violations
+ // in TermsOfUse and FileMetadata classes.
+ // We do not make any attempt to sanitize those.
+ if (invalid != null) {
+ msg += "Invalid " + invalid.getClass().getName() + ": " + v.getMessage();
+ }
+ }
+ cleanupLog.println(msg);
+
+ // Note: "NA" does not work with certain fields. For example,
+ // using it to populate a GeoBox coordinate value is going
+ // to result in an invalid field. So we'll need to validate the
+ // version again after the first, sanitizing pass and see if it
+ // helped or not.
+ }
+ if (!sanitize) {
+ throw new ImportException("Version was still failing validation after the first attempt to sanitize the invalid values.");
+ }
+ }
+ return fixed;
+ }
+
+ /**
+ * Helper method that creates a throwaway Harvested Dataset to temporarily
+ * attach the newly-harvested version to. We need this when, instead of
+ * importing a brand-new harvested dataset from scratch, we are planning to
+ * attempt to update an already existing dataset harvested from the same
+ * archival location.
+ * @param harvestedVersion - a newly created Version imported from harvested metadata
+ * @return - a temporary dataset to which the new version has been attached
+ */
+ private Dataset createTemporaryHarvestedDataset(DatasetVersion harvestedVersion) {
+ Dataset tempDataset = new Dataset();
+ harvestedVersion.setDataset(tempDataset);
+ tempDataset.setVersions(new ArrayList<>(1));
+ tempDataset.getVersions().add(harvestedVersion);
+
+ return tempDataset;
+ }
private static class MyCustomFormatter extends Formatter {
diff --git a/src/main/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/AbstractOAuth2AuthenticationProvider.java b/src/main/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/AbstractOAuth2AuthenticationProvider.java
index 7fd7bf3e885..a6b7c1b9d49 100644
--- a/src/main/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/AbstractOAuth2AuthenticationProvider.java
+++ b/src/main/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/AbstractOAuth2AuthenticationProvider.java
@@ -30,7 +30,7 @@
*/
public abstract class AbstractOAuth2AuthenticationProvider implements AuthenticationProvider {
- final static Logger logger = Logger.getLogger(AbstractOAuth2AuthenticationProvider.class.getName());
+ static final Logger logger = Logger.getLogger(AbstractOAuth2AuthenticationProvider.class.getName());
protected static class ParsedUserResponse {
public final AuthenticatedUserDisplayInfo displayInfo;
diff --git a/src/main/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/impl/OrcidOAuth2AP.java b/src/main/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/impl/OrcidOAuth2AP.java
index 089ca40e164..323c78ab47a 100644
--- a/src/main/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/impl/OrcidOAuth2AP.java
+++ b/src/main/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/impl/OrcidOAuth2AP.java
@@ -49,7 +49,7 @@
*/
public class OrcidOAuth2AP extends AbstractOAuth2AuthenticationProvider {
- final static Logger logger = Logger.getLogger(OrcidOAuth2AP.class.getName());
+ static final Logger logger = Logger.getLogger(OrcidOAuth2AP.class.getName());
public static final String PROVIDER_ID_PRODUCTION = "orcid";
public static final String PROVIDER_ID_SANDBOX = "orcid-sandbox";
diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java
index 3bf2107e52b..d0da66c38e0 100644
--- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java
+++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java
@@ -215,7 +215,7 @@ public long retrieveSizeFromMedia() {
JsonArray dataArray = responseJson.getJsonArray("DATA");
if (dataArray != null && dataArray.size() != 0) {
//File found
- return (long) responseJson.getJsonArray("DATA").getJsonObject(0).getJsonNumber("size").longValueExact();
+ return (long) dataArray.getJsonObject(0).getJsonNumber("size").longValueExact();
}
} else {
logger.warning("Response from " + get.getURI().toString() + " was "
diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java
index d2fdec7b323..5b9e496281f 100644
--- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java
+++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java
@@ -753,6 +753,12 @@ public Path getFileSystemPath() throws UnsupportedDataAccessOperationException {
@Override
public boolean exists() {
+ try {
+ key = getMainFileKey();
+ } catch (IOException e) {
+ logger.warning("Caught an IOException in S3AccessIO.exists(): " + e.getMessage());
+ return false;
+ }
String destinationKey = null;
if (dvObject instanceof DataFile) {
destinationKey = key;
diff --git a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java
index a470f08f736..6b98848021c 100644
--- a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java
+++ b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java
@@ -136,8 +136,6 @@ public class AddReplaceFileHelper{
private String newFileName; // step 30
private String newFileContentType; // step 30
private String newStorageIdentifier; // step 30
- private String newCheckSum; // step 30
- private ChecksumType newCheckSumType; //step 30
// -- Optional
private DataFile fileToReplace; // step 25
@@ -146,6 +144,7 @@ public class AddReplaceFileHelper{
private DatasetVersion clone;
List initialFileList;
List finalFileList;
+ private boolean trustSuppliedFileSizes;
// -----------------------------------
// Ingested files
@@ -610,15 +609,9 @@ private boolean runAddReplacePhase1(Dataset owner,
return false;
}
- if(optionalFileParams != null) {
- if(optionalFileParams.hasCheckSum()) {
- newCheckSum = optionalFileParams.getCheckSum();
- newCheckSumType = optionalFileParams.getCheckSumType();
- }
- }
msgt("step_030_createNewFilesViaIngest");
- if (!this.step_030_createNewFilesViaIngest()){
+ if (!this.step_030_createNewFilesViaIngest(optionalFileParams)){
return false;
}
@@ -1191,7 +1184,7 @@ private boolean step_007_auto_isReplacementInLatestVersion(DataFile existingFile
}
- private boolean step_030_createNewFilesViaIngest(){
+ private boolean step_030_createNewFilesViaIngest(OptionalFileParams optionalFileParams){
if (this.hasError()){
return false;
@@ -1203,21 +1196,28 @@ private boolean step_030_createNewFilesViaIngest(){
//Don't repeatedly update the clone (losing changes) in multifile case
clone = workingVersion.cloneDatasetVersion();
}
+
+ Long suppliedFileSize = null;
+ String newCheckSum = null;
+ ChecksumType newCheckSumType = null;
+
+
+ if (optionalFileParams != null) {
+ if (optionalFileParams.hasCheckSum()) {
+ newCheckSum = optionalFileParams.getCheckSum();
+ newCheckSumType = optionalFileParams.getCheckSumType();
+ }
+ if (trustSuppliedFileSizes && optionalFileParams.hasFileSize()) {
+ suppliedFileSize = optionalFileParams.getFileSize();
+ }
+ }
+
try {
- /*CreateDataFileResult result = FileUtil.createDataFiles(workingVersion,
- this.newFileInputStream,
- this.newFileName,
- this.newFileContentType,
- this.newStorageIdentifier,
- this.newCheckSum,
- this.newCheckSumType,
- this.systemConfig);*/
-
UploadSessionQuotaLimit quota = null;
if (systemConfig.isStorageQuotasEnforced()) {
quota = fileService.getUploadSessionQuotaLimit(dataset);
}
- Command cmd = new CreateNewDataFilesCommand(dvRequest, workingVersion, newFileInputStream, newFileName, newFileContentType, newStorageIdentifier, quota, newCheckSum, newCheckSumType);
+ Command cmd = new CreateNewDataFilesCommand(dvRequest, workingVersion, newFileInputStream, newFileName, newFileContentType, newStorageIdentifier, quota, newCheckSum, newCheckSumType, suppliedFileSize);
CreateDataFileResult createDataFilesResult = commandEngine.submit(cmd);
initialFileList = createDataFilesResult.getDataFiles();
@@ -2033,9 +2033,15 @@ public void setDuplicateFileWarning(String duplicateFileWarning) {
* @param jsonData - an array of jsonData entries (one per file) using the single add file jsonData format
* @param dataset
* @param authUser
+ * @param trustSuppliedFileSizes - whether to accept the fileSize values passed
+ * in jsonData. (We don't want to trust the users of the S3 direct
+ * upload API with that information - we will verify the status of
+ * the files in the S3 bucket and confirm the sizes in the process.
+ * We do want GlobusService to be able to pass the file sizes, since
+ * they are obtained and verified via a Globus API lookup.)
* @return
*/
- public Response addFiles(String jsonData, Dataset dataset, User authUser) {
+ public Response addFiles(String jsonData, Dataset dataset, User authUser, boolean trustSuppliedFileSizes) {
msgt("(addFilesToDataset) jsonData: " + jsonData.toString());
JsonArrayBuilder jarr = Json.createArrayBuilder();
@@ -2044,6 +2050,7 @@ public Response addFiles(String jsonData, Dataset dataset, User authUser) {
int totalNumberofFiles = 0;
int successNumberofFiles = 0;
+ this.trustSuppliedFileSizes = trustSuppliedFileSizes;
// -----------------------------------------------------------
// Read jsonData and Parse files information from jsondata :
// -----------------------------------------------------------
@@ -2176,6 +2183,10 @@ public Response addFiles(String jsonData, Dataset dataset, User authUser) {
.add("data", Json.createObjectBuilder().add("Files", jarr).add("Result", result)).build() ).build();
}
+ public Response addFiles(String jsonData, Dataset dataset, User authUser) {
+ return addFiles(jsonData, dataset, authUser, false);
+ }
+
/**
* Replace multiple files with prepositioned replacements as listed in the
* jsonData. Works with direct upload, Globus, and other out-of-band methods.
diff --git a/src/main/java/edu/harvard/iq/dataverse/datasetutility/OptionalFileParams.java b/src/main/java/edu/harvard/iq/dataverse/datasetutility/OptionalFileParams.java
index 959dbc4e262..54844160163 100644
--- a/src/main/java/edu/harvard/iq/dataverse/datasetutility/OptionalFileParams.java
+++ b/src/main/java/edu/harvard/iq/dataverse/datasetutility/OptionalFileParams.java
@@ -39,6 +39,12 @@
* - Provenance related information
*
* @author rmp553
+ * @todo (?) We may want to consider renaming this class to DataFileParams or
+ * DataFileInfo... it was originally created to encode some bits of info -
+ * the file "tags" specifically, that didn't fit in elsewhere in the normal
+ * workflow; but it's been expanded to cover pretty much everything else associated
+ * with DataFiles and it's not really "optional" anymore when, for example, used
+ * in the direct upload workflow. (?)
*/
public class OptionalFileParams {
@@ -76,6 +82,8 @@ public class OptionalFileParams {
public static final String MIME_TYPE_ATTR_NAME = "mimeType";
private String checkSumValue;
private ChecksumType checkSumType;
+ public static final String FILE_SIZE_ATTR_NAME = "fileSize";
+ private Long fileSize;
public static final String LEGACY_CHECKSUM_ATTR_NAME = "md5Hash";
public static final String CHECKSUM_OBJECT_NAME = "checksum";
public static final String CHECKSUM_OBJECT_TYPE = "@type";
@@ -268,6 +276,18 @@ public String getCheckSum() {
public ChecksumType getCheckSumType() {
return checkSumType;
}
+
+ public boolean hasFileSize() {
+ return fileSize != null;
+ }
+
+ public Long getFileSize() {
+ return fileSize;
+ }
+
+ public void setFileSize(long fileSize) {
+ this.fileSize = fileSize;
+ }
/**
* Set tags
@@ -416,7 +436,13 @@ else if ((jsonObj.has(CHECKSUM_OBJECT_NAME)) && (!jsonObj.get(CHECKSUM_OBJECT_NA
this.checkSumType = ChecksumType.fromString(((JsonObject) jsonObj.get(CHECKSUM_OBJECT_NAME)).get(CHECKSUM_OBJECT_TYPE).getAsString());
}
-
+ // -------------------------------
+ // get file size as a Long, if supplied
+ // -------------------------------
+ if ((jsonObj.has(FILE_SIZE_ATTR_NAME)) && (!jsonObj.get(FILE_SIZE_ATTR_NAME).isJsonNull())){
+
+ this.fileSize = jsonObj.get(FILE_SIZE_ATTR_NAME).getAsLong();
+ }
// -------------------------------
// get tags
// -------------------------------
diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/DataverseRequest.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/DataverseRequest.java
index d792b616a0c..4d3ec2842a1 100644
--- a/src/main/java/edu/harvard/iq/dataverse/engine/command/DataverseRequest.java
+++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/DataverseRequest.java
@@ -26,9 +26,9 @@ public class DataverseRequest {
private final String invocationId;
private final HttpServletRequest httpServletRequest;
- private final static String undefined = "0.0.0.0";
+ private static final String undefined = "0.0.0.0";
- private final static String MDKEY_PREFIX="mdkey.";
+ private static final String MDKEY_PREFIX="mdkey.";
private static final Logger logger = Logger.getLogger(DataverseRequest.class.getName());
diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractCreateDatasetCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractCreateDatasetCommand.java
index db9dc142506..b36a638956f 100644
--- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractCreateDatasetCommand.java
+++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractCreateDatasetCommand.java
@@ -13,8 +13,10 @@
import edu.harvard.iq.dataverse.engine.command.exception.CommandException;
import edu.harvard.iq.dataverse.pidproviders.PidProvider;
import static edu.harvard.iq.dataverse.util.StringUtil.isEmpty;
+import java.io.IOException;
import java.util.Objects;
import java.util.logging.Logger;
+import org.apache.solr.client.solrj.SolrServerException;
/**;
* An abstract base class for commands that creates {@link Dataset}s.
@@ -148,9 +150,19 @@ public Dataset execute(CommandContext ctxt) throws CommandException {
//Use for code that requires database ids
postDBFlush(theDataset, ctxt);
-
- ctxt.index().asyncIndexDataset(theDataset, true);
-
+
+ if (harvested) {
+ try {
+ ctxt.index().indexDataset(theDataset, true);
+ } catch (SolrServerException | IOException solrEx) {
+ logger.warning("Failed to index harvested dataset. " + solrEx.getMessage());
+ }
+ } else {
+ // The asynchronous version does not throw any exceptions,
+ // logging them internally instead.
+ ctxt.index().asyncIndexDataset(theDataset, true);
+ }
+
return theDataset;
}
diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractWriteDataverseCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractWriteDataverseCommand.java
new file mode 100644
index 00000000000..91f3a5b823c
--- /dev/null
+++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractWriteDataverseCommand.java
@@ -0,0 +1,131 @@
+package edu.harvard.iq.dataverse.engine.command.impl;
+
+import edu.harvard.iq.dataverse.*;
+import edu.harvard.iq.dataverse.engine.command.AbstractCommand;
+import edu.harvard.iq.dataverse.engine.command.CommandContext;
+import edu.harvard.iq.dataverse.engine.command.DataverseRequest;
+import edu.harvard.iq.dataverse.engine.command.exception.CommandException;
+import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * An abstract base class for commands that perform write operations on {@link Dataverse}s.
+ * <p>
+ * {@link #execute(CommandContext)} first delegates to {@link #innerExecute(CommandContext)} and then
+ * applies the metadata blocks, facets and input levels supplied at construction time before saving.
+ */
+abstract class AbstractWriteDataverseCommand extends AbstractCommand<Dataverse> {
+
+    /** The dataverse being written; reassigned to the result of {@link #innerExecute(CommandContext)}. */
+    protected Dataverse dataverse;
+    private final List<DataverseFieldTypeInputLevel> inputLevels;
+    private final List<DatasetFieldType> facets;
+    protected final List<MetadataBlock> metadataBlocks;
+    /** When true, a {@code null} list argument clears the corresponding relation instead of leaving it as is. */
+    private final boolean resetRelationsOnNullValues;
+
+    /**
+     * @param dataverse                  the dataverse to write
+     * @param affectedDataverse          passed to the {@link AbstractCommand} constructor together with the request
+     * @param request                    the request issuing this command
+     * @param facets                     facets to set, or {@code null}; copied defensively
+     * @param inputLevels                input levels to set, or {@code null}; copied defensively
+     * @param metadataBlocks             metadata blocks to set, or {@code null}; copied defensively
+     * @param resetRelationsOnNullValues whether a {@code null} list (or, for metadata blocks, an empty one)
+     *                                   removes the existing relation rather than leaving it untouched
+     */
+    public AbstractWriteDataverseCommand(Dataverse dataverse,
+                                         Dataverse affectedDataverse,
+                                         DataverseRequest request,
+                                         List<DatasetFieldType> facets,
+                                         List<DataverseFieldTypeInputLevel> inputLevels,
+                                         List<MetadataBlock> metadataBlocks,
+                                         boolean resetRelationsOnNullValues) {
+        super(request, affectedDataverse);
+        this.dataverse = dataverse;
+        this.facets = copyOrNull(facets);
+        this.inputLevels = copyOrNull(inputLevels);
+        this.metadataBlocks = copyOrNull(metadataBlocks);
+        this.resetRelationsOnNullValues = resetRelationsOnNullValues;
+    }
+
+    /** Returns a mutable copy of {@code source}, or {@code null} when {@code source} is {@code null}. */
+    private static <T> List<T> copyOrNull(List<T> source) {
+        return source == null ? null : new ArrayList<>(source);
+    }
+
+    /**
+     * Runs {@link #innerExecute(CommandContext)}, applies metadata blocks, facets and input levels to the
+     * dataverse it returns, and saves the result.
+     *
+     * @return the dataverse returned by {@code ctxt.dataverses().save(...)}
+     * @throws CommandException propagated from {@link #innerExecute(CommandContext)}
+     */
+    @Override
+    public Dataverse execute(CommandContext ctxt) throws CommandException {
+        dataverse = innerExecute(ctxt);
+
+        processMetadataBlocks();
+        processFacets(ctxt);
+        processInputLevels(ctxt);
+
+        return ctxt.dataverses().save(dataverse);
+    }
+
+    /** Sets the supplied metadata blocks, or clears them when none were supplied and resetting is enabled. */
+    private void processMetadataBlocks() {
+        if (metadataBlocks != null && !metadataBlocks.isEmpty()) {
+            dataverse.setMetadataBlockRoot(true);
+            dataverse.setMetadataBlocks(metadataBlocks);
+        } else if (resetRelationsOnNullValues) {
+            dataverse.setMetadataBlockRoot(false);
+            dataverse.clearMetadataBlocks();
+        }
+    }
+
+    /** Replaces the dataverse's facets with the supplied list, or removes them when it is {@code null} and resetting is enabled. */
+    private void processFacets(CommandContext ctxt) {
+        if (facets != null) {
+            ctxt.facets().deleteFacetsFor(dataverse);
+            dataverse.setDataverseFacets(new ArrayList<>());
+
+            if (!facets.isEmpty()) {
+                dataverse.setFacetRoot(true);
+            }
+
+            // The list position of each facet is passed as the first argument to create().
+            for (int i = 0; i < facets.size(); i++) {
+                ctxt.facets().create(i, facets.get(i), dataverse);
+            }
+        } else if (resetRelationsOnNullValues) {
+            ctxt.facets().deleteFacetsFor(dataverse);
+            dataverse.setFacetRoot(false);
+        }
+    }
+
+    /** Replaces the dataverse's input levels with the supplied list, or removes them when it is {@code null} and resetting is enabled. */
+    private void processInputLevels(CommandContext ctxt) {
+        if (inputLevels != null) {
+            if (!inputLevels.isEmpty()) {
+                dataverse.addInputLevelsMetadataBlocksIfNotPresent(inputLevels);
+            }
+            ctxt.fieldTypeInputLevels().deleteFacetsFor(dataverse);
+            inputLevels.forEach(inputLevel -> {
+                inputLevel.setDataverse(dataverse);
+                ctxt.fieldTypeInputLevels().create(inputLevel);
+            });
+        } else if (resetRelationsOnNullValues) {
+            ctxt.fieldTypeInputLevels().deleteFacetsFor(dataverse);
+        }
+    }
+
+    /**
+     * Performs the subclass-specific part of the write and returns the dataverse that the
+     * remaining steps of {@link #execute(CommandContext)} operate on.
+     *
+     * @throws IllegalCommandException if the command cannot be carried out
+     */
+    abstract protected Dataverse innerExecute(CommandContext ctxt) throws IllegalCommandException;
+}
diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDataverseCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDataverseCommand.java
index 489b36e7cef..3728f3ee6ce 100644
--- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDataverseCommand.java
+++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDataverseCommand.java
@@ -6,11 +6,9 @@
import edu.harvard.iq.dataverse.authorization.groups.Group;
import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser;
import edu.harvard.iq.dataverse.authorization.users.User;
-import edu.harvard.iq.dataverse.engine.command.AbstractCommand;
import edu.harvard.iq.dataverse.engine.command.CommandContext;
import edu.harvard.iq.dataverse.engine.command.DataverseRequest;
import edu.harvard.iq.dataverse.engine.command.RequiredPermissions;
-import edu.harvard.iq.dataverse.engine.command.exception.CommandException;
import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException;
import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
@@ -27,48 +25,26 @@
* @author michael
*/
@RequiredPermissions(Permission.AddDataverse)
-public class CreateDataverseCommand extends AbstractCommand {
-
- private final Dataverse created;
- private final List inputLevelList;
- private final List facetList;
- private final List metadataBlocks;
+public class CreateDataverseCommand extends AbstractWriteDataverseCommand {
public CreateDataverseCommand(Dataverse created,
- DataverseRequest aRequest,
- List facetList,
- List inputLevelList) {
- this(created, aRequest, facetList, inputLevelList, null);
+ DataverseRequest request,
+ List facets,
+ List inputLevels) {
+ this(created, request, facets, inputLevels, null);
}
public CreateDataverseCommand(Dataverse created,
- DataverseRequest aRequest,
- List facetList,
- List inputLevelList,
+ DataverseRequest request,
+ List facets,
+ List inputLevels,
List metadataBlocks) {
- super(aRequest, created.getOwner());
- this.created = created;
- if (facetList != null) {
- this.facetList = new ArrayList<>(facetList);
- } else {
- this.facetList = null;
- }
- if (inputLevelList != null) {
- this.inputLevelList = new ArrayList<>(inputLevelList);
- } else {
- this.inputLevelList = null;
- }
- if (metadataBlocks != null) {
- this.metadataBlocks = new ArrayList<>(metadataBlocks);
- } else {
- this.metadataBlocks = null;
- }
+ super(created, created.getOwner(), request, facets, inputLevels, metadataBlocks, false);
}
@Override
- public Dataverse execute(CommandContext ctxt) throws CommandException {
-
- Dataverse owner = created.getOwner();
+ protected Dataverse innerExecute(CommandContext ctxt) throws IllegalCommandException {
+ Dataverse owner = dataverse.getOwner();
if (owner == null) {
if (ctxt.dataverses().isRootDataverseExists()) {
throw new IllegalCommandException("Root Dataverse already exists. Cannot create another one", this);
@@ -76,44 +52,44 @@ public Dataverse execute(CommandContext ctxt) throws CommandException {
}
if (metadataBlocks != null && !metadataBlocks.isEmpty()) {
- created.setMetadataBlockRoot(true);
- created.setMetadataBlocks(metadataBlocks);
+ dataverse.setMetadataBlockRoot(true);
+ dataverse.setMetadataBlocks(metadataBlocks);
}
- if (created.getCreateDate() == null) {
- created.setCreateDate(new Timestamp(new Date().getTime()));
+ if (dataverse.getCreateDate() == null) {
+ dataverse.setCreateDate(new Timestamp(new Date().getTime()));
}
- if (created.getCreator() == null) {
+ if (dataverse.getCreator() == null) {
final User user = getRequest().getUser();
if (user.isAuthenticated()) {
- created.setCreator((AuthenticatedUser) user);
+ dataverse.setCreator((AuthenticatedUser) user);
} else {
throw new IllegalCommandException("Guest users cannot create a Dataverse.", this);
}
}
- if (created.getDataverseType() == null) {
- created.setDataverseType(Dataverse.DataverseType.UNCATEGORIZED);
+ if (dataverse.getDataverseType() == null) {
+ dataverse.setDataverseType(Dataverse.DataverseType.UNCATEGORIZED);
}
- if (created.getDefaultContributorRole() == null) {
- created.setDefaultContributorRole(ctxt.roles().findBuiltinRoleByAlias(DataverseRole.EDITOR));
+ if (dataverse.getDefaultContributorRole() == null) {
+ dataverse.setDefaultContributorRole(ctxt.roles().findBuiltinRoleByAlias(DataverseRole.EDITOR));
}
// @todo for now we are saying all dataverses are permission root
- created.setPermissionRoot(true);
+ dataverse.setPermissionRoot(true);
- if (ctxt.dataverses().findByAlias(created.getAlias()) != null) {
- throw new IllegalCommandException("A dataverse with alias " + created.getAlias() + " already exists", this);
+ if (ctxt.dataverses().findByAlias(dataverse.getAlias()) != null) {
+ throw new IllegalCommandException("A dataverse with alias " + dataverse.getAlias() + " already exists", this);
}
- if (created.getFilePIDsEnabled() != null && !ctxt.settings().isTrueForKey(SettingsServiceBean.Key.AllowEnablingFilePIDsPerCollection, false)) {
+ if (dataverse.getFilePIDsEnabled() != null && !ctxt.settings().isTrueForKey(SettingsServiceBean.Key.AllowEnablingFilePIDsPerCollection, false)) {
throw new IllegalCommandException("File PIDs cannot be enabled per collection", this);
}
// Save the dataverse
- Dataverse managedDv = ctxt.dataverses().save(created);
+ Dataverse managedDv = ctxt.dataverses().save(dataverse);
// Find the built in admin role (currently by alias)
DataverseRole adminRole = ctxt.roles().findBuiltinRoleByAlias(DataverseRole.ADMIN);
@@ -160,33 +136,6 @@ public Dataverse execute(CommandContext ctxt) throws CommandException {
}
managedDv.setPermissionModificationTime(new Timestamp(new Date().getTime()));
-
- if (facetList != null) {
- ctxt.facets().deleteFacetsFor(managedDv);
-
- if (!facetList.isEmpty()) {
- managedDv.setFacetRoot(true);
- }
-
- int i = 0;
- for (DatasetFieldType df : facetList) {
- ctxt.facets().create(i++, df, managedDv);
- }
- }
-
- if (inputLevelList != null) {
- if (!inputLevelList.isEmpty()) {
- managedDv.addInputLevelsMetadataBlocksIfNotPresent(inputLevelList);
- }
- ctxt.fieldTypeInputLevels().deleteFacetsFor(managedDv);
- for (DataverseFieldTypeInputLevel inputLevel : inputLevelList) {
- inputLevel.setDataverse(managedDv);
- ctxt.fieldTypeInputLevels().create(inputLevel);
- }
- }
-
- // TODO: save is called here and above; we likely don't need both
- managedDv = ctxt.dataverses().save(managedDv);
return managedDv;
}
@@ -194,5 +143,4 @@ public Dataverse execute(CommandContext ctxt) throws CommandException {
public boolean onSuccess(CommandContext ctxt, Object r) {
return ctxt.dataverses().index((Dataverse) r);
}
-
}
diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java
index 3a21345448b..e9a2025b112 100644
--- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java
+++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java
@@ -2,34 +2,29 @@
import edu.harvard.iq.dataverse.DataFile;
import edu.harvard.iq.dataverse.DatasetVersion;
+import edu.harvard.iq.dataverse.Dataverse;
import edu.harvard.iq.dataverse.authorization.Permission;
import edu.harvard.iq.dataverse.datasetutility.FileExceedsMaxSizeException;
import edu.harvard.iq.dataverse.datasetutility.FileSizeChecker;
-import static edu.harvard.iq.dataverse.datasetutility.FileSizeChecker.bytesToHumanReadable;
import edu.harvard.iq.dataverse.engine.command.AbstractCommand;
import edu.harvard.iq.dataverse.engine.command.CommandContext;
import edu.harvard.iq.dataverse.engine.command.DataverseRequest;
-//import edu.harvard.iq.dataverse.engine.command.RequiredPermissions;
import edu.harvard.iq.dataverse.engine.command.exception.CommandException;
import edu.harvard.iq.dataverse.engine.command.exception.CommandExecutionException;
import edu.harvard.iq.dataverse.ingest.IngestServiceShapefileHelper;
-import edu.harvard.iq.dataverse.Dataverse;
import edu.harvard.iq.dataverse.storageuse.UploadSessionQuotaLimit;
-import edu.harvard.iq.dataverse.util.file.FileExceedsStorageQuotaException;
import edu.harvard.iq.dataverse.util.BundleUtil;
import edu.harvard.iq.dataverse.util.FileUtil;
-import static edu.harvard.iq.dataverse.util.FileUtil.MIME_TYPE_UNDETERMINED_DEFAULT;
-import static edu.harvard.iq.dataverse.util.FileUtil.createIngestFailureReport;
-import static edu.harvard.iq.dataverse.util.FileUtil.determineFileType;
-import static edu.harvard.iq.dataverse.util.FileUtil.determineFileTypeByNameAndExtension;
-import static edu.harvard.iq.dataverse.util.FileUtil.getFilesTempDirectory;
-import static edu.harvard.iq.dataverse.util.FileUtil.saveInputStreamInTempFile;
-import static edu.harvard.iq.dataverse.util.FileUtil.useRecognizedType;
import edu.harvard.iq.dataverse.util.ShapefileHandler;
import edu.harvard.iq.dataverse.util.StringUtil;
import edu.harvard.iq.dataverse.util.file.BagItFileHandler;
import edu.harvard.iq.dataverse.util.file.BagItFileHandlerFactory;
import edu.harvard.iq.dataverse.util.file.CreateDataFileResult;
+import edu.harvard.iq.dataverse.util.file.FileExceedsStorageQuotaException;
+import jakarta.enterprise.inject.spi.CDI;
+import org.apache.commons.io.FileUtils;
+import org.apache.commons.lang3.StringUtils;
+
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
@@ -42,7 +37,7 @@
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.Arrays;
-import java.util.Enumeration;
+import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
@@ -51,12 +46,17 @@
import java.util.Set;
import java.util.logging.Logger;
import java.util.zip.GZIPInputStream;
-import java.util.zip.ZipFile;
import java.util.zip.ZipEntry;
-import java.util.zip.ZipInputStream;
-import jakarta.enterprise.inject.spi.CDI;
-import org.apache.commons.io.FileUtils;
-import org.apache.commons.lang3.StringUtils;
+import java.util.zip.ZipFile;
+
+import static edu.harvard.iq.dataverse.datasetutility.FileSizeChecker.bytesToHumanReadable;
+import static edu.harvard.iq.dataverse.util.FileUtil.MIME_TYPE_UNDETERMINED_DEFAULT;
+import static edu.harvard.iq.dataverse.util.FileUtil.createIngestFailureReport;
+import static edu.harvard.iq.dataverse.util.FileUtil.determineFileType;
+import static edu.harvard.iq.dataverse.util.FileUtil.determineFileTypeByNameAndExtension;
+import static edu.harvard.iq.dataverse.util.FileUtil.getFilesTempDirectory;
+import static edu.harvard.iq.dataverse.util.FileUtil.saveInputStreamInTempFile;
+import static edu.harvard.iq.dataverse.util.FileUtil.useRecognizedType;
/**
*
@@ -93,6 +93,10 @@ public CreateNewDataFilesCommand(DataverseRequest aRequest, DatasetVersion versi
this(aRequest, version, inputStream, fileName, suppliedContentType, newStorageIdentifier, quota, newCheckSum, newCheckSumType, null, null);
}
+    /**
+     * Convenience overload that forwards every argument to the eleven-parameter constructor,
+     * passing {@code null} for the trailing parameter this overload does not take.
+     */
+    public CreateNewDataFilesCommand(DataverseRequest aRequest,
+                                     DatasetVersion version,
+                                     InputStream inputStream,
+                                     String fileName,
+                                     String suppliedContentType,
+                                     String newStorageIdentifier,
+                                     UploadSessionQuotaLimit quota,
+                                     String newCheckSum,
+                                     DataFile.ChecksumType newCheckSumType,
+                                     Long newFileSize) {
+        this(aRequest, version, inputStream, fileName, suppliedContentType, newStorageIdentifier, quota,
+                newCheckSum, newCheckSumType, newFileSize, null);
+    }
+
// This version of the command must be used when files are created in the
// context of creating a brand new dataset (from the Add Dataset page):
@@ -140,9 +144,10 @@ public CreateDataFileResult execute(CommandContext ctxt) throws CommandException
if (newStorageIdentifier == null) {
- if (getFilesTempDirectory() != null) {
+ var filesTempDirectory = getFilesTempDirectory();
+ if (filesTempDirectory != null) {
try {
- tempFile = Files.createTempFile(Paths.get(getFilesTempDirectory()), "tmp", "upload");
+ tempFile = Files.createTempFile(Paths.get(filesTempDirectory), "tmp", "upload");
// "temporary" location is the key here; this is why we are not using
// the DataStore framework for this - the assumption is that
// temp files will always be stored on the local filesystem.
@@ -260,10 +265,6 @@ public CreateDataFileResult execute(CommandContext ctxt) throws CommandException
// DataFile objects from its contents:
} else if (finalType.equals("application/zip")) {
- ZipFile zipFile = null;
- ZipInputStream unZippedIn = null;
- ZipEntry zipEntry = null;
-
int fileNumberLimit = ctxt.systemConfig().getZipUploadFilesLimit();
Long combinedUnzippedFileSize = 0L;
@@ -271,14 +272,14 @@ public CreateDataFileResult execute(CommandContext ctxt) throws CommandException
Charset charset = null;
/*
TODO: (?)
- We may want to investigate somehow letting the user specify
+ We may want to investigate somehow letting the user specify
the charset for the filenames in the zip file...
- - otherwise, ZipInputStream bails out if it encounteres a file
- name that's not valid in the current charest (i.e., UTF-8, in
- our case). It would be a bit trickier than what we're doing for
- SPSS tabular ingests - with the lang. encoding pulldown menu -
+ - otherwise, ZipInputStream bails out if it encounters a file
+ name that's not valid in the current charset (i.e., UTF-8, in
+ our case). It would be a bit trickier than what we're doing for
+ SPSS tabular ingests - with the lang. encoding pulldown menu -
because this encoding needs to be specified *before* we upload and
- attempt to unzip the file.
+ attempt to unzip the file.
-- L.A. 4.0 beta12
logger.info("default charset is "+Charset.defaultCharset().name());
if (Charset.isSupported("US-ASCII")) {
@@ -287,25 +288,21 @@ public CreateDataFileResult execute(CommandContext ctxt) throws CommandException
if (charset != null) {
logger.info("was able to obtain charset for US-ASCII");
}
-
+
}
*/
- /**
- * Perform a quick check for how many individual files are
- * inside this zip archive. If it's above the limit, we can
- * give up right away, without doing any unpacking.
+ /**
+ * Perform a quick check for how many individual files are
+ * inside this zip archive. If it's above the limit, we can
+ * give up right away, without doing any unpacking.
* This should be a fairly inexpensive operation, we just need
- * to read the directory at the end of the file.
+ * to read the directory at the end of the file.
*/
-
- if (charset != null) {
- zipFile = new ZipFile(tempFile.toFile(), charset);
- } else {
- zipFile = new ZipFile(tempFile.toFile());
- }
+
+
/**
- * The ZipFile constructors above will throw ZipException -
+ * The ZipFile constructors in openZipFile will throw ZipException -
* a type of IOException - if there's something wrong
* with this file as a zip. There's no need to intercept it
* here, it will be caught further below, with other IOExceptions,
@@ -313,8 +310,8 @@ public CreateDataFileResult execute(CommandContext ctxt) throws CommandException
* then attempt to save it as is.
*/
- int numberOfUnpackableFiles = 0;
-
+ int numberOfUnpackableFiles = 0;
+
/**
* Note that we can't just use zipFile.size(),
* unfortunately, since that's the total number of entries,
@@ -323,83 +320,46 @@ public CreateDataFileResult execute(CommandContext ctxt) throws CommandException
* that are files.
*/
- for (Enumeration extends ZipEntry> entries = zipFile.entries(); entries.hasMoreElements();) {
- ZipEntry entry = entries.nextElement();
- logger.fine("inside first zip pass; this entry: "+entry.getName());
- if (!entry.isDirectory()) {
- String shortName = entry.getName().replaceFirst("^.*[\\/]", "");
- // ... and, finally, check if it's a "fake" file - a zip archive entry
- // created for a MacOS X filesystem element: (these
- // start with "._")
- if (!shortName.startsWith("._") && !shortName.startsWith(".DS_Store") && !"".equals(shortName)) {
- numberOfUnpackableFiles++;
- if (numberOfUnpackableFiles > fileNumberLimit) {
- logger.warning("Zip upload - too many files in the zip to process individually.");
- warningMessage = "The number of files in the zip archive is over the limit (" + fileNumberLimit
- + "); please upload a zip archive with fewer files, if you want them to be ingested "
- + "as individual DataFiles.";
- throw new IOException();
- }
- // In addition to counting the files, we can
- // also check the file size while we're here,
- // provided the size limit is defined; if a single
- // file is above the individual size limit, unzipped,
- // we give up on unpacking this zip archive as well:
- if (fileSizeLimit != null && entry.getSize() > fileSizeLimit) {
- throw new FileExceedsMaxSizeException(MessageFormat.format(BundleUtil.getStringFromBundle("file.addreplace.error.file_exceeds_limit"), bytesToHumanReadable(entry.getSize()), bytesToHumanReadable(fileSizeLimit)));
- }
- // Similarly, we want to check if saving all these unpacked
- // files is going to push the disk usage over the
- // quota:
- if (storageQuotaLimit != null) {
- combinedUnzippedFileSize = combinedUnzippedFileSize + entry.getSize();
- if (combinedUnzippedFileSize > storageQuotaLimit) {
- //throw new FileExceedsStorageQuotaException(MessageFormat.format(BundleUtil.getStringFromBundle("file.addreplace.error.quota_exceeded"), bytesToHumanReadable(combinedUnzippedFileSize), bytesToHumanReadable(storageQuotaLimit)));
- // change of plans: if the unzipped content inside exceeds the remaining quota,
- // we reject the upload outright, rather than accepting the zip
- // file as is.
- throw new CommandExecutionException(MessageFormat.format(BundleUtil.getStringFromBundle("file.addreplace.error.unzipped.quota_exceeded"), bytesToHumanReadable(storageQuotaLimit)), this);
- }
+ try (var zipFile = openZipFile(tempFile, charset)) {
+ var zipEntries = filteredZipEntries(zipFile);
+ for (var entry : zipEntries) {
+ logger.fine("inside first zip pass; this entry: " + entry.getName());
+ numberOfUnpackableFiles++;
+ if (numberOfUnpackableFiles > fileNumberLimit) {
+ logger.warning("Zip upload - too many files in the zip to process individually.");
+ warningMessage = "The number of files in the zip archive is over the limit (" + fileNumberLimit
+ + "); please upload a zip archive with fewer files, if you want them to be ingested "
+ + "as individual DataFiles.";
+ throw new IOException();
+ }
+ // In addition to counting the files, we can
+ // also check the file size while we're here,
+ // provided the size limit is defined; if a single
+ // file is above the individual size limit, unzipped,
+ // we give up on unpacking this zip archive as well:
+ if (fileSizeLimit != null && entry.getSize() > fileSizeLimit) {
+ throw new FileExceedsMaxSizeException(MessageFormat.format(BundleUtil.getStringFromBundle("file.addreplace.error.file_exceeds_limit"), bytesToHumanReadable(entry.getSize()), bytesToHumanReadable(fileSizeLimit)));
+ }
+ // Similarly, we want to check if saving all these unpacked
+ // files is going to push the disk usage over the
+ // quota:
+ if (storageQuotaLimit != null) {
+ combinedUnzippedFileSize = combinedUnzippedFileSize + entry.getSize();
+ if (combinedUnzippedFileSize > storageQuotaLimit) {
+ //throw new FileExceedsStorageQuotaException(MessageFormat.format(BundleUtil.getStringFromBundle("file.addreplace.error.quota_exceeded"), bytesToHumanReadable(combinedUnzippedFileSize), bytesToHumanReadable(storageQuotaLimit)));
+ // change of plans: if the unzipped content inside exceeds the remaining quota,
+ // we reject the upload outright, rather than accepting the zip
+ // file as is.
+ throw new CommandExecutionException(MessageFormat.format(BundleUtil.getStringFromBundle("file.addreplace.error.unzipped.quota_exceeded"), bytesToHumanReadable(storageQuotaLimit)), this);
}
}
}
- }
-
- // OK we're still here - that means we can proceed unzipping.
-
- // Close the ZipFile, re-open as ZipInputStream:
- zipFile.close();
- // reset:
- combinedUnzippedFileSize = 0L;
-
- if (charset != null) {
- unZippedIn = new ZipInputStream(new FileInputStream(tempFile.toFile()), charset);
- } else {
- unZippedIn = new ZipInputStream(new FileInputStream(tempFile.toFile()));
- }
-
- while (true) {
- try {
- zipEntry = unZippedIn.getNextEntry();
- } catch (IllegalArgumentException iaex) {
- // Note:
- // ZipInputStream documentation doesn't even mention that
- // getNextEntry() throws an IllegalArgumentException!
- // but that's what happens if the file name of the next
- // entry is not valid in the current CharSet.
- // -- L.A.
- warningMessage = "Failed to unpack Zip file. (Unknown Character Set used in a file name?) Saving the file as is.";
- logger.warning(warningMessage);
- throw new IOException();
- }
+ // OK we're still here - that means we can proceed unzipping.
- if (zipEntry == null) {
- break;
- }
- // Note that some zip entries may be directories - we
- // simply skip them:
+ // reset:
+ combinedUnzippedFileSize = 0L;
- if (!zipEntry.isDirectory()) {
+ for (var entry : zipEntries) {
if (datafiles.size() > fileNumberLimit) {
logger.warning("Zip upload - too many files.");
warningMessage = "The number of files in the zip archive is over the limit (" + fileNumberLimit
@@ -407,72 +367,55 @@ public CreateDataFileResult execute(CommandContext ctxt) throws CommandException
+ "as individual DataFiles.";
throw new IOException();
}
-
- String fileEntryName = zipEntry.getName();
+ var fileEntryName = entry.getName();
+ var shortName = getShortName(fileEntryName);
logger.fine("ZipEntry, file: " + fileEntryName);
+ String storageIdentifier = FileUtil.generateStorageIdentifier();
+ File unzippedFile = new File(getFilesTempDirectory() + "/" + storageIdentifier);
+ Files.copy(zipFile.getInputStream(entry), unzippedFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
+ // No need to check the size of this unpacked file against the size limit,
+ // since we've already checked for that in the first pass.
+ DataFile datafile = FileUtil.createSingleDataFile(version, null, storageIdentifier, shortName,
+ MIME_TYPE_UNDETERMINED_DEFAULT,
+ ctxt.systemConfig().getFileFixityChecksumAlgorithm(), null, false);
+
+ if (!fileEntryName.equals(shortName)) {
+ // If the filename looks like a hierarchical folder name (i.e., contains slashes and backslashes),
+ // we'll extract the directory name; then subject it to some "aggressive sanitizing" - strip all
+ // the leading, trailing and duplicate slashes; then replace all the characters that
+ // don't pass our validation rules.
+ String directoryName = fileEntryName.replaceFirst("[\\\\/][\\\\/]*[^\\\\/]*$", "");
+ directoryName = StringUtil.sanitizeFileDirectory(directoryName, true);
+ // if (!"".equals(directoryName)) {
+ if (!StringUtil.isEmpty(directoryName)) {
+ logger.fine("setting the directory label to " + directoryName);
+ datafile.getFileMetadata().setDirectoryLabel(directoryName);
+ }
+ }
- if (fileEntryName != null && !fileEntryName.equals("")) {
-
- String shortName = fileEntryName.replaceFirst("^.*[\\/]", "");
-
- // Check if it's a "fake" file - a zip archive entry
- // created for a MacOS X filesystem element: (these
- // start with "._")
- if (!shortName.startsWith("._") && !shortName.startsWith(".DS_Store") && !"".equals(shortName)) {
- // OK, this seems like an OK file entry - we'll try
- // to read it and create a DataFile with it:
-
- String storageIdentifier = FileUtil.generateStorageIdentifier();
- File unzippedFile = new File(getFilesTempDirectory() + "/" + storageIdentifier);
- Files.copy(unZippedIn, unzippedFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
- // No need to check the size of this unpacked file against the size limit,
- // since we've already checked for that in the first pass.
-
- DataFile datafile = FileUtil.createSingleDataFile(version, null, storageIdentifier, shortName,
- MIME_TYPE_UNDETERMINED_DEFAULT,
- ctxt.systemConfig().getFileFixityChecksumAlgorithm(), null, false);
-
- if (!fileEntryName.equals(shortName)) {
- // If the filename looks like a hierarchical folder name (i.e., contains slashes and backslashes),
- // we'll extract the directory name; then subject it to some "aggressive sanitizing" - strip all
- // the leading, trailing and duplicate slashes; then replace all the characters that
- // don't pass our validation rules.
- String directoryName = fileEntryName.replaceFirst("[\\\\/][\\\\/]*[^\\\\/]*$", "");
- directoryName = StringUtil.sanitizeFileDirectory(directoryName, true);
- // if (!"".equals(directoryName)) {
- if (!StringUtil.isEmpty(directoryName)) {
- logger.fine("setting the directory label to " + directoryName);
- datafile.getFileMetadata().setDirectoryLabel(directoryName);
- }
- }
+ if (datafile != null) {
+ // We have created this datafile with the mime type "unknown";
+ // Now that we have it saved in a temporary location,
+ // let's try and determine its real type:
- if (datafile != null) {
- // We have created this datafile with the mime type "unknown";
- // Now that we have it saved in a temporary location,
- // let's try and determine its real type:
-
- String tempFileName = getFilesTempDirectory() + "/" + datafile.getStorageIdentifier();
-
- try {
- recognizedType = determineFileType(unzippedFile, shortName);
- // null the File explicitly, to release any open FDs:
- unzippedFile = null;
- logger.fine("File utility recognized unzipped file as " + recognizedType);
- if (recognizedType != null && !recognizedType.equals("")) {
- datafile.setContentType(recognizedType);
- }
- } catch (Exception ex) {
- logger.warning("Failed to run the file utility mime type check on file " + fileName);
- }
-
- datafiles.add(datafile);
- combinedUnzippedFileSize += datafile.getFilesize();
+ String tempFileName = getFilesTempDirectory() + "/" + datafile.getStorageIdentifier();
+
+ try {
+ recognizedType = determineFileType(unzippedFile, shortName);
+ // null the File explicitly, to release any open FDs:
+ unzippedFile = null;
+ logger.fine("File utility recognized unzipped file as " + recognizedType);
+ if (recognizedType != null && !recognizedType.equals("")) {
+ datafile.setContentType(recognizedType);
}
+ } catch (Exception ex) {
+ logger.warning("Failed to run the file utility mime type check on file " + fileName);
}
+
+ datafiles.add(datafile);
+ combinedUnzippedFileSize += datafile.getFilesize();
}
}
- unZippedIn.closeEntry();
-
}
} catch (IOException ioex) {
@@ -494,18 +437,7 @@ public CreateDataFileResult execute(CommandContext ctxt) throws CommandException
//warningMessage = BundleUtil.getStringFromBundle("file.addreplace.warning.unzip.failed.quota", Arrays.asList(FileSizeChecker.bytesToHumanReadable(storageQuotaLimit)));
//datafiles.clear();
throw new CommandExecutionException(fesqx.getMessage(), fesqx, this);
- }*/ finally {
- if (zipFile != null) {
- try {
- zipFile.close();
- } catch (Exception zEx) {}
- }
- if (unZippedIn != null) {
- try {
- unZippedIn.close();
- } catch (Exception zEx) {}
- }
- }
+ }*/
if (!datafiles.isEmpty()) {
// remove the uploaded zip file:
try {
@@ -591,7 +523,8 @@ public CreateDataFileResult execute(CommandContext ctxt) throws CommandException
// The try-catch is due to error encountered in using NFS for stocking file,
// cf. https://github.com/IQSS/dataverse/issues/5909
try {
- FileUtils.deleteDirectory(rezipFolder);
+ if (rezipFolder!=null)
+ FileUtils.deleteDirectory(rezipFolder);
} catch (IOException ioex) {
// do nothing - it's a temp folder.
logger.warning("Could not remove temp folder, error message : " + ioex.getMessage());
@@ -730,7 +663,37 @@ public CreateDataFileResult execute(CommandContext ctxt) throws CommandException
return CreateDataFileResult.error(fileName, finalType);
} // end createDataFiles
-
+
+    /**
+     * Lists the entries of {@code zipFile} that should be unpacked into individual files.
+     * <p>
+     * Directory entries, entries with an empty name, and entries rejected by
+     * {@link #isFileToSkip(String)} are left out. The name of every entry examined is logged at
+     * FINE level.
+     *
+     * @param zipFile an open zip archive
+     * @return the remaining entries, in the order reported by {@link ZipFile#entries()}
+     */
+    private static List<? extends ZipEntry> filteredZipEntries(ZipFile zipFile) {
+        return Collections.list(zipFile.entries()).stream().filter(entry -> {
+            var entryName = entry.getName();
+            logger.fine("ZipEntry, file: " + entryName);
+            return !entry.isDirectory() && !entryName.isEmpty() && !isFileToSkip(entryName);
+        }).toList();
+    }
+
+    /**
+     * Opens the file at {@code tempFile} as a zip archive.
+     *
+     * @param tempFile path of the archive
+     * @param charset  charset handed to the {@link ZipFile} constructor, or {@code null} to use the
+     *                 single-argument constructor
+     * @return the opened archive; it is not closed here, so the caller has to close it
+     * @throws IOException if the {@link ZipFile} constructor fails
+     */
+    private static ZipFile openZipFile(Path tempFile, Charset charset) throws IOException {
+        var archive = tempFile.toFile();
+        return charset == null ? new ZipFile(archive) : new ZipFile(archive, charset);
+    }
+
+    /**
+     * Tells whether a zip entry should be ignored when unpacking.
+     * <p>
+     * An entry is ignored when its short name (see {@link #getShortName(String)}) is empty or
+     * starts with {@code "._"} or {@code ".DS_Store"}.
+     *
+     * @param fileName full name of the zip entry
+     * @return {@code true} if the entry should be skipped
+     */
+    private static boolean isFileToSkip(String fileName) {
+        var baseName = getShortName(fileName);
+        if (baseName.isEmpty()) {
+            return true;
+        }
+        // "._"-prefixed entries are the "fake" files that end up in a zip archive for
+        // MacOS X filesystem elements.
+        return baseName.startsWith("._") || baseName.startsWith(".DS_Store");
+    }
+
+    /**
+     * Strips the leading path from a zip entry name, i.e. everything up to and including the
+     * last {@code '/'}. A name without a {@code '/'} is returned unchanged.
+     * <p>
+     * NOTE(review): the character class {@code [\/]} only matches a forward slash; names that use
+     * backslashes as separators are returned as is. Confirm whether that is intended.
+     *
+     * @param fileName full name of the zip entry
+     * @return the part of the name after the last {@code '/'}
+     */
+    private static String getShortName(String fileName) {
+        final String leadingPathPattern = "^.*[\\/]";
+        return fileName.replaceFirst(leadingPathPattern, "");
+    }
+
@Override
public Map> getRequiredPermissions() {
Map> ret = new HashMap<>();
diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CuratePublishedDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CuratePublishedDatasetVersionCommand.java
index e6e8279a314..e378e2e2ef7 100644
--- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CuratePublishedDatasetVersionCommand.java
+++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CuratePublishedDatasetVersionCommand.java
@@ -131,7 +131,7 @@ public Dataset execute(CommandContext ctxt) throws CommandException {
throw new IllegalCommandException(BundleUtil.getStringFromBundle("datasetversion.update.failure"), this);
} else {
- metadataUpdated = DatasetVersionDifference.compareFileMetadatas(publishedFmd, draftFmd);
+ metadataUpdated = !DatasetVersionDifference.compareFileMetadatas(publishedFmd, draftFmd).isEmpty();
publishedFmd.setLabel(draftFmd.getLabel());
publishedFmd.setDescription(draftFmd.getDescription());
publishedFmd.setCategories(draftFmd.getCategories());
diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/PublishDatasetCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/PublishDatasetCommand.java
index 1ac41105237..902bea7f833 100644
--- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/PublishDatasetCommand.java
+++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/PublishDatasetCommand.java
@@ -11,6 +11,9 @@
import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException;
import edu.harvard.iq.dataverse.workflow.Workflow;
import edu.harvard.iq.dataverse.workflow.WorkflowContext.TriggerType;
+
+import jakarta.persistence.OptimisticLockException;
+
import java.util.Optional;
import java.util.logging.Logger;
import static java.util.stream.Collectors.joining;
@@ -105,10 +108,15 @@ public PublishDatasetResult execute(CommandContext ctxt) throws CommandException
Optional prePubWf = ctxt.workflows().getDefaultWorkflow(TriggerType.PrePublishDataset);
if ( prePubWf.isPresent() ) {
// We start a workflow
- theDataset = ctxt.em().merge(theDataset);
- ctxt.em().flush();
- ctxt.workflows().start(prePubWf.get(), buildContext(theDataset, TriggerType.PrePublishDataset, datasetExternallyReleased), true);
- return new PublishDatasetResult(theDataset, Status.Workflow);
+ try {
+ theDataset = ctxt.em().merge(theDataset);
+ ctxt.em().flush();
+ ctxt.workflows().start(prePubWf.get(),
+ buildContext(theDataset, TriggerType.PrePublishDataset, datasetExternallyReleased), true);
+ return new PublishDatasetResult(theDataset, Status.Workflow);
+ } catch (OptimisticLockException e) {
+ throw new CommandException(e.getMessage(), e, this);
+ }
} else{
// We will skip trying to register the global identifiers for datafiles
@@ -157,7 +165,12 @@ public PublishDatasetResult execute(CommandContext ctxt) throws CommandException
lock.setInfo(info);
ctxt.datasets().addDatasetLock(theDataset, lock);
}
- theDataset = ctxt.em().merge(theDataset);
+ try {
+ theDataset = ctxt.em().merge(theDataset);
+ } catch (OptimisticLockException e) {
+ ctxt.datasets().removeDatasetLocks(theDataset, DatasetLock.Reason.finalizePublication);
+ throw new CommandException(e.getMessage(), e, this);
+ }
// The call to FinalizePublicationCommand has been moved to the new @onSuccess()
// method:
//ctxt.datasets().callFinalizePublishCommandAsynchronously(theDataset.getId(), ctxt, request, datasetExternallyReleased);
diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java
index bb5f5a71e24..dc8884405ef 100644
--- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java
+++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java
@@ -115,7 +115,7 @@ public Dataset execute(CommandContext ctxt) throws CommandException {
*/
if(persistedVersion==null) {
Long id = getDataset().getLatestVersion().getId();
- persistedVersion = ctxt.datasetVersion().find(id!=null ? id: getDataset().getLatestVersionForCopy().getId());
+ persistedVersion = ctxt.datasetVersion().find(id!=null ? id : getDataset().getLatestVersionForCopy(true).getId());
}
//Will throw an IllegalCommandException if a system metadatablock is changed and the appropriate key is not supplied.
diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDataverseAttributeCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDataverseAttributeCommand.java
new file mode 100644
index 00000000000..57ac20fcee6
--- /dev/null
+++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDataverseAttributeCommand.java
@@ -0,0 +1,110 @@
+package edu.harvard.iq.dataverse.engine.command.impl;
+
+import edu.harvard.iq.dataverse.Dataverse;
+import edu.harvard.iq.dataverse.authorization.Permission;
+import edu.harvard.iq.dataverse.engine.command.AbstractCommand;
+import edu.harvard.iq.dataverse.engine.command.CommandContext;
+import edu.harvard.iq.dataverse.engine.command.DataverseRequest;
+import edu.harvard.iq.dataverse.engine.command.RequiredPermissions;
+import edu.harvard.iq.dataverse.engine.command.exception.CommandException;
+import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException;
+import edu.harvard.iq.dataverse.engine.command.exception.PermissionException;
+import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
+
+import java.util.Collections;
+
+/**
+ * Command to update a single attribute of an existing Dataverse collection.
+ * <p>
+ * The string attributes {@code alias}, {@code name}, {@code description} and
+ * {@code affiliation} are set directly on the collection. The boolean
+ * {@code filePIDsEnabled} attribute is only changed for superusers, and only
+ * when the {@code AllowEnablingFilePIDsPerCollection} setting is true. The
+ * modified collection is then handed to an {@link UpdateDataverseCommand}.
+ */
+@RequiredPermissions(Permission.EditDataverse)
+public class UpdateDataverseAttributeCommand extends AbstractCommand<Dataverse> {
+
+    private static final String ATTRIBUTE_ALIAS = "alias";
+    private static final String ATTRIBUTE_NAME = "name";
+    private static final String ATTRIBUTE_DESCRIPTION = "description";
+    private static final String ATTRIBUTE_AFFILIATION = "affiliation";
+    private static final String ATTRIBUTE_FILE_PIDS_ENABLED = "filePIDsEnabled";
+
+    private final Dataverse dataverse;
+    private final String attributeName;
+    private final Object attributeValue;
+
+    /**
+     * @param request        the request on whose behalf the command runs
+     * @param dataverse      the collection to modify
+     * @param attributeName  one of the supported attribute names
+     * @param attributeValue the new value; a String for the string attributes,
+     *                       a Boolean for {@code filePIDsEnabled}
+     */
+    public UpdateDataverseAttributeCommand(DataverseRequest request, Dataverse dataverse, String attributeName, Object attributeValue) {
+        super(request, dataverse);
+        this.dataverse = dataverse;
+        this.attributeName = attributeName;
+        this.attributeValue = attributeValue;
+    }
+
+    /**
+     * Applies the attribute change and submits an {@link UpdateDataverseCommand}
+     * for the modified collection.
+     *
+     * @return the result of the submitted update command
+     * @throws IllegalCommandException if the attribute is not supported or the
+     *                                 value has the wrong type
+     * @throws PermissionException     if the file PID attribute is changed by a
+     *                                 non-superuser or while the feature is disabled
+     */
+    @Override
+    public Dataverse execute(CommandContext ctxt) throws CommandException {
+        switch (attributeName) {
+            case ATTRIBUTE_ALIAS, ATTRIBUTE_NAME, ATTRIBUTE_DESCRIPTION, ATTRIBUTE_AFFILIATION ->
+                setStringAttribute(attributeName, attributeValue);
+            case ATTRIBUTE_FILE_PIDS_ENABLED ->
+                setBooleanAttributeForFilePIDs(ctxt);
+            default ->
+                throw new IllegalCommandException("'" + attributeName + "' is not a supported attribute", this);
+        }
+
+        return ctxt.engine().submit(new UpdateDataverseCommand(dataverse, null, null, getRequest(), null));
+    }
+
+    /**
+     * Helper method to set a string attribute.
+     *
+     * @param attributeName  The name of the attribute.
+     * @param attributeValue The value of the attribute (must be a String).
+     * @throws IllegalCommandException if the provided attribute value is not of String type,
+     *                                 or the attribute is not one of the string attributes.
+     */
+    private void setStringAttribute(String attributeName, Object attributeValue) throws IllegalCommandException {
+        if (!(attributeValue instanceof String stringValue)) {
+            throw new IllegalCommandException("'" + attributeName + "' requires a string value", this);
+        }
+
+        switch (attributeName) {
+            case ATTRIBUTE_ALIAS -> dataverse.setAlias(stringValue);
+            case ATTRIBUTE_NAME -> dataverse.setName(stringValue);
+            case ATTRIBUTE_DESCRIPTION -> dataverse.setDescription(stringValue);
+            case ATTRIBUTE_AFFILIATION -> dataverse.setAffiliation(stringValue);
+            default -> throw new IllegalCommandException("Unsupported string attribute: " + attributeName, this);
+        }
+    }
+
+    /**
+     * Helper method to handle the "filePIDsEnabled" boolean attribute.
+     * The checks run in this order: superuser status, server setting, value type.
+     *
+     * @param ctxt The command context.
+     * @throws PermissionException     if the user is not a superuser, or per-collection
+     *                                 file PID configuration is not enabled on this server.
+     * @throws IllegalCommandException if the attribute value is not a Boolean.
+     */
+    private void setBooleanAttributeForFilePIDs(CommandContext ctxt) throws CommandException {
+        // NOTE(review): both PermissionExceptions below report Permission.EditDataset
+        // although the command itself requires Permission.EditDataverse - confirm
+        // which permission is meant to be reported.
+        if (!getRequest().getUser().isSuperuser()) {
+            throw new PermissionException("You must be a superuser to change this setting",
+                    this, Collections.singleton(Permission.EditDataset), dataverse);
+        }
+        if (!ctxt.settings().isTrueForKey(SettingsServiceBean.Key.AllowEnablingFilePIDsPerCollection, false)) {
+            throw new PermissionException("Changing File PID policy per collection is not enabled on this server",
+                    this, Collections.singleton(Permission.EditDataset), dataverse);
+        }
+
+        if (!(attributeValue instanceof Boolean filePIDsEnabled)) {
+            throw new IllegalCommandException("'" + ATTRIBUTE_FILE_PIDS_ENABLED + "' requires a boolean value", this);
+        }
+
+        dataverse.setFilePIDsEnabled(filePIDsEnabled);
+    }
+}
diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDataverseCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDataverseCommand.java
index bdb69dc918f..6dc4ab4d00d 100644
--- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDataverseCommand.java
+++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDataverseCommand.java
@@ -1,141 +1,144 @@
package edu.harvard.iq.dataverse.engine.command.impl;
-import edu.harvard.iq.dataverse.Dataset;
-import edu.harvard.iq.dataverse.DatasetFieldType;
-import edu.harvard.iq.dataverse.Dataverse;
+import edu.harvard.iq.dataverse.*;
import edu.harvard.iq.dataverse.Dataverse.DataverseType;
-import edu.harvard.iq.dataverse.DataverseFieldTypeInputLevel;
+import edu.harvard.iq.dataverse.api.dto.DataverseDTO;
import edu.harvard.iq.dataverse.authorization.Permission;
import static edu.harvard.iq.dataverse.dataverse.DataverseUtil.validateDataverseMetadataExternally;
-import edu.harvard.iq.dataverse.engine.command.AbstractCommand;
+
import edu.harvard.iq.dataverse.engine.command.CommandContext;
import edu.harvard.iq.dataverse.engine.command.DataverseRequest;
import edu.harvard.iq.dataverse.engine.command.RequiredPermissions;
-import edu.harvard.iq.dataverse.engine.command.exception.CommandException;
import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException;
import java.util.ArrayList;
import java.util.List;
-import java.util.logging.Logger;
/**
* Update an existing dataverse.
+ *
* @author michael
*/
-@RequiredPermissions( Permission.EditDataverse )
-public class UpdateDataverseCommand extends AbstractCommand {
- private static final Logger logger = Logger.getLogger(UpdateDataverseCommand.class.getName());
-
- private final Dataverse editedDv;
- private final List facetList;
+@RequiredPermissions(Permission.EditDataverse)
+public class UpdateDataverseCommand extends AbstractWriteDataverseCommand {
private final List featuredDataverseList;
- private final List inputLevelList;
+ private final DataverseDTO updatedDataverseDTO;
private boolean datasetsReindexRequired = false;
- public UpdateDataverseCommand(Dataverse editedDv, List facetList, List featuredDataverseList,
- DataverseRequest aRequest, List inputLevelList ) {
- super(aRequest, editedDv);
- this.editedDv = editedDv;
- // add update template uses this command but does not
- // update facet list or featured dataverses
- if (facetList != null){
- this.facetList = new ArrayList<>(facetList);
- } else {
- this.facetList = null;
- }
- if (featuredDataverseList != null){
- this.featuredDataverseList = new ArrayList<>(featuredDataverseList);
- } else {
- this.featuredDataverseList = null;
- }
- if (inputLevelList != null){
- this.inputLevelList = new ArrayList<>(inputLevelList);
- } else {
- this.inputLevelList = null;
- }
- }
-
- @Override
- public Dataverse execute(CommandContext ctxt) throws CommandException {
- logger.fine("Entering update dataverse command");
-
- // Perform any optional validation steps, if defined:
- if (ctxt.systemConfig().isExternalDataverseValidationEnabled()) {
- // For admins, an override of the external validation step may be enabled:
- if (!(getUser().isSuperuser() && ctxt.systemConfig().isExternalValidationAdminOverrideEnabled())) {
- String executable = ctxt.systemConfig().getDataverseValidationExecutable();
- boolean result = validateDataverseMetadataExternally(editedDv, executable, getRequest());
-
- if (!result) {
- String rejectionMessage = ctxt.systemConfig().getDataverseUpdateValidationFailureMsg();
- throw new IllegalCommandException(rejectionMessage, this);
- }
- }
- }
-
- Dataverse oldDv = ctxt.dataverses().find(editedDv.getId());
-
- DataverseType oldDvType = oldDv.getDataverseType();
- String oldDvAlias = oldDv.getAlias();
- String oldDvName = oldDv.getName();
- oldDv = null;
-
- Dataverse result = ctxt.dataverses().save(editedDv);
-
- if ( facetList != null ) {
- ctxt.facets().deleteFacetsFor(result);
- int i=0;
- for ( DatasetFieldType df : facetList ) {
- ctxt.facets().create(i++, df.getId(), result.getId());
- }
- }
- if ( featuredDataverseList != null ) {
- ctxt.featuredDataverses().deleteFeaturedDataversesFor(result);
- int i=0;
- for ( Object obj : featuredDataverseList ) {
- Dataverse dv = (Dataverse) obj;
- ctxt.featuredDataverses().create(i++, dv.getId(), result.getId());
+ /**
+  * Creates an update command without metadata blocks and without a DTO;
+  * relations are not reset on null values.
+  */
+ public UpdateDataverseCommand(Dataverse dataverse,
+         List facets,
+         List featuredDataverses,
+         DataverseRequest request,
+         List inputLevels) {
+     this(dataverse, facets, featuredDataverses, request, inputLevels, null, null, false);
+ }
+
+ /**
+  * Creates an update command.
+  *
+  * @param dataverse                  the collection being updated
+  * @param facets                     passed through to the superclass
+  * @param featuredDataverses         featured collections to store; copied
+  *                                   defensively, {@code null} leaves them untouched
+  * @param request                    the request on whose behalf the command runs
+  * @param inputLevels                passed through to the superclass
+  * @param metadataBlocks             passed through to the superclass
+  * @param updatedDataverseDTO        optional DTO whose non-null values are applied
+  *                                   to the collection
+  * @param resetRelationsOnNullValues passed through to the superclass
+  */
+ public UpdateDataverseCommand(Dataverse dataverse,
+         List facets,
+         List featuredDataverses,
+         DataverseRequest request,
+         List inputLevels,
+         List metadataBlocks,
+         DataverseDTO updatedDataverseDTO,
+         boolean resetRelationsOnNullValues) {
+     super(dataverse, dataverse, request, facets, inputLevels, metadataBlocks, resetRelationsOnNullValues);
+     this.featuredDataverseList = featuredDataverses == null ? null : new ArrayList<>(featuredDataverses);
+     this.updatedDataverseDTO = updatedDataverseDTO;
+ }
+
+ /**
+  * Runs the optional external validation, applies the values of the supplied
+  * DTO (if any), decides whether the child datasets need reindexing, and
+  * replaces the featured collections when a list was supplied.
+  *
+  * @param ctxt the command context
+  * @return the updated (not yet saved here) collection
+  * @throws IllegalCommandException if the external validation rejects the collection
+  */
+ @Override
+ protected Dataverse innerExecute(CommandContext ctxt) throws IllegalCommandException {
+ // Perform any optional validation steps, if defined:
+ // NOTE(review): validation runs before the DTO values are applied, so it
+ // sees the collection as it was passed in - confirm this is intended.
+ if (ctxt.systemConfig().isExternalDataverseValidationEnabled()) {
+ // For admins, an override of the external validation step may be enabled:
+ if (!(getUser().isSuperuser() && ctxt.systemConfig().isExternalValidationAdminOverrideEnabled())) {
+ String executable = ctxt.systemConfig().getDataverseValidationExecutable();
+ boolean result = validateDataverseMetadataExternally(dataverse, executable, getRequest());
+
+ if (!result) {
+ String rejectionMessage = ctxt.systemConfig().getDataverseUpdateValidationFailureMsg();
+ throw new IllegalCommandException(rejectionMessage, this);
+ }
}
- if ( inputLevelList != null ) {
- ctxt.fieldTypeInputLevels().deleteFacetsFor(result);
- for ( DataverseFieldTypeInputLevel obj : inputLevelList ) {
- ctxt.fieldTypeInputLevels().create(obj);
- }
+ }
+
+ Dataverse oldDv = ctxt.dataverses().find(dataverse.getId());
+
+ DataverseType oldDvType = oldDv.getDataverseType();
+ String oldDvAlias = oldDv.getAlias();
+ String oldDvName = oldDv.getName();
+
+ // Apply the DTO values only after the old values have been captured, but
+ // before the comparison below; otherwise alias/name/type changes that
+ // arrive through the DTO would never trigger the reindexing of the
+ // child datasets.
+ if (updatedDataverseDTO != null) {
+ updateDataverseFromDTO(dataverse, updatedDataverseDTO);
+ }
+
+ // We don't want to reindex the children datasets unnecessarily:
+ // When these values are changed we need to reindex all children datasets
+ // This check is not recursive as all the values just report the immediate parent
+ if (!oldDvType.equals(dataverse.getDataverseType())
+ || !oldDvName.equals(dataverse.getName())
+ || !oldDvAlias.equals(dataverse.getAlias())) {
+ datasetsReindexRequired = true;
+ }
+
+ if (featuredDataverseList != null) {
+ ctxt.featuredDataverses().deleteFeaturedDataversesFor(dataverse);
+ int i = 0;
+ for (Object obj : featuredDataverseList) {
+ Dataverse dv = (Dataverse) obj;
+ ctxt.featuredDataverses().create(i++, dv.getId(), dataverse.getId());
}
-
- // We don't want to reindex the children datasets unnecessarily:
- // When these values are changed we need to reindex all children datasets
- // This check is not recursive as all the values just report the immediate parent
- if (!oldDvType.equals(editedDv.getDataverseType())
- || !oldDvName.equals(editedDv.getName())
- || !oldDvAlias.equals(editedDv.getAlias())) {
- datasetsReindexRequired = true;
+ }
+
+ return dataverse;
+ }
+
+ /**
+  * Copies every non-null value of the DTO (alias, name, description,
+  * affiliation, contacts, type) onto the given collection; null values in
+  * the DTO leave the corresponding attribute untouched.
+  *
+  * @param dataverse the collection to modify
+  * @param dto       the values to apply
+  */
+ private void updateDataverseFromDTO(Dataverse dataverse, DataverseDTO dto) {
+ if (dto.getAlias() != null) {
+ dataverse.setAlias(dto.getAlias());
+ }
+ if (dto.getName() != null) {
+ dataverse.setName(dto.getName());
+ }
+ if (dto.getDescription() != null) {
+ dataverse.setDescription(dto.getDescription());
+ }
+ if (dto.getAffiliation() != null) {
+ dataverse.setAffiliation(dto.getAffiliation());
+ }
+ if (dto.getDataverseContacts() != null) {
+ dataverse.setDataverseContacts(dto.getDataverseContacts());
+ // Point each contact taken from the DTO back at this collection:
+ for (DataverseContact dc : dataverse.getDataverseContacts()) {
+ dc.setDataverse(dataverse);
}
-
- return result;
- }
-
+ }
+ if (dto.getDataverseType() != null) {
+ dataverse.setDataverseType(dto.getDataverseType());
+ }
+ }
+
+ /**
+  * Post-execution hook: when the update changed the collection's type, name
+  * or alias ({@code datasetsReindexRequired}), queues an asynchronous reindex
+  * of the datasets found by owner id, then indexes the collection itself.
+  *
+  * @param ctxt the command context
+  * @param r    the command result; cast to {@code Dataverse}
+  * @return the result of indexing the collection
+  */
@Override
public boolean onSuccess(CommandContext ctxt, Object r) {
-
+
// first kick of async index of datasets
// TODO: is this actually needed? Is there a better way to handle
// It appears that we at some point lost some extra logic here, where
// we only reindex the underlying datasets if one or more of the specific set
- // of fields have been changed (since these values are included in the
- // indexed solr documents for dataasets). So I'm putting that back. -L.A.
+ // of fields have been changed (since these values are included in the
+ // indexed solr documents for datasets). So I'm putting that back. -L.A.
Dataverse result = (Dataverse) r;
-
+
if (datasetsReindexRequired) {
List datasets = ctxt.datasets().findByOwnerId(result.getId());
ctxt.index().asyncIndexDatasetList(datasets, true);
}
-
- return ctxt.dataverses().index((Dataverse) r);
- }
+ return ctxt.dataverses().index((Dataverse) r);
+ }
}
-
diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateHarvestedDatasetCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateHarvestedDatasetCommand.java
new file mode 100644
index 00000000000..09563686299
--- /dev/null
+++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateHarvestedDatasetCommand.java
@@ -0,0 +1,202 @@
+package edu.harvard.iq.dataverse.engine.command.impl;
+
+import edu.harvard.iq.dataverse.authorization.Permission;
+import edu.harvard.iq.dataverse.engine.command.CommandContext;
+import edu.harvard.iq.dataverse.engine.command.DataverseRequest;
+import edu.harvard.iq.dataverse.engine.command.RequiredPermissions;
+import edu.harvard.iq.dataverse.engine.command.exception.CommandException;
+import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException;
+import edu.harvard.iq.dataverse.Dataset;
+import edu.harvard.iq.dataverse.DatasetVersion;
+import edu.harvard.iq.dataverse.DataFile;
+import edu.harvard.iq.dataverse.FileMetadata;
+import static edu.harvard.iq.dataverse.search.IndexServiceBean.solrDocIdentifierFile;
+import edu.harvard.iq.dataverse.util.StringUtil;
+import java.io.IOException;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+import org.apache.solr.client.solrj.SolrServerException;
+
+/**
+ * Much simplified version of UpdateDatasetVersionCommand, but with some extra
+ * twists. The goal is to avoid creating new Dataset and DataFile objects, and
+ * to instead preserve the database ids of the re-harvested datasets and files,
+ * whenever possible. This in turn allows us to avoid deleting and rebuilding
+ * from scratch the Solr documents for these objects.
+ *
+ * @author landreev
+ */
+@RequiredPermissions(Permission.EditDataset)
+public class UpdateHarvestedDatasetCommand extends AbstractDatasetCommand<Dataset> {
+
+    private static final Logger logger = Logger.getLogger(UpdateHarvestedDatasetCommand.class.getCanonicalName());
+    private final DatasetVersion newHarvestedVersion;
+    private final boolean validateLenient = true;
+
+    /**
+     * @param theDataset          the existing harvested dataset to update
+     * @param newHarvestedVersion the newly-harvested, not yet saved version
+     * @param aRequest            the request on whose behalf the command runs
+     */
+    public UpdateHarvestedDatasetCommand(Dataset theDataset, DatasetVersion newHarvestedVersion, DataverseRequest aRequest) {
+        super(aRequest, theDataset);
+        this.newHarvestedVersion = newHarvestedVersion;
+    }
+
+    public boolean isValidateLenient() {
+        return validateLenient;
+    }
+
+    /**
+     * Replaces the single version of an existing harvested dataset with the
+     * newly-harvested one, re-using the existing DataFile entities whose
+     * storage identifiers match and deleting the ones that are gone.
+     *
+     * @param ctxt the command context
+     * @return the merged dataset
+     * @throws IllegalCommandException if the dataset is not an existing harvested
+     *                                 dataset with exactly one version, or the new
+     *                                 version is missing or already saved
+     */
+    @Override
+    public Dataset execute(CommandContext ctxt) throws CommandException {
+
+        Dataset existingDataset = getDataset();
+
+        if (existingDataset == null
+                || existingDataset.getId() == null
+                || !existingDataset.isHarvested()
+                || existingDataset.getVersions().size() != 1) {
+            throw new IllegalCommandException("The command can only be called on an existing harvested dataset with only 1 version", this);
+        }
+        DatasetVersion existingVersion = existingDataset.getVersions().get(0);
+
+        if (newHarvestedVersion == null || newHarvestedVersion.getId() != null) {
+            throw new IllegalCommandException("The command can only be called with a newly-harvested, not yet saved DatasetVersion supplied", this);
+        }
+
+        newHarvestedVersion.setCreateTime(getTimestamp());
+        newHarvestedVersion.setLastUpdateTime(getTimestamp());
+
+        // storage identifier -> position of the file in existingDataset.getFiles()
+        Map<String, Integer> existingFilesIndex = new HashMap<>();
+
+        /*
+        Create a map of the files that are currently part of this existing
+        harvested dataset. We assume that a harvested file can be uniquely
+        defined by its storageidentifier. Which, in the case of a datafile
+        harvested from another Dataverse should be its data access api url.
+        */
+        for (int i = 0; i < existingDataset.getFiles().size(); i++) {
+            String storageIdentifier = existingDataset.getFiles().get(i).getStorageIdentifier();
+            if (!StringUtil.isEmpty(storageIdentifier)) {
+                existingFilesIndex.put(storageIdentifier, i);
+            }
+        }
+
+        /*
+        Go through the files in the newly-harvested version and check if any of
+        them are (potentially new/updated) versions of files that we already
+        have, harvested previously from the same archive location.
+        */
+        for (FileMetadata newFileMetadata : newHarvestedVersion.getFileMetadatas()) {
+            // is it safe to assume that each new FileMetadata will be
+            // pointing to a non-null DataFile here?
+            DataFile harvestedFile = newFileMetadata.getDataFile();
+            String storageIdentifier = harvestedFile.getStorageIdentifier();
+            if (!StringUtil.isEmpty(storageIdentifier) && existingFilesIndex.containsKey(storageIdentifier)) {
+                harvestedFile.setFileMetadatas(new ArrayList<>());
+
+                int fileIndex = existingFilesIndex.get(storageIdentifier);
+                DataFile existingFile = existingDataset.getFiles().get(fileIndex);
+
+                // Make sure to update the existing DataFiles that we are going
+                // to keep in case their newly-harvested versions have different
+                // checksums, mime types etc. These values are supposed to be
+                // immutable, normally - but who knows, errors happen, the source
+                // Dataverse may have had to fix these in their database to
+                // correct a data integrity issue (for ex.):
+                existingFile.setContentType(harvestedFile.getContentType());
+                existingFile.setFilesize(harvestedFile.getFilesize());
+                existingFile.setChecksumType(harvestedFile.getChecksumType());
+                existingFile.setChecksumValue(harvestedFile.getChecksumValue());
+
+                // Point the newly-harvested filemetadata to the existing datafile:
+                newFileMetadata.setDataFile(existingFile);
+
+                // Make sure this new FileMetadata is the only one attached to this existing file:
+                existingFile.setFileMetadatas(new ArrayList<>(1));
+                existingFile.getFileMetadatas().add(newFileMetadata);
+                // (we don't want any cascade relationships left between this existing
+                // dataset and this version, since we are going to attempt to delete it).
+
+                // Drop the file from the index map:
+                existingFilesIndex.remove(storageIdentifier);
+            }
+        }
+
+        // @todo? check if there's anything special that needs to be done for things
+        // like file categories
+
+        List<String> solrIdsOfDocumentsToDelete = new ArrayList<>();
+
+        // Go through the existing files and delete the ones that are
+        // no longer present in the version that we have just harvested:
+        for (FileMetadata oldFileMetadata : existingVersion.getFileMetadatas()) {
+            DataFile oldDataFile = oldFileMetadata.getDataFile();
+            String storageIdentifier = oldDataFile.getStorageIdentifier();
+            // Is it still in the existing files map? - that means it is no longer
+            // present in the newly-harvested version
+            if (StringUtil.isEmpty(storageIdentifier) || existingFilesIndex.containsKey(storageIdentifier)) {
+                solrIdsOfDocumentsToDelete.add(solrDocIdentifierFile + oldDataFile.getId());
+                existingDataset.getFiles().remove(oldDataFile);
+                // Files from harvested datasets are removed unceremoniously,
+                // directly in the database. No need to bother calling the
+                // DeleteFileCommand on them. We'll just need to remember to purge
+                // them from Solr as well (right below)
+                ctxt.em().remove(ctxt.em().merge(oldDataFile));
+                // (no need to explicitly remove the oldFileMetadata; it will be
+                // removed when the entire old version is deleted)
+            }
+        }
+
+        // purge all the SOLR documents associated with the files
+        // we have just deleted:
+        if (!solrIdsOfDocumentsToDelete.isEmpty()) {
+            ctxt.index().deleteHarvestedDocuments(solrIdsOfDocumentsToDelete);
+        }
+
+        // ... And now delete the existing version itself:
+        existingDataset.setVersions(new ArrayList<>());
+        existingVersion.setDataset(null);
+
+        existingVersion = ctxt.em().merge(existingVersion);
+        ctxt.em().remove(existingVersion);
+
+        // Now attach the newly-harvested version to the dataset:
+        existingDataset.getVersions().add(newHarvestedVersion);
+        newHarvestedVersion.setDataset(existingDataset);
+
+        // ... There's one more thing to do - go through the new files,
+        // that are not in the database yet, and make sure they are
+        // attached to this existing dataset, instead of the dummy temp
+        // dataset into which they were originally imported:
+        for (FileMetadata newFileMetadata : newHarvestedVersion.getFileMetadatas()) {
+            if (newFileMetadata.getDataFile().getId() == null) {
+                existingDataset.getFiles().add(newFileMetadata.getDataFile());
+                newFileMetadata.getDataFile().setOwner(existingDataset);
+            }
+        }
+
+        ctxt.em().persist(newHarvestedVersion);
+
+        Dataset savedDataset = ctxt.em().merge(existingDataset);
+        ctxt.em().flush();
+
+        return savedDataset;
+    }
+
+    /**
+     * Indexes the updated dataset synchronously.
+     *
+     * @param ctxt the command context
+     * @param r    the command result; cast to {@code Dataset}
+     * @return {@code false} if indexing failed, {@code true} otherwise
+     */
+    @Override
+    public boolean onSuccess(CommandContext ctxt, Object r) {
+        boolean retVal = true;
+        Dataset d = (Dataset) r;
+
+        try {
+            // Note that we index harvested datasets synchronously:
+            ctxt.index().indexDataset(d, true);
+        } catch (SolrServerException|IOException solrServerEx) {
+            // Pass the exception itself: a plain Object argument is only used as a
+            // message-format parameter, and the message has no placeholder for it,
+            // so the cause would otherwise be dropped from the log.
+            logger.log(Level.WARNING, "Exception while trying to index the updated Harvested dataset " + d.getGlobalId().asString(), solrServerEx);
+            retVal = false;
+        }
+
+        return retVal;
+    }
+}
diff --git a/src/main/java/edu/harvard/iq/dataverse/export/DDIExportServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/export/DDIExportServiceBean.java
index edd01ae98a3..d76020cb8d8 100644
--- a/src/main/java/edu/harvard/iq/dataverse/export/DDIExportServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/export/DDIExportServiceBean.java
@@ -98,8 +98,10 @@ public class DDIExportServiceBean {
public static final String LEVEL_FILE = "file";
public static final String NOTE_TYPE_UNF = "VDC:UNF";
public static final String NOTE_TYPE_TAG = "DATAVERSE:TAG";
+ public static final String NOTE_TYPE_FILEDESCRIPTION = "DATAVERSE:FILEDESC";
public static final String NOTE_SUBJECT_UNF = "Universal Numeric Fingerprint";
public static final String NOTE_SUBJECT_TAG = "Data File Tag";
+ public static final String NOTE_SUBJECT_FILEDESCRIPTION = "DataFile Description";
/*
* Internal service objects:
@@ -742,11 +744,6 @@ private void createFileDscr(XMLStreamWriter xmlw, Set excludedFieldSet,
xmlw.writeEndElement(); // fileName
}
- /*
- xmlw.writeStartElement("fileCont");
- xmlw.writeCharacters( df.getContentType() );
- xmlw.writeEndElement(); // fileCont
- */
// dimensions
if (checkField("dimensns", excludedFieldSet, includedFieldSet)) {
if (dt.getCaseQuantity() != null || dt.getVarQuantity() != null || dt.getRecordsPerCase() != null) {
@@ -801,26 +798,6 @@ private void createFileDscr(XMLStreamWriter xmlw, Set excludedFieldSet,
xmlw.writeEndElement(); // notes
}
- /*
- xmlw.writeStartElement("notes");
- writeAttribute( xmlw, "type", "vdc:category" );
- xmlw.writeCharacters( fm.getCategory() );
- xmlw.writeEndElement(); // notes
- */
- // A special note for LOCKSS crawlers indicating the restricted
- // status of the file:
-
- /*
- if (tdf != null && isRestrictedFile(tdf)) {
- xmlw.writeStartElement("notes");
- writeAttribute( xmlw, "type", NOTE_TYPE_LOCKSS_CRAWL );
- writeAttribute( xmlw, "level", LEVEL_FILE );
- writeAttribute( xmlw, "subject", NOTE_SUBJECT_LOCKSS_PERM );
- xmlw.writeCharacters( "restricted" );
- xmlw.writeEndElement(); // notes
-
- }
- */
if (checkField("tags", excludedFieldSet, includedFieldSet) && df.getTags() != null) {
for (int i = 0; i < df.getTags().size(); i++) {
xmlw.writeStartElement("notes");
@@ -831,6 +808,17 @@ private void createFileDscr(XMLStreamWriter xmlw, Set excludedFieldSet,
xmlw.writeEndElement(); // notes
}
}
+
+ // A dedicated node for the Description entry
+ if (!StringUtilisEmpty(fm.getDescription())) {
+ xmlw.writeStartElement("notes");
+ xmlw.writeAttribute("level", LEVEL_FILE);
+ xmlw.writeAttribute("type", NOTE_TYPE_FILEDESCRIPTION);
+ xmlw.writeAttribute("subject", NOTE_SUBJECT_FILEDESCRIPTION);
+ xmlw.writeCharacters(fm.getDescription());
+ xmlw.writeEndElement(); // notes
+ }
+
xmlw.writeEndElement(); // fileDscr
}
diff --git a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java
index f5efc448090..05ddbe83e78 100644
--- a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java
+++ b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java
@@ -14,8 +14,10 @@
import static edu.harvard.iq.dataverse.export.DDIExportServiceBean.LEVEL_FILE;
import static edu.harvard.iq.dataverse.export.DDIExportServiceBean.NOTE_SUBJECT_TAG;
import static edu.harvard.iq.dataverse.export.DDIExportServiceBean.NOTE_SUBJECT_UNF;
+import static edu.harvard.iq.dataverse.export.DDIExportServiceBean.NOTE_SUBJECT_FILEDESCRIPTION;
import static edu.harvard.iq.dataverse.export.DDIExportServiceBean.NOTE_TYPE_TAG;
import static edu.harvard.iq.dataverse.export.DDIExportServiceBean.NOTE_TYPE_UNF;
+import static edu.harvard.iq.dataverse.export.DDIExportServiceBean.NOTE_TYPE_FILEDESCRIPTION;
import edu.harvard.iq.dataverse.export.DDIExporter;
import edu.harvard.iq.dataverse.pidproviders.PidUtil;
import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
@@ -1901,6 +1903,8 @@ private static void createFileDscr(XMLStreamWriter xmlw, JsonArray fileDetails)
xmlw.writeEndElement(); // notes
}
+ // If any tabular tags are present, each is formatted in a
+ // dedicated note:
if (fileJson.containsKey("tabularTags")) {
JsonArray tags = fileJson.getJsonArray("tabularTags");
for (int j = 0; j < tags.size(); j++) {
@@ -1912,6 +1916,17 @@ private static void createFileDscr(XMLStreamWriter xmlw, JsonArray fileDetails)
xmlw.writeEndElement(); // notes
}
}
+
+ // Adding a dedicated node for the description entry (for
+ // non-tabular files we format it under the field)
+ if (fileJson.containsKey("description")) {
+ xmlw.writeStartElement("notes");
+ xmlw.writeAttribute("level", LEVEL_FILE);
+ xmlw.writeAttribute("type", NOTE_TYPE_FILEDESCRIPTION);
+ xmlw.writeAttribute("subject", NOTE_SUBJECT_FILEDESCRIPTION);
+ xmlw.writeCharacters(fileJson.getString("description"));
+ xmlw.writeEndElement(); // notes
+ }
// TODO: add the remaining fileDscr elements!
xmlw.writeEndElement(); // fileDscr
diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java
index ac3c81622fc..58992805dc8 100644
--- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java
@@ -74,6 +74,7 @@
import edu.harvard.iq.dataverse.util.URLTokenUtil;
import edu.harvard.iq.dataverse.util.UrlSignerUtil;
import edu.harvard.iq.dataverse.util.json.JsonUtil;
+import jakarta.json.JsonNumber;
import jakarta.json.JsonReader;
import jakarta.persistence.EntityManager;
import jakarta.persistence.PersistenceContext;
@@ -284,6 +285,52 @@ private int makeDir(GlobusEndpoint endpoint, String dir) {
return result.status;
}
+    /**
+     * Lists the contents of a folder on a Globus endpoint and collects the
+     * sizes of the entries reported as files, keyed by file name.
+     *
+     * The caller treats the result purely as an optimization (a missing map or
+     * a missing entry just means the size is looked up some other way), so
+     * entries that cannot be interpreted are skipped instead of aborting the
+     * whole lookup with a runtime exception.
+     *
+     * @param endpoint the Globus endpoint to query
+     * @param dir      the folder on that endpoint to list
+     * @return a map of file name to size in bytes (possibly empty), or
+     *         {@code null} if the listing could not be obtained
+     */
+    private Map<String, Long> lookupFileSizes(GlobusEndpoint endpoint, String dir) {
+        MakeRequestResponse result;
+
+        try {
+            logger.fine("Attempting to look up the contents of the Globus folder "+dir);
+            // NOTE(review): dir is appended to the query string as-is; confirm
+            // that callers never pass a path that would need URL-encoding.
+            URL url = new URL(
+                    "https://transfer.api.globusonline.org/v0.10/operation/endpoint/" + endpoint.getId()
+                            + "/ls?path=" + dir);
+            result = makeRequest(url, "Bearer", endpoint.getClientToken(), "GET", null);
+
+            switch (result.status) {
+            case 200:
+                logger.fine("Looked up directory " + dir + " successfully.");
+                break;
+            default:
+                logger.warning("Status " + result.status + " received when looking up dir " + dir);
+                logger.fine("Response: " + result.jsonResponse);
+                return null;
+            }
+        } catch (MalformedURLException ex) {
+            // Misconfiguration
+            logger.warning("Failed to list the contents of the directory "+ dir + " on endpoint " + endpoint.getId());
+            return null;
+        }
+
+        if (result.jsonResponse == null) {
+            // A 200 without a body gives us nothing to parse; report it the
+            // same way as any other failed lookup.
+            logger.warning("Empty response received when looking up dir " + dir);
+            return null;
+        }
+
+        Map<String, Long> ret = new HashMap<>();
+
+        JsonObject listObject = JsonUtil.getJsonObject(result.jsonResponse);
+        JsonArray dataArray = listObject.getJsonArray("DATA");
+        if (dataArray == null) {
+            return ret;
+        }
+
+        for (int i = 0; i < dataArray.size(); i++) {
+            JsonObject entry = dataArray.getJsonObject(i);
+            if (!"file".equals(entry.getString("DATA_TYPE", null))) {
+                continue;
+            }
+            // Do not assume that every "file" entry carries a usable name and
+            // an integral size; skip the ones that do not.
+            String fileName = entry.getString("name", null);
+            Object sizeValue = entry.get("size");
+            if (fileName == null || !(sizeValue instanceof JsonNumber) || !((JsonNumber) sizeValue).isIntegral()) {
+                logger.fine("Skipping an entry without a valid name and size in the listing of " + dir);
+                continue;
+            }
+            ret.put(fileName, ((JsonNumber) sizeValue).longValueExact());
+        }
+
+        return ret;
+    }
+
private int requestPermission(GlobusEndpoint endpoint, Dataset dataset, Permissions permissions) {
Gson gson = new GsonBuilder().create();
MakeRequestResponse result = null;
@@ -938,9 +985,20 @@ private void processUploadedFiles(JsonArray filesJsonArray, Dataset dataset, Aut
inputList.add(fileId + "IDsplit" + fullPath + "IDsplit" + fileName);
}
+
+ Map fileSizeMap = null;
+
+ if (filesJsonArray.size() >= systemConfig.getGlobusBatchLookupSize()) {
+ // Look up the sizes of all the files in the dataset folder, to avoid
+ // looking them up one by one later:
+ // @todo: we should only be doing this if this is a managed store, probably (?)
+ GlobusEndpoint endpoint = getGlobusEndpoint(dataset);
+ fileSizeMap = lookupFileSizes(endpoint, endpoint.getBasePath());
+ }
// calculateMissingMetadataFields: checksum, mimetype
JsonObject newfilesJsonObject = calculateMissingMetadataFields(inputList, myLogger);
+
JsonArray newfilesJsonArray = newfilesJsonObject.getJsonArray("files");
logger.fine("Size: " + newfilesJsonArray.size());
logger.fine("Val: " + JsonUtil.prettyPrint(newfilesJsonArray.getJsonObject(0)));
@@ -964,20 +1022,26 @@ private void processUploadedFiles(JsonArray filesJsonArray, Dataset dataset, Aut
if (newfileJsonObject != null) {
logger.fine("List Size: " + newfileJsonObject.size());
// if (!newfileJsonObject.get(0).getString("hash").equalsIgnoreCase("null")) {
- JsonPatch path = Json.createPatchBuilder()
+ JsonPatch patch = Json.createPatchBuilder()
.add("/md5Hash", newfileJsonObject.get(0).getString("hash")).build();
- fileJsonObject = path.apply(fileJsonObject);
- path = Json.createPatchBuilder()
+ fileJsonObject = patch.apply(fileJsonObject);
+ patch = Json.createPatchBuilder()
.add("/mimeType", newfileJsonObject.get(0).getString("mime")).build();
- fileJsonObject = path.apply(fileJsonObject);
+ fileJsonObject = patch.apply(fileJsonObject);
+ // If we already know the size of this file on the Globus end,
+ // we'll pass it to /addFiles, to avoid looking up file sizes
+ // one by one:
+ if (fileSizeMap != null && fileSizeMap.get(fileId) != null) {
+ Long uploadedFileSize = fileSizeMap.get(fileId);
+ myLogger.info("Found size for file " + fileId + ": " + uploadedFileSize + " bytes");
+ patch = Json.createPatchBuilder()
+ .add("/fileSize", Json.createValue(uploadedFileSize)).build();
+ fileJsonObject = patch.apply(fileJsonObject);
+ } else {
+ logger.fine("No file size entry found for file "+fileId);
+ }
addFilesJsonData.add(fileJsonObject);
countSuccess++;
- // } else {
- // globusLogger.info(fileName
- // + " will be skipped from adding to dataset by second API due to missing
- // values ");
- // countError++;
- // }
} else {
myLogger.info(fileName
+ " will be skipped from adding to dataset in the final AddReplaceFileHelper.addFiles() call. ");
@@ -1029,7 +1093,7 @@ private void processUploadedFiles(JsonArray filesJsonArray, Dataset dataset, Aut
// The old code had 2 sec. of sleep, so ...
Thread.sleep(2000);
- Response addFilesResponse = addFileHelper.addFiles(newjsonData, dataset, authUser);
+ Response addFilesResponse = addFileHelper.addFiles(newjsonData, dataset, authUser, true);
if (addFilesResponse == null) {
logger.info("null response from addFiles call");
@@ -1211,7 +1275,7 @@ private GlobusTaskState globusStatusCheck(GlobusEndpoint endpoint, String taskId
return task;
}
- public JsonObject calculateMissingMetadataFields(List inputList, Logger globusLogger)
+ private JsonObject calculateMissingMetadataFields(List inputList, Logger globusLogger)
throws InterruptedException, ExecutionException, IOException {
List> hashvalueCompletableFutures = inputList.stream()
@@ -1230,7 +1294,7 @@ public JsonObject calculateMissingMetadataFields(List inputList, Logger
});
JsonArrayBuilder filesObject = (JsonArrayBuilder) completableFuture.get();
-
+
JsonObject output = Json.createObjectBuilder().add("files", filesObject).build();
return output;
diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClient.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClient.java
index 0667f5594ce..7280b6af129 100644
--- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClient.java
+++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClient.java
@@ -252,8 +252,16 @@ public void setAllowHarvestingMissingCVV(boolean allowHarvestingMissingCVV) {
this.allowHarvestingMissingCVV = allowHarvestingMissingCVV;
}
- // TODO: do we need "orphanRemoval=true"? -- L.A. 4.4
- // TODO: should it be @OrderBy("startTime")? -- L.A. 4.4
+ private boolean useOaiIdAsPid; // flag exposed through isUseOaiIdentifiersAsPids(); NOTE(review): presumably tells the harvester to use OAI record identifiers as dataset PIDs - confirm against the harvesting code
+
+ public boolean isUseOaiIdentifiersAsPids() { // returns the current value of the useOaiIdAsPid flag
+ return useOaiIdAsPid;
+ }
+
+ public void setUseOaiIdentifiersAsPids(boolean useOaiIdAsPid) { // stores the supplied value in the useOaiIdAsPid flag
+ this.useOaiIdAsPid = useOaiIdAsPid;
+ }
+
@OneToMany(mappedBy="harvestingClient", cascade={CascadeType.REMOVE, CascadeType.MERGE, CascadeType.PERSIST})
@OrderBy("id")
private List harvestHistory;
diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java
index b42fd950528..71c498a4d0b 100644
--- a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java
@@ -344,10 +344,20 @@ public List saveAndAddFilesToDataset(DatasetVersion version,
try {
StorageIO dataAccess = DataAccess.getStorageIO(dataFile);
//Populate metadata
- dataAccess.open(DataAccessOption.READ_ACCESS);
- // (the .open() above makes a remote call to check if
- // the file exists and obtains its size)
- confirmedFileSize = dataAccess.getSize();
+
+ // There are direct upload sub-cases where the file size
+ // is already known at this point. For example, direct uploads
+ // to S3 that go through the jsf dataset page. Or the Globus
+ // uploads, where the file sizes are looked up in bulk on
+ // the completion of the remote upload task.
+ if (dataFile.getFilesize() >= 0) {
+ confirmedFileSize = dataFile.getFilesize();
+ } else {
+ dataAccess.open(DataAccessOption.READ_ACCESS);
+ // (the .open() above makes a remote call to check if
+ // the file exists and obtains its size)
+ confirmedFileSize = dataAccess.getSize();
+ }
// For directly-uploaded files, we will perform the file size
// limit and quota checks here. Perform them *again*, in
@@ -362,13 +372,16 @@ public List saveAndAddFilesToDataset(DatasetVersion version,
if (fileSizeLimit == null || confirmedFileSize < fileSizeLimit) {
//set file size
- logger.fine("Setting file size: " + confirmedFileSize);
- dataFile.setFilesize(confirmedFileSize);
+ if (dataFile.getFilesize() < 0) {
+ logger.fine("Setting file size: " + confirmedFileSize);
+ dataFile.setFilesize(confirmedFileSize);
+ }
if (dataAccess instanceof S3AccessIO) {
((S3AccessIO) dataAccess).removeTempTag();
}
savedSuccess = true;
+ logger.info("directly uploaded file successfully saved. file size: "+dataFile.getFilesize());
}
} catch (IOException ioex) {
logger.warning("Failed to get file size, storage id, or failed to remove the temp tag on the saved S3 object" + dataFile.getStorageIdentifier() + " ("
diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceShapefileHelper.java b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceShapefileHelper.java
index 8c5dad237b1..27a2ab99376 100644
--- a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceShapefileHelper.java
+++ b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceShapefileHelper.java
@@ -100,71 +100,48 @@ public IngestServiceShapefileHelper(File zippedShapefile, File rezipFolder){
//this.processFile(zippedShapefile, rezipFolder);
}
-
- private FileInputStream getFileInputStream(File fileObject){
- if (fileObject==null){
- return null;
- }
- try {
+
+ private FileInputStream getFileInputStream(File fileObject){
+ if (fileObject==null){
+ return null;
+ }
+ try {
return new FileInputStream(fileObject);
} catch (FileNotFoundException ex) {
logger.severe("Failed to create FileInputStream from File: " + fileObject.getAbsolutePath());
return null;
}
- }
-
- private void closeFileInputStream(FileInputStream fis){
- if (fis==null){
- return;
- }
+ }
+
+ private void closeFileInputStream(FileInputStream fis){
+ if (fis==null){
+ return;
+ }
try {
- fis.close();
+ fis.close();
} catch (IOException ex) {
logger.info("Failed to close FileInputStream");
}
- }
-
+ }
+
public boolean processFile() {
if ((!isValidFile(this.zippedShapefile))||(!isValidFolder(this.rezipFolder))){
return false;
}
-
- // (1) Use the ShapefileHandler to the .zip for a shapefile
- //
- FileInputStream shpfileInputStream = this.getFileInputStream(zippedShapefile);
- if (shpfileInputStream==null){
- return false;
- }
-
- this.shpHandler = new ShapefileHandler(shpfileInputStream);
- if (!shpHandler.containsShapefile()){
- logger.severe("Shapefile was incorrectly detected upon Ingest (FileUtil) and passed here");
- return false;
- }
-
- this.closeFileInputStream(shpfileInputStream);
-
- // (2) Rezip the shapefile pieces
- logger.info("rezipFolder: " + rezipFolder.getAbsolutePath());
- shpfileInputStream = this.getFileInputStream(zippedShapefile);
- if (shpfileInputStream==null){
- return false;
- }
-
- boolean rezipSuccess;
try {
- rezipSuccess = shpHandler.rezipShapefileSets(shpfileInputStream, rezipFolder);
+ this.shpHandler = new ShapefileHandler(zippedShapefile);
+ if (!shpHandler.containsShapefile()){
+ logger.severe("Shapefile was incorrectly detected upon Ingest (FileUtil) and passed here");
+ return false;
+ }
+ logger.info("rezipFolder: " + rezipFolder.getAbsolutePath());
+ return shpHandler.rezipShapefileSets(rezipFolder);
} catch (IOException ex) {
logger.severe("Shapefile was not correctly unpacked/repacked");
logger.severe("shpHandler message: " + shpHandler.errorMessage);
return false;
}
-
- this.closeFileInputStream(shpfileInputStream);
-
- return rezipSuccess;
-
// return createDataFiles(rezipFolder);
}
diff --git a/src/main/java/edu/harvard/iq/dataverse/makedatacount/DatasetExternalCitationsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/makedatacount/DatasetExternalCitationsServiceBean.java
index 50c24274bb2..fa56432cc3c 100644
--- a/src/main/java/edu/harvard/iq/dataverse/makedatacount/DatasetExternalCitationsServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/makedatacount/DatasetExternalCitationsServiceBean.java
@@ -7,6 +7,9 @@
import edu.harvard.iq.dataverse.Dataset;
import edu.harvard.iq.dataverse.DatasetServiceBean;
+import edu.harvard.iq.dataverse.GlobalId;
+import edu.harvard.iq.dataverse.pidproviders.PidUtil;
+
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
@@ -40,7 +43,8 @@ public class DatasetExternalCitationsServiceBean implements java.io.Serializable
Arrays.asList(
"cites",
"references",
- "supplements"));
+ "supplements",
+ "is-supplement-to"));
static ArrayList outboundRelationships = new ArrayList(
Arrays.asList(
"is-cited-by",
@@ -59,12 +63,11 @@ public List parseCitations(JsonArray citations) {
if (inboundRelationships.contains(relationship)) {
Dataset localDs = null;
if (objectUri.contains("doi")) {
- String globalId = objectUri.replace("https://", "").replace("doi.org/", "doi:").toUpperCase().replace("DOI:", "doi:");
- localDs = datasetService.findByGlobalId(globalId);
+ localDs = datasetService.findByGlobalId(objectUri);
exCit.setDataset(localDs);
}
exCit.setCitedByUrl(subjectUri);
-
+
if (localDs != null && !exCit.getCitedByUrl().isEmpty()) {
datasetExternalCitations.add(exCit);
}
@@ -72,9 +75,9 @@ public List parseCitations(JsonArray citations) {
if (outboundRelationships.contains(relationship)) {
Dataset localDs = null;
if (subjectUri.contains("doi")) {
- String globalId = subjectUri.replace("https://", "").replace("doi.org/", "doi:").toUpperCase().replace("DOI:", "doi:");
- localDs = datasetService.findByGlobalId(globalId);
+ localDs = datasetService.findByGlobalId(subjectUri);
exCit.setDataset(localDs);
+
}
exCit.setCitedByUrl(objectUri);
diff --git a/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java
index a74474efa15..5bdbeac031d 100644
--- a/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java
@@ -168,25 +168,12 @@ public long datasetsToMonth(String yyyymm, String dataLocation, Dataverse d) {
}
}
- // Note that this SQL line in the code below:
- // datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber))
- // behaves somewhat counter-intuitively if the versionnumber and/or
- // minorversionnumber is/are NULL - it results in an empty string
- // (NOT the string "{dataset_id}:", in other words). Some harvested
- // versions do not have version numbers (only the ones harvested from
- // other Dataverses!) It works fine
- // for our purposes below, because we are simply counting the selected
- // lines - i.e. we don't care if some of these lines are empty.
- // But do not use this notation if you need the values returned to
- // meaningfully identify the datasets!
-
-
Query query = em.createNativeQuery(
"select count(*)\n"
+ "from (\n"
- + "select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber))\n"
+ + "select DISTINCT ON (datasetversion.dataset_id) datasetversion.dataset_id \n"
+ "from datasetversion\n"
+ "join dataset on dataset.id = datasetversion.dataset_id\n"
+ ((d == null) ? "" : "join dvobject on dvobject.id = dataset.id\n")
@@ -194,7 +181,7 @@ public long datasetsToMonth(String yyyymm, String dataLocation, Dataverse d) {
+ ((d == null) ? "" : "and dvobject.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataverse") + ")\n ")
+ "and \n"
+ dataLocationLine // be careful about adding more and statements after this line.
- + "group by dataset_id \n"
+ + " order by datasetversion.dataset_id, datasetversion.versionnumber desc, datasetversion.minorversionnumber desc\n"
+") sub_temp"
);
logger.log(Level.FINE, "Metric query: {0}", query);
@@ -207,15 +194,15 @@ public List datasetsBySubjectToMonth(String yyyymm, String dataLocatio
// A published local datasets may have more than one released version!
// So that's why we have to jump through some extra hoops below
// in order to select the latest one:
- String originClause = "(datasetversion.dataset_id || ':' || datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber) in\n" +
+ String originClause = "(datasetversion.id in\n" +
"(\n" +
- "select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber))\n" +
+ "select DISTINCT ON (datasetversion.dataset_id) datasetversion.id\n" +
" from datasetversion\n" +
" join dataset on dataset.id = datasetversion.dataset_id\n" +
" where versionstate='RELEASED'\n" +
" and dataset.harvestingclient_id is null\n" +
" and date_trunc('month', releasetime) <= to_date('" + yyyymm + "','YYYY-MM')\n" +
- " group by dataset_id\n" +
+ " order by datasetversion.dataset_id, datasetversion.versionnumber desc, datasetversion.minorversionnumber desc\n" +
"))\n";
if (!DATA_LOCATION_LOCAL.equals(dataLocation)) { // Default api state is DATA_LOCATION_LOCAL
@@ -273,7 +260,7 @@ public long datasetsPastDays(int days, String dataLocation, Dataverse d) {
Query query = em.createNativeQuery(
"select count(*)\n"
+ "from (\n"
- + "select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber)) as max\n"
+ + "select DISTINCT ON (datasetversion.dataset_id) datasetversion.id\n"
+ "from datasetversion\n"
+ "join dataset on dataset.id = datasetversion.dataset_id\n"
+ ((d == null) ? "" : "join dvobject on dvobject.id = dataset.id\n")
@@ -281,7 +268,7 @@ public long datasetsPastDays(int days, String dataLocation, Dataverse d) {
+ ((d == null) ? "" : "and dvobject.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataverse") + ")\n")
+ "and \n"
+ dataLocationLine // be careful about adding more and statements after this line.
- + "group by dataset_id \n"
+ + " order by datasetversion.dataset_id, datasetversion.versionnumber desc, datasetversion.minorversionnumber desc \n"
+") sub_temp"
);
logger.log(Level.FINE, "Metric query: {0}", query);
@@ -322,9 +309,9 @@ public long filesToMonth(String yyyymm, Dataverse d) {
+ "select count(*)\n"
+ "from filemetadata\n"
+ "join datasetversion on datasetversion.id = filemetadata.datasetversion_id\n"
- + "where datasetversion.dataset_id || ':' || datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber) in \n"
+ + "where datasetversion.id in \n"
+ "(\n"
- + "select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber)) as max \n"
+ + "select DISTINCT ON (datasetversion.dataset_id) datasetversion.id \n"
+ "from datasetversion\n"
+ "join dataset on dataset.id = datasetversion.dataset_id\n"
+ ((d == null) ? "" : "join dvobject on dvobject.id = dataset.id\n")
@@ -332,7 +319,7 @@ public long filesToMonth(String yyyymm, Dataverse d) {
+ ((d == null) ? "" : "and dvobject.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataverse") + ")\n")
+ "and date_trunc('month', releasetime) <= to_date('" + yyyymm + "','YYYY-MM')\n"
+ "and dataset.harvestingclient_id is null\n"
- + "group by dataset_id \n"
+                + "order by datasetversion.dataset_id, datasetversion.versionnumber desc, datasetversion.minorversionnumber desc \n" // newest minor release first, so DISTINCT ON keeps the latest version of each dataset
+ ");"
);
logger.log(Level.FINE, "Metric query: {0}", query);
@@ -345,9 +332,9 @@ public long filesPastDays(int days, Dataverse d) {
+ "select count(*)\n"
+ "from filemetadata\n"
+ "join datasetversion on datasetversion.id = filemetadata.datasetversion_id\n"
- + "where datasetversion.dataset_id || ':' || datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber) in \n"
+ + "where datasetversion.id in \n"
+ "(\n"
- + "select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber)) as max \n"
+ + "select DISTINCT ON (datasetversion.dataset_id) datasetversion.id \n"
+ "from datasetversion\n"
+ "join dataset on dataset.id = datasetversion.dataset_id\n"
+ ((d == null) ? "" : "join dvobject on dvobject.id = dataset.id\n")
@@ -355,7 +342,7 @@ public long filesPastDays(int days, Dataverse d) {
+ "and releasetime > current_date - interval '" + days + "' day\n"
+ ((d == null) ? "" : "AND dvobject.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataverse") + ")\n")
+ "and dataset.harvestingclient_id is null\n"
- + "group by dataset_id \n"
+ + "order by datasetversion.dataset_id, datasetversion.versionnumber desc, datasetversion.minorversionnumber desc \n"
+ ");"
);
logger.log(Level.FINE, "Metric query: {0}", query);
diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/AbstractPidProvider.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/AbstractPidProvider.java
index f6d142aac96..250eae7e5fc 100644
--- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/AbstractPidProvider.java
+++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/AbstractPidProvider.java
@@ -36,9 +36,9 @@ public abstract class AbstractPidProvider implements PidProvider {
private String datafilePidFormat = null;
- private HashSet managedSet;
+ protected HashSet managedSet = new HashSet();
- private HashSet excludedSet;
+ protected HashSet excludedSet = new HashSet();
private String id;
private String label;
@@ -47,8 +47,6 @@ protected AbstractPidProvider(String id, String label, String protocol) {
this.id = id;
this.label = label;
this.protocol = protocol;
- this.managedSet = new HashSet();
- this.excludedSet = new HashSet();
}
protected AbstractPidProvider(String id, String label, String protocol, String authority, String shoulder,
@@ -60,8 +58,12 @@ protected AbstractPidProvider(String id, String label, String protocol, String a
this.shoulder = shoulder;
this.identifierGenerationStyle = identifierGenerationStyle;
this.datafilePidFormat = datafilePidFormat;
- this.managedSet = new HashSet(Arrays.asList(managedList.split(",\\s")));
- this.excludedSet = new HashSet(Arrays.asList(excludedList.split(",\\s")));
+ if(!managedList.isEmpty()) {
+ this.managedSet.addAll(Arrays.asList(managedList.split(",\\s")));
+ }
+ if(!excludedList.isEmpty()) {
+ this.excludedSet.addAll(Arrays.asList(excludedList.split(",\\s")));
+ }
if (logger.isLoggable(Level.FINE)) {
Iterator iter = managedSet.iterator();
while (iter.hasNext()) {
@@ -313,10 +315,17 @@ protected GlobalId parsePersistentId(String protocol, String identifierString) {
}
public GlobalId parsePersistentId(String protocol, String authority, String identifier) {
+ return parsePersistentId(protocol, authority, identifier, false);
+ }
+
+ public GlobalId parsePersistentId(String protocol, String authority, String identifier, boolean isCaseInsensitive) {
logger.fine("Parsing: " + protocol + ":" + authority + getSeparator() + identifier + " in " + getId());
if (!PidProvider.isValidGlobalId(protocol, authority, identifier)) {
return null;
}
+ if(isCaseInsensitive) {
+ identifier = identifier.toUpperCase();
+ }
// Check authority/identifier if this is a provider that manages specific
// identifiers
// /is not one of the unmanaged providers that has null authority
@@ -333,7 +342,7 @@ public GlobalId parsePersistentId(String protocol, String authority, String iden
logger.fine("managed in " + getId() + ": " + getManagedSet().contains(cleanIdentifier));
logger.fine("excluded from " + getId() + ": " + getExcludedSet().contains(cleanIdentifier));
- if (!(((authority.equals(getAuthority()) && identifier.startsWith(getShoulder()))
+            // The identifier is only upper-cased above when isCaseInsensitive is set, so
+            // the shoulder must be upper-cased under the same condition; case-sensitive
+            // providers compare against the shoulder exactly as configured.
+            if (!(((authority.equals(getAuthority()) && identifier.startsWith(isCaseInsensitive ? getShoulder().toUpperCase() : getShoulder()))
|| getManagedSet().contains(cleanIdentifier)) && !getExcludedSet().contains(cleanIdentifier))) {
return null;
}
diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/AbstractDOIProvider.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/AbstractDOIProvider.java
index 02a7dedce47..70ce1ec4c14 100644
--- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/AbstractDOIProvider.java
+++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/AbstractDOIProvider.java
@@ -1,6 +1,7 @@
package edu.harvard.iq.dataverse.pidproviders.doi;
import java.util.Arrays;
+import java.util.HashSet;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
@@ -26,9 +27,24 @@ public abstract class AbstractDOIProvider extends AbstractPidProvider {
public AbstractDOIProvider(String id, String label, String providerAuthority, String providerShoulder, String identifierGenerationStyle, String datafilePidFormat, String managedList, String excludedList) {
super(id, label, DOI_PROTOCOL, providerAuthority, providerShoulder, identifierGenerationStyle, datafilePidFormat, managedList, excludedList);
+ //Create case insensitive (converted toUpperCase) managedSet and excludedSet
+ managedSet = clean(managedSet, "managed");
+ excludedSet = clean(excludedSet, "excluded");
}
- //For Unmanged provider
+    /**
+     * Builds a case-normalized copy of a managed/excluded PID set.
+     *
+     * For every entry that starts with the DOI protocol, the part after the
+     * protocol is converted to upper case and the result is added to the new
+     * set. Entries that do not start with the DOI protocol are left out, and
+     * a warning naming the set and the provider id is logged for each.
+     *
+     * @param originalSet the entries as configured; not modified
+     * @param setName     label for the set, used only in the warning message
+     * @return a new set holding the normalized DOI entries
+     */
+    private HashSet<String> clean(HashSet<String> originalSet, String setName) {
+        HashSet<String> cleanSet = new HashSet<String>();
+        for (String entry : originalSet) {
+            if (entry.startsWith(DOI_PROTOCOL)) {
+                cleanSet.add(DOI_PROTOCOL + entry.substring(DOI_PROTOCOL.length()).toUpperCase());
+            } else {
+                logger.warning("Non-DOI found in " + setName + " set of pidProvider id: " + getId() + ": " + entry + ". Entry is being dropped.");
+            }
+        }
+        return cleanSet;
+    }
+
+ //For Unmanaged provider
public AbstractDOIProvider(String name, String label) {
super(name, label, DOI_PROTOCOL);
}
@@ -67,7 +83,7 @@ public GlobalId parsePersistentId(String protocol, String authority, String iden
if (!DOI_PROTOCOL.equals(protocol)) {
return null;
}
- return super.parsePersistentId(protocol, authority, identifier);
+ return super.parsePersistentId(protocol, authority, identifier, true);
}
public String getUrlPrefix() {
diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java
index a74a9f34bc9..8199b7d9c9f 100644
--- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java
+++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java
@@ -1317,8 +1317,8 @@ private void writeDescriptions(XMLStreamWriter xmlw, DvObject dvObject, boolean
}
if (StringUtils.isNotBlank(softwareName)) {
if (StringUtils.isNotBlank(softwareVersion)) {
+ softwareName = softwareName + ", " + softwareVersion;
}
- softwareName = softwareName + ", " + softwareVersion;
descriptionsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "descriptions", descriptionsWritten);
XmlWriterUtil.writeFullElementWithAttributes(xmlw, "description", attributes, softwareName);
}
diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DataCiteDOIProvider.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DataCiteDOIProvider.java
index 5630844fb32..b07cd027a01 100644
--- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DataCiteDOIProvider.java
+++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DataCiteDOIProvider.java
@@ -226,8 +226,7 @@ protected String getProviderKeyName() {
@Override
public String getProviderType() {
- // TODO Auto-generated method stub
- return null;
+ return TYPE;
}
public String getMdsUrl() {
diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/handle/HandlePidProvider.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/handle/HandlePidProvider.java
index 9d61663d034..1f03d8a6cfb 100644
--- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/handle/HandlePidProvider.java
+++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/handle/HandlePidProvider.java
@@ -59,6 +59,11 @@
* service.
* As of now, it only does the registration updates, to accommodate
* the modifyRegistration datasets API sub-command.
+ *
+ * Note that while Handles are nominally case sensitive, handle.net is
+ * configured to be case-insensitive and Dataverse makes case-insensitive
+ * database look-ups to find Handles (See #11003). That said, database
+ * entries are stored in the case matching the configuration of the provider.
*/
public class HandlePidProvider extends AbstractPidProvider {
diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/perma/PermaLinkPidProvider.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/perma/PermaLinkPidProvider.java
index 7b55292350f..2cc0d41ede7 100644
--- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/perma/PermaLinkPidProvider.java
+++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/perma/PermaLinkPidProvider.java
@@ -24,6 +24,9 @@
* overridable by a configurable parameter to support use of an external
* resolver.
*
+ * Note that while PermaLinks are nominally case sensitive, Dataverse makes
+ * case-insensitive database look-ups to find them (See #11003). That said, database
+ * entries are stored in the case matching the configuration of the provider.
*/
public class PermaLinkPidProvider extends AbstractPidProvider {
diff --git a/src/main/java/edu/harvard/iq/dataverse/privateurl/PrivateUrl.java b/src/main/java/edu/harvard/iq/dataverse/privateurl/PrivateUrl.java
index beb676f60d1..63b5bf03ea7 100644
--- a/src/main/java/edu/harvard/iq/dataverse/privateurl/PrivateUrl.java
+++ b/src/main/java/edu/harvard/iq/dataverse/privateurl/PrivateUrl.java
@@ -30,7 +30,7 @@ public class PrivateUrl {
public PrivateUrl(RoleAssignment roleAssignment, Dataset dataset, String dataverseSiteUrl) {
this.token = roleAssignment.getPrivateUrlToken();
- this.link = dataverseSiteUrl + "/privateurl.xhtml?token=" + token;
+ this.link = dataverseSiteUrl + "/previewurl.xhtml?token=" + token;
this.dataset = dataset;
this.roleAssignment = roleAssignment;
}
diff --git a/src/main/java/edu/harvard/iq/dataverse/privateurl/PrivateUrlPage.java b/src/main/java/edu/harvard/iq/dataverse/privateurl/PrivateUrlPage.java
index 9af4bb6af9e..17c622be9e2 100644
--- a/src/main/java/edu/harvard/iq/dataverse/privateurl/PrivateUrlPage.java
+++ b/src/main/java/edu/harvard/iq/dataverse/privateurl/PrivateUrlPage.java
@@ -1,6 +1,10 @@
package edu.harvard.iq.dataverse.privateurl;
+import edu.harvard.iq.dataverse.Dataset;
+import edu.harvard.iq.dataverse.DatasetServiceBean;
+import edu.harvard.iq.dataverse.DataverseRequestServiceBean;
import edu.harvard.iq.dataverse.DataverseSession;
+import edu.harvard.iq.dataverse.PermissionsWrapper;
import edu.harvard.iq.dataverse.authorization.users.PrivateUrlUser;
import java.io.Serializable;
import java.util.logging.Logger;
@@ -20,8 +24,14 @@ public class PrivateUrlPage implements Serializable {
@EJB
PrivateUrlServiceBean privateUrlService;
+ @EJB
+ DatasetServiceBean datasetServiceBean;
@Inject
DataverseSession session;
+ @Inject
+ PermissionsWrapper permissionsWrapper;
+ @Inject
+ DataverseRequestServiceBean dvRequestService;
/**
* The unique string used to look up a PrivateUrlUser and the associated
@@ -34,7 +44,16 @@ public String init() {
PrivateUrlRedirectData privateUrlRedirectData = privateUrlService.getPrivateUrlRedirectDataFromToken(token);
String draftDatasetPageToBeRedirectedTo = privateUrlRedirectData.getDraftDatasetPageToBeRedirectedTo() + "&faces-redirect=true";
PrivateUrlUser privateUrlUser = privateUrlRedirectData.getPrivateUrlUser();
- session.setUser(privateUrlUser);
+ boolean sessionUserCanViewUnpublishedDataset = false;
+ if (session.getUser().isAuthenticated()){
+ Long datasetId = privateUrlUser.getDatasetId();
+ Dataset dataset = datasetServiceBean.find(datasetId);
+ sessionUserCanViewUnpublishedDataset = permissionsWrapper.canViewUnpublishedDataset(dvRequestService.getDataverseRequest(), dataset);
+ }
+ if(!sessionUserCanViewUnpublishedDataset){
+ // Only reset the session user if the current user cannot view this draft version
+ session.setUser(privateUrlUser);
+ }
logger.info("Redirecting PrivateUrlUser '" + privateUrlUser.getIdentifier() + "' to " + draftDatasetPageToBeRedirectedTo);
return draftDatasetPageToBeRedirectedTo;
} catch (Exception ex) {
diff --git a/src/main/java/edu/harvard/iq/dataverse/privateurl/package-info.java b/src/main/java/edu/harvard/iq/dataverse/privateurl/package-info.java
index 6e939c1bb6d..1310e0eb199 100644
--- a/src/main/java/edu/harvard/iq/dataverse/privateurl/package-info.java
+++ b/src/main/java/edu/harvard/iq/dataverse/privateurl/package-info.java
@@ -1,19 +1,19 @@
/**
- * Private URL for unpublished datasets.
+ * Preview URL for unpublished datasets.
*
- * The Private URL feature has been implemented as a specialized role assignment
+ * The Preview (formerly Private) URL feature has been implemented as a specialized role assignment
* with an associated token that permits read-only access to the metadata and
* all files (regardless of if the files are restricted or not) of a draft
* version of a dataset.
*
- * As of this note, a second option - to create a Private URL that provides an
+ * As of this note, a second option - to create a Preview URL that provides an
* anonymized view of the dataset has been added. This option works the same as
* the original except that it hides author names in the citation block, hides
* the values for an admin specified list of metadata fields, disables citation
* downloads, and disables API access (except for file and file thumbnail
* downloads which are used by the UI).
*
- * The primary use case for a Private URL is for journal editors to send a link
+ * The primary use case for a Preview URL is for journal editors to send a link
* to reviewers of a dataset before publication. In most cases, these journal
* editors do not permit depositors to publish on their own, which is to say
* they only allow depositors to have the "Contributor" role on the datasets
@@ -24,42 +24,42 @@
* the depositor, who is in charge of both the security of the dataset and the
* timing of when the dataset is published.
*
- * A secondary use case for a Private URL is for depositors who have the ability
+ * A secondary use case for a Preview URL is for depositors who have the ability
* to manage permissions on their dataset (depositors who have the "Curator" or
* "Admin" role, which grants much more power than the "Contributor" role) to
* send a link to coauthors or other trusted parties to preview the dataset
* before the depositors publish the dataset on their own. For better security,
* these depositors could ask their coauthors to create Dataverse accounts and
- * assign roles to them directly, rather than using a Private URL which requires
+ * assign roles to them directly, rather than using a Preview URL which requires
* no username or password.
*
* As of this note, a second option aimed specifically at the review use case -
- * to create a Private URL that provides an anonymized view of the dataset - has
+ * to create a Preview URL that provides an anonymized view of the dataset - has
* been added. This option works the same as the original except that it hides
* author names in the citation block, hides the values for an admin specified
* list of metadata fields, disables citation downloads, and disables API access
* (except for file and file thumbnail downloads which are used by the UI).
*
- * The token associated with the Private URL role assignment that can be used
+ * The token associated with the Preview URL role assignment that can be used
* either in the GUI or, for the non-anonymized-access option, via the API to
* elevate privileges beyond what a "Guest" can see. The ability to use a
- * Private URL token via API was added mostly to facilitate automated testing of
- * the feature but the far more common case is expected to be use of the Private
+ * Preview URL token via API was added mostly to facilitate automated testing of
+ * the feature but the far more common case is expected to be use of the Preview
* URL token in a link that is clicked to open a browser, similar to links
* shared via Dropbox, Google, etc.
*
- * When reviewers click a Private URL their browser sessions are set to the
+ * When reviewers click a Preview URL their browser sessions are set to the
* "{@link edu.harvard.iq.dataverse.authorization.users.PrivateUrlUser}" that
* has the "Member" role only on the dataset in question and redirected to that
* dataset, where they will see an indication in blue at the top of the page
* that they are viewing an unpublished dataset. If the reviewer happens to be
* logged into Dataverse already, clicking the link will log them out because
* the review is meant to be blind. Because the dataset is always in draft when
- * a Private URL is in effect, no downloads or any other activity by the
- * reviewer are logged to the guestbook. All reviewers click the same Private
+ * a Preview URL is in effect, no downloads or any other activity by the
+ * reviewer are logged to the guestbook. All reviewers click the same Preview
* URL containing the same token, and with the exception of an IP address being
* logged, it should be impossible to trace which reviewers have clicked a
- * Private URL. If the reviewer navigates to the home page, the session is set
+ * Preview URL. If the reviewer navigates to the home page, the session is set
* to the Guest user and they will see what a Guest would see.
*
* The "Member" role is used because it contains the necessary read-only
@@ -76,51 +76,51 @@
* version. A Member can also download restricted files that have been deleted
* from previously published versions.
*
- * Likewise, when a Private URL token is used via API, commands are executed
+ * Likewise, when a Preview URL token is used via API, commands are executed
* using the "PrivateUrlUser" that has the "Member" role only on the dataset in
* question. This means that read-only operations such as downloads of the
- * dataset's files are permitted. The Search API does not respect the Private
+ * dataset's files are permitted. The Search API does not respect the Preview
* URL token but you can download files using the Access API, and, with the
* non-anonymized-access option, download unpublished metadata using the Native
* API.
*
- * A Private URL cannot be created for a published version of a dataset. In the
+ * A Preview URL cannot be created for a published version of a dataset. In the
* GUI, you will be reminded of this fact with a popup. The API will explain
* this as well.
*
- * An anonymized-access Private URL can't be created if any published dataset
+ * An anonymized-access Preview URL can't be created if any published dataset
* version exists. The primary reason for this is that, since datasets have
* DOIs, the full metadata about published versions is available directly from
* the DOI provider. (While the metadata for that version could be somewhat
* different, in practice it would probably provide a means of identifying
* some/all of the authors).
*
- * If a draft dataset containing a Private URL is
- * published, the Private URL is deleted. This means that reviewers who click
+ * If a draft dataset containing a Preview URL is
+ * published, the Preview URL is deleted. This means that reviewers who click
* the link after publication will see a 404.
*
- * If a post-publication draft containing a Private URL is deleted, the Private
+ * If a post-publication draft containing a Preview URL is deleted, the Preview
* URL is deleted. This is to ensure that if a new draft is created in the
* future, a new token will be used.
*
- * The creation and deletion of a Private URL are limited to the "Curator" and
+ * The creation and deletion of a Preview URL are limited to the "Curator" and
* "Admin" roles because only those roles have the permission called
* "ManageDatasetPermissions", which is the permission used by the
* "AssignRoleCommand" and "RevokeRoleCommand" commands. If you have the
- * permission to create or delete a Private URL, the fact that a Private URL is
+ * permission to create or delete a Preview URL, the fact that a Preview URL is
* enabled for a dataset will be indicated in blue at the top of the page.
* Success messages are shown at the top of the page when you create or delete a
- * Private URL. In the GUI, deleting a Private URL is called "disabling" and you
+ * Preview URL. In the GUI, deleting a Preview URL is called "disabling" and you
* will be prompted for a confirmation. No matter what you call it the role is
- * revoked. You can also delete a Private URL by revoking the role.
+ * revoked. You can also delete a Preview URL by revoking the role.
*
* A "Contributor" does not have the "ManageDatasetPermissions" permission and
- * cannot see "Permissions" nor "Private URL" under the "Edit" menu of their
- * dataset. When a Curator or Admin has enabled a Private URL on a Contributor's
- * dataset, the Contributor does not see a visual indication that a Private URL
+ * cannot see "Permissions" nor "Preview URL" under the "Edit" menu of their
+ * dataset. When a Curator or Admin has enabled a Preview URL on a Contributor's
+ * dataset, the Contributor does not see a visual indication that a Preview URL
* has been enabled for their dataset.
*
- * There is no way for an "Admin" or "Curator" to see when a Private URL was
+ * There is no way for an "Admin" or "Curator" to see when a Preview URL was
* created or deleted for a dataset but someone who has access to the database
* can see that the following commands are logged to the "actionlogrecord"
* database table:
@@ -129,7 +129,7 @@
*
{@link edu.harvard.iq.dataverse.engine.command.impl.CreatePrivateUrlCommand}
* {@link edu.harvard.iq.dataverse.engine.command.impl.DeletePrivateUrlCommand}
*
- * See also the Private URL To Unpublished Dataset BRD at
* https://docs.google.com/document/d/1FT47QkZKcmjSgRnePaJO2g1nzcotLyN3Yb2ORvBr6cs/edit?usp=sharing
*/
diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java
index a8cf9ed519b..4efd339ee46 100644
--- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java
@@ -135,6 +135,9 @@ public class IndexServiceBean {
@EJB
DatasetFieldServiceBean datasetFieldService;
+ @Inject
+ DatasetVersionFilesServiceBean datasetVersionFilesServiceBean;
+
public static final String solrDocIdentifierDataverse = "dataverse_";
public static final String solrDocIdentifierFile = "datafile_";
public static final String solrDocIdentifierDataset = "dataset_";
@@ -420,7 +423,7 @@ synchronized private static Dataset getNextToIndex(Long id, Dataset d) {
public void asyncIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) {
try {
acquirePermitFromSemaphore();
- doAyncIndexDataset(dataset, doNormalSolrDocCleanUp);
+ doAsyncIndexDataset(dataset, doNormalSolrDocCleanUp);
} catch (InterruptedException e) {
String failureLogText = "Indexing failed: interrupted. You can kickoff a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + dataset.getId().toString();
failureLogText += "\r\n" + e.getLocalizedMessage();
@@ -430,7 +433,7 @@ public void asyncIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) {
}
}
- private void doAyncIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) {
+ private void doAsyncIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) {
Long id = dataset.getId();
Dataset next = getNextToIndex(id, dataset); // if there is an ongoing index job for this dataset, next is null (ongoing index job will reindex the newest version after current indexing finishes)
while (next != null) {
@@ -451,7 +454,7 @@ public void asyncIndexDatasetList(List datasets, boolean doNormalSolrDo
for(Dataset dataset : datasets) {
try {
acquirePermitFromSemaphore();
- doAyncIndexDataset(dataset, true);
+ doAsyncIndexDataset(dataset, true);
} catch (InterruptedException e) {
String failureLogText = "Indexing failed: interrupted. You can kickoff a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + dataset.getId().toString();
failureLogText += "\r\n" + e.getLocalizedMessage();
@@ -1018,6 +1021,8 @@ public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, SetFeature Request/Idea: Harvest metadata values that aren't from a list of controlled values #9992
@@ -1296,7 +1299,6 @@ public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, Set findPermissionsInSolrOnly() throws SearchException {
String dtype = dvObjectService.getDtype(id);
if (dtype == null) {
permissionInSolrOnly.add(docId);
- }
- if (dtype.equals(DType.Dataset.getDType())) {
+ }else if (dtype.equals(DType.Dataset.getDType())) {
List states = datasetService.getVersionStates(id);
if (states != null) {
String latestState = states.get(states.size() - 1);
@@ -2252,7 +2253,7 @@ public List findPermissionsInSolrOnly() throws SearchException {
} else if (dtype.equals(DType.DataFile.getDType())) {
List states = dataFileService.findVersionStates(id);
Set strings = states.stream().map(VersionState::toString).collect(Collectors.toSet());
- logger.fine("States for " + docId + ": " + String.join(", ", strings));
+ logger.finest("States for " + docId + ": " + String.join(", ", strings));
if (docId.endsWith("draft_permission")) {
if (!states.contains(VersionState.DRAFT)) {
permissionInSolrOnly.add(docId);
@@ -2266,7 +2267,7 @@ public List findPermissionsInSolrOnly() throws SearchException {
permissionInSolrOnly.add(docId);
} else {
if (!dataFileService.isInReleasedVersion(id)) {
- logger.fine("Adding doc " + docId + " to list of permissions in Solr only");
+ logger.finest("Adding doc " + docId + " to list of permissions in Solr only");
permissionInSolrOnly.add(docId);
}
}
@@ -2407,6 +2408,11 @@ public void deleteHarvestedDocuments(Dataset harvestedDataset) {
solrIdsOfDocumentsToDelete.add(solrDocIdentifierFile + datafile.getId());
}
+ deleteHarvestedDocuments(solrIdsOfDocumentsToDelete);
+ }
+
+ public void deleteHarvestedDocuments(List solrIdsOfDocumentsToDelete) {
+
logger.fine("attempting to delete the following documents from the index: " + StringUtils.join(solrIdsOfDocumentsToDelete, ","));
IndexResponse resultOfAttemptToDeleteDocuments = solrIndexService.deleteMultipleSolrIds(solrIdsOfDocumentsToDelete);
logger.fine("result of attempt to delete harvested documents: " + resultOfAttemptToDeleteDocuments + "\n");
diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchFields.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchFields.java
index ef27a5eefaf..1f1137016f2 100644
--- a/src/main/java/edu/harvard/iq/dataverse/search/SearchFields.java
+++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchFields.java
@@ -291,5 +291,6 @@ more targeted results for just datasets. The format is YYYY (i.e.
public static final String DATASET_VALID = "datasetValid";
public static final String DATASET_LICENSE = "license";
+ public static final String FILE_COUNT = "fileCount";
}
diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java
index 4f3f6e46e48..9328dd03ca2 100644
--- a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java
+++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java
@@ -34,6 +34,7 @@
import java.util.HashSet;
import java.util.List;
import java.util.Map;
+import java.util.MissingResourceException;
import java.util.Optional;
import java.util.Set;
import java.util.logging.Logger;
@@ -1231,40 +1232,33 @@ public String getTypeFromFilterQuery(String filterQuery) {
}
public List getFriendlyNamesFromFilterQuery(String filterQuery) {
-
-
- if ((filterQuery == null)||
- (datasetfieldFriendlyNamesBySolrField == null)||
- (staticSolrFieldFriendlyNamesBySolrField==null)){
+
+ if ((filterQuery == null) ||
+ (datasetfieldFriendlyNamesBySolrField == null) ||
+ (staticSolrFieldFriendlyNamesBySolrField == null)) {
return null;
}
-
- if(!filterQuery.contains(":")) {
+
+ if (!filterQuery.contains(":")) {
return null;
}
-
+
int index = filterQuery.indexOf(":");
String key = filterQuery.substring(0,index);
String value = filterQuery.substring(index+1);
- List friendlyNames = new ArrayList<>();
+ // friendlyNames gets 2 entries: key and value
+ List friendlyNames = new ArrayList<>(2);
+ // Get dataset field friendly name from default resource bundle file
String datasetfieldFriendyName = datasetfieldFriendlyNamesBySolrField.get(key);
if (datasetfieldFriendyName != null) {
friendlyNames.add(datasetfieldFriendyName);
} else {
+ // Get non-dataset field friendly name from "staticSearchFields" resource bundle file
String nonDatasetSolrField = staticSolrFieldFriendlyNamesBySolrField.get(key);
if (nonDatasetSolrField != null) {
friendlyNames.add(nonDatasetSolrField);
- } else if (key.equals(SearchFields.PUBLICATION_STATUS)) {
- /**
- * @todo Refactor this quick fix for
- * https://github.com/IQSS/dataverse/issues/618 . We really need
- * to get rid of all the reflection that's happening with
- * solrQueryResponse.getStaticSolrFieldFriendlyNamesBySolrField()
- * and
- */
- friendlyNames.add("Publication Status");
} else {
// meh. better than nuthin'
friendlyNames.add(key);
@@ -1276,9 +1270,13 @@ public List getFriendlyNamesFromFilterQuery(String filterQuery) {
String valueWithoutQuotes = noTrailingQuote;
if (key.equals(SearchFields.METADATA_TYPES) && getDataverse() != null && getDataverse().getMetadataBlockFacets() != null) {
- Optional friendlyName = getDataverse().getMetadataBlockFacets().stream().filter(block -> block.getMetadataBlock().getName().equals(valueWithoutQuotes)).findFirst().map(block -> block.getMetadataBlock().getLocaleDisplayFacet());
+ Optional friendlyName = getDataverse().getMetadataBlockFacets()
+ .stream()
+ .filter(block -> block.getMetadataBlock().getName().equals(valueWithoutQuotes))
+ .findFirst()
+ .map(block -> block.getMetadataBlock().getLocaleDisplayFacet());
logger.fine(String.format("action=getFriendlyNamesFromFilterQuery key=%s value=%s friendlyName=%s", key, value, friendlyName));
- if(friendlyName.isPresent()) {
+ if (friendlyName.isPresent()) {
friendlyNames.add(friendlyName.get());
return friendlyNames;
}
@@ -1290,7 +1288,15 @@ public List getFriendlyNamesFromFilterQuery(String filterQuery) {
}
}
- friendlyNames.add(valueWithoutQuotes);
+ // Get value friendly name from default resource bundle file
+ String valueFriendlyName;
+ try {
+ valueFriendlyName = BundleUtil.getStringFromPropertyFile(noTrailingQuote, "Bundle");
+ } catch (MissingResourceException e) {
+ valueFriendlyName = noTrailingQuote;
+ }
+
+ friendlyNames.add(valueFriendlyName);
return friendlyNames;
}
diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java
index ee93c49ad34..3fd97d418c0 100644
--- a/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java
@@ -35,6 +35,8 @@
import jakarta.inject.Inject;
import jakarta.inject.Named;
import jakarta.persistence.NoResultException;
+
+import org.apache.commons.lang3.StringUtils;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrQuery.SortClause;
import org.apache.solr.client.solrj.SolrServerException;
@@ -52,6 +54,8 @@ public class SearchServiceBean {
private static final Logger logger = Logger.getLogger(SearchServiceBean.class.getCanonicalName());
+ private static final String ALL_GROUPS = "*";
+
/**
* We're trying to make the SearchServiceBean lean, mean, and fast, with as
* few injections of EJBs as possible.
@@ -182,6 +186,7 @@ public SolrQueryResponse search(
SolrQuery solrQuery = new SolrQuery();
query = SearchUtil.sanitizeQuery(query);
+
solrQuery.setQuery(query);
if (sortField != null) {
// is it ok not to specify any sort? - there are cases where we
@@ -323,24 +328,13 @@ public SolrQueryResponse search(
}
}
- //I'm not sure if just adding null here is good for hte permissions system... i think it needs something
- if(dataverses != null) {
- for(Dataverse dataverse : dataverses) {
- // -----------------------------------
- // PERMISSION FILTER QUERY
- // -----------------------------------
- String permissionFilterQuery = this.getPermissionFilterQuery(dataverseRequest, solrQuery, dataverse, onlyDatatRelatedToMe, addFacets);
- if (permissionFilterQuery != null) {
- solrQuery.addFilterQuery(permissionFilterQuery);
- }
- }
- } else {
- String permissionFilterQuery = this.getPermissionFilterQuery(dataverseRequest, solrQuery, null, onlyDatatRelatedToMe, addFacets);
- if (permissionFilterQuery != null) {
- solrQuery.addFilterQuery(permissionFilterQuery);
- }
+ // -----------------------------------
+ // PERMISSION FILTER QUERY
+ // -----------------------------------
+ String permissionFilterQuery = this.getPermissionFilterQuery(dataverseRequest, solrQuery, onlyDatatRelatedToMe, addFacets);
+ if (!StringUtils.isBlank(permissionFilterQuery)) {
+ solrQuery.addFilterQuery(permissionFilterQuery);
}
-
/**
* @todo: do sanity checking... throw error if negative
@@ -503,7 +497,8 @@ public SolrQueryResponse search(
Long retentionEndDate = (Long) solrDocument.getFieldValue(SearchFields.RETENTION_END_DATE);
//
Boolean datasetValid = (Boolean) solrDocument.getFieldValue(SearchFields.DATASET_VALID);
-
+ Long fileCount = (Long) solrDocument.getFieldValue(SearchFields.FILE_COUNT);
+
List matchedFields = new ArrayList<>();
SolrSearchResult solrSearchResult = new SolrSearchResult(query, name);
@@ -576,6 +571,7 @@ public SolrQueryResponse search(
solrSearchResult.setDeaccessionReason(deaccessionReason);
solrSearchResult.setDvTree(dvTree);
solrSearchResult.setDatasetValid(datasetValid);
+ solrSearchResult.setFileCount(fileCount);
if (Boolean.TRUE.equals((Boolean) solrDocument.getFieldValue(SearchFields.IS_HARVESTED))) {
solrSearchResult.setHarvested(true);
@@ -994,7 +990,7 @@ public String getCapitalizedName(String name) {
*
* @return
*/
- private String getPermissionFilterQuery(DataverseRequest dataverseRequest, SolrQuery solrQuery, Dataverse dataverse, boolean onlyDatatRelatedToMe, boolean addFacets) {
+ private String getPermissionFilterQuery(DataverseRequest dataverseRequest, SolrQuery solrQuery, boolean onlyDatatRelatedToMe, boolean addFacets) {
User user = dataverseRequest.getUser();
if (user == null) {
@@ -1003,38 +999,22 @@ private String getPermissionFilterQuery(DataverseRequest dataverseRequest, SolrQ
if (solrQuery == null) {
throw new NullPointerException("solrQuery cannot be null");
}
- /**
- * @todo For people who are not logged in, should we show stuff indexed
- * with "AllUsers" group or not? If so, uncomment the allUsersString
- * stuff below.
- */
-// String allUsersString = IndexServiceBean.getGroupPrefix() + AllUsers.get().getAlias();
-// String publicOnly = "{!join from=" + SearchFields.DEFINITION_POINT + " to=id}" + SearchFields.DISCOVERABLE_BY + ":(" + IndexServiceBean.getPublicGroupString() + " OR " + allUsersString + ")";
- String publicOnly = "{!join from=" + SearchFields.DEFINITION_POINT + " to=id}" + SearchFields.DISCOVERABLE_BY + ":(" + IndexServiceBean.getPublicGroupString() + ")";
-// String publicOnly = "{!join from=" + SearchFields.GROUPS + " to=" + SearchFields.PERMS + "}id:" + IndexServiceBean.getPublicGroupString();
- // initialize to public only to be safe
- String dangerZoneNoSolrJoin = null;
-
+
if (user instanceof PrivateUrlUser) {
user = GuestUser.get();
}
- AuthenticatedUser au = null;
+ ArrayList groupList = new ArrayList();
+ AuthenticatedUser au = null;
Set groups;
-
- if (user instanceof GuestUser) {
- // Yes, GuestUser may be part of one or more groups; such as IP Groups.
- groups = groupService.collectAncestors(groupService.groupsFor(dataverseRequest));
- } else {
- if (!(user instanceof AuthenticatedUser)) {
- logger.severe("Should never reach here. A User must be an AuthenticatedUser or a Guest");
- throw new IllegalStateException("A User must be an AuthenticatedUser or a Guest");
- }
+ boolean avoidJoin = FeatureFlags.AVOID_EXPENSIVE_SOLR_JOIN.enabled();
+
+ if (user instanceof AuthenticatedUser) {
au = (AuthenticatedUser) user;
-
+
// ----------------------------------------------------
- // (3) Is this a Super User?
+ // Is this a Super User?
// If so, they can see everything
// ----------------------------------------------------
if (au.isSuperuser()) {
@@ -1042,187 +1022,76 @@ private String getPermissionFilterQuery(DataverseRequest dataverseRequest, SolrQ
// to see everything in Solr with no regard to permissions. But it's
// been this way since Dataverse 4.0. So relax. :)
- return dangerZoneNoSolrJoin;
+ return buildPermissionFilterQuery(avoidJoin, ALL_GROUPS);
}
-
+
// ----------------------------------------------------
- // (4) User is logged in AND onlyDatatRelatedToMe == true
+ // User is logged in AND onlyDatatRelatedToMe == true
// Yes, give back everything -> the settings will be in
- // the filterqueries given to search
+ // the filterqueries given to search
// ----------------------------------------------------
if (onlyDatatRelatedToMe == true) {
if (systemConfig.myDataDoesNotUsePermissionDocs()) {
logger.fine("old 4.2 behavior: MyData is not using Solr permission docs");
- return dangerZoneNoSolrJoin;
+ return buildPermissionFilterQuery(avoidJoin, ALL_GROUPS);
} else {
// fall-through
logger.fine("new post-4.2 behavior: MyData is using Solr permission docs");
}
}
-
// ----------------------------------------------------
- // (5) Work with Authenticated User who is not a Superuser
+ // Work with Authenticated User who is not a Superuser
// ----------------------------------------------------
-
- groups = groupService.collectAncestors(groupService.groupsFor(dataverseRequest));
+ groupList.add(IndexServiceBean.getGroupPerUserPrefix() + au.getId());
}
- if (FeatureFlags.AVOID_EXPENSIVE_SOLR_JOIN.enabled()) {
- /**
- * Instead of doing a super expensive join, we will rely on the
- * new boolean field PublicObject:true for public objects. This field
- * is indexed on the content document itself, rather than a permission
- * document. An additional join will be added only for any extra,
- * more restricted groups that the user may be part of.
- * **Note the experimental nature of this optimization**.
- */
- StringBuilder sb = new StringBuilder();
- StringBuilder sbgroups = new StringBuilder();
-
- // All users, guests and authenticated, should see all the
- // documents marked as publicObject_b:true, at least:
- sb.append(SearchFields.PUBLIC_OBJECT + ":" + true);
+ // In addition to the user referenced directly, we will also
+ // add joins on all the non-public groups that may exist for the
+ // user:
- // One or more groups *may* also be available for this user. Once again,
- // do note that Guest users may be part of some groups, such as
- // IP groups.
-
- int groupCounter = 0;
+ // Authenticated users, *and the GuestUser*, may be part of one or more groups, such
+ // as IP Groups.
+ groups = groupService.collectAncestors(groupService.groupsFor(dataverseRequest));
- // An AuthenticatedUser should also be able to see all the content
- // on which they have direct permissions:
- if (au != null) {
- groupCounter++;
- sbgroups.append(IndexServiceBean.getGroupPerUserPrefix() + au.getId());
- }
-
- // In addition to the user referenced directly, we will also
- // add joins on all the non-public groups that may exist for the
- // user:
- for (Group group : groups) {
- String groupAlias = group.getAlias();
- if (groupAlias != null && !groupAlias.isEmpty() && !groupAlias.startsWith("builtIn")) {
- groupCounter++;
- if (groupCounter > 1) {
- sbgroups.append(" OR ");
- }
- sbgroups.append(IndexServiceBean.getGroupPrefix() + groupAlias);
- }
- }
-
- if (groupCounter > 1) {
- // If there is more than one group, the parentheses must be added:
- sbgroups.insert(0, "(");
- sbgroups.append(")");
- }
-
- if (groupCounter > 0) {
- // If there are any groups for this user, an extra join must be
- // added to the query, and the extra sub-query must be added to
- // the combined Solr query:
- sb.append(" OR {!join from=" + SearchFields.DEFINITION_POINT + " to=id v=$q1}");
- // Add the subquery to the combined Solr query:
- solrQuery.setParam("q1", SearchFields.DISCOVERABLE_BY + ":" + sbgroups.toString());
- logger.info("The sub-query q1 set to " + SearchFields.DISCOVERABLE_BY + ":" + sbgroups.toString());
- }
-
- String ret = sb.toString();
- logger.fine("Returning experimental query: " + ret);
- return ret;
- }
-
- // END OF EXPERIMENTAL OPTIMIZATION
-
- // Old, un-optimized way of handling permissions.
- // Largely left intact, minus the lookups that have already been performed
- // above.
-
- // ----------------------------------------------------
- // (1) Is this a GuestUser?
- // ----------------------------------------------------
- if (user instanceof GuestUser) {
-
- StringBuilder sb = new StringBuilder();
-
- String groupsFromProviders = "";
- for (Group group : groups) {
- logger.fine("found group " + group.getIdentifier() + " with alias " + group.getAlias());
- String groupAlias = group.getAlias();
- if (groupAlias != null && !groupAlias.isEmpty()) {
- sb.append(" OR ");
- // i.e. group_builtIn/all-users, ip/ipGroup3
- sb.append(IndexServiceBean.getGroupPrefix()).append(groupAlias);
- }
- }
- groupsFromProviders = sb.toString();
- logger.fine("groupsFromProviders:" + groupsFromProviders);
- String guestWithGroups = "{!join from=" + SearchFields.DEFINITION_POINT + " to=id}" + SearchFields.DISCOVERABLE_BY + ":(" + IndexServiceBean.getPublicGroupString() + groupsFromProviders + ")";
- logger.fine(guestWithGroups);
- return guestWithGroups;
- }
-
- // ----------------------------------------------------
- // (5) Work with Authenticated User who is not a Superuser
- // ----------------------------------------------------
- // It was already confirmed, that if the user is not GuestUser, we
- // have an AuthenticatedUser au which is not null.
- /**
- * @todo all this code needs cleanup and clarification.
- */
- /**
- * Every AuthenticatedUser is part of a "User Private Group" (UGP), a
- * concept we borrow from RHEL:
- * https://access.redhat.com/site/documentation/en-US/Red_Hat_Enterprise_Linux/6/html/Deployment_Guide/ch-Managing_Users_and_Groups.html#s2-users-groups-private-groups
- */
- /**
- * @todo rename this from publicPlusUserPrivateGroup. Confusing
- */
- // safe default: public only
- String publicPlusUserPrivateGroup = publicOnly;
-// + (onlyDatatRelatedToMe ? "" : (publicOnly + " OR "))
-// + "{!join from=" + SearchFields.GROUPS + " to=" + SearchFields.PERMS + "}id:" + IndexServiceBean.getGroupPerUserPrefix() + au.getId() + ")";
-
-// /**
-// * @todo add onlyDatatRelatedToMe option into the experimental JOIN
-// * before enabling it.
-// */
- /**
- * From a search perspective, we don't care about if the group was
- * created within one dataverse or another. We just want a list of *all*
- * the groups the user is part of. We are greedy. We want all BuiltIn
- * Groups, Shibboleth Groups, IP Groups, "system" groups, everything.
- *
- * A JOIN on "permission documents" will determine if the user can find
- * a given "content document" (dataset version, etc) in Solr.
- */
- String groupsFromProviders = "";
- StringBuilder sb = new StringBuilder();
for (Group group : groups) {
- logger.fine("found group " + group.getIdentifier() + " with alias " + group.getAlias());
String groupAlias = group.getAlias();
- if (groupAlias != null && !groupAlias.isEmpty()) {
- sb.append(" OR ");
- // i.e. group_builtIn/all-users, group_builtIn/authenticated-users, group_1-explictGroup1, group_shib/2
- sb.append(IndexServiceBean.getGroupPrefix() + groupAlias);
+ if (groupAlias != null && !groupAlias.isEmpty() && (!avoidJoin || !groupAlias.startsWith("builtIn"))) {
+ groupList.add(IndexServiceBean.getGroupPrefix() + groupAlias);
}
}
- groupsFromProviders = sb.toString();
- logger.fine(groupsFromProviders);
- if (true) {
- /**
- * @todo get rid of "experimental" in name
- */
- String experimentalJoin = "{!join from=" + SearchFields.DEFINITION_POINT + " to=id}" + SearchFields.DISCOVERABLE_BY + ":(" + IndexServiceBean.getPublicGroupString() + " OR " + IndexServiceBean.getGroupPerUserPrefix() + au.getId() + groupsFromProviders + ")";
- publicPlusUserPrivateGroup = experimentalJoin;
+ if (!avoidJoin) {
+ // Add the public group
+ groupList.add(0, IndexServiceBean.getPublicGroupString());
+ }
+
+ String groupString = null;
+ //If we have additional groups, format them correctly into a search string, with parens if there is more than one
+ if (groupList.size() > 1) {
+ groupString = "(" + StringUtils.join(groupList, " OR ") + ")";
+ } else if (groupList.size() == 1) {
+ groupString = groupList.get(0);
}
-
- //permissionFilterQuery = publicPlusUserPrivateGroup;
- logger.fine(publicPlusUserPrivateGroup);
-
- return publicPlusUserPrivateGroup;
-
+ logger.fine("Groups: " + groupString);
+ String permissionQuery = buildPermissionFilterQuery(avoidJoin, groupString);
+ logger.fine("Permission Query: " + permissionQuery);
+ return permissionQuery;
}
+ private String buildPermissionFilterQuery(boolean avoidJoin, String permissionFilterGroups) {
+ String query = (avoidJoin&& !isAllGroups(permissionFilterGroups)) ? SearchFields.PUBLIC_OBJECT + ":" + true : "";
+ if (permissionFilterGroups != null && !isAllGroups(permissionFilterGroups)) {
+ if (!query.isEmpty()) {
+ query = "(" + query + " OR " + "{!join from=" + SearchFields.DEFINITION_POINT + " to=id}" + SearchFields.DISCOVERABLE_BY + ":" + permissionFilterGroups + ")";
+ } else {
+ query = "{!join from=" + SearchFields.DEFINITION_POINT + " to=id}" + SearchFields.DISCOVERABLE_BY + ":" + permissionFilterGroups;
+ }
+ }
+ return query;
+ }
+
+ private boolean isAllGroups(String groups) {
+ return (groups!=null &&groups.equals(ALL_GROUPS));
+ }
}
diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java
index cfe29ea08c7..e4d885276d0 100644
--- a/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java
@@ -34,7 +34,7 @@
public class SolrIndexServiceBean {
private static final Logger logger = Logger.getLogger(SolrIndexServiceBean.class.getCanonicalName());
-
+
@EJB
DvObjectServiceBean dvObjectService;
@EJB
@@ -149,7 +149,7 @@ private List constructDatasetSolrDocs(Dataset dataset) {
return solrDocs;
}
-// private List constructDatafileSolrDocs(DataFile dataFile) {
+ // private List constructDatafileSolrDocs(DataFile dataFile) {
private List constructDatafileSolrDocs(DataFile dataFile, Map> permStringByDatasetVersion) {
List datafileSolrDocs = new ArrayList<>();
Map desiredCards = searchPermissionsService.getDesiredCards(dataFile.getOwner());
@@ -166,14 +166,14 @@ private List constructDatafileSolrDocs(DataFile dataFile, Map constructDatafileSolrDocsFromDataset(Dataset datas
} else {
perms = searchPermissionsService.findDatasetVersionPerms(datasetVersionFileIsAttachedTo);
}
+
for (FileMetadata fileMetadata : datasetVersionFileIsAttachedTo.getFileMetadatas()) {
Long fileId = fileMetadata.getDataFile().getId();
String solrIdStart = IndexServiceBean.solrDocIdentifierFile + fileId;
String solrIdEnd = getDatasetOrDataFileSolrEnding(datasetVersionFileIsAttachedTo.getVersionState());
String solrId = solrIdStart + solrIdEnd;
DvObjectSolrDoc dataFileSolrDoc = new DvObjectSolrDoc(fileId.toString(), solrId, datasetVersionFileIsAttachedTo.getId(), fileMetadata.getLabel(), perms);
- logger.fine("adding fileid " + fileId);
+ logger.finest("adding fileid " + fileId);
datafileSolrDocs.add(dataFileSolrDoc);
}
}
@@ -361,20 +362,19 @@ private void persistToSolr(Collection docs) throws SolrServer
public IndexResponse indexPermissionsOnSelfAndChildren(long definitionPointId) {
DvObject definitionPoint = dvObjectService.findDvObject(definitionPointId);
- if ( definitionPoint == null ) {
+ if (definitionPoint == null) {
logger.log(Level.WARNING, "Cannot find a DvOpbject with id of {0}", definitionPointId);
return null;
} else {
return indexPermissionsOnSelfAndChildren(definitionPoint);
}
}
-
+
/**
* We use the database to determine direct children since there is no
* inheritance
*/
public IndexResponse indexPermissionsOnSelfAndChildren(DvObject definitionPoint) {
- List dvObjectsToReindexPermissionsFor = new ArrayList<>();
List filesToReindexAsBatch = new ArrayList<>();
/**
* @todo Re-indexing the definition point itself seems to be necessary
@@ -383,27 +383,47 @@ public IndexResponse indexPermissionsOnSelfAndChildren(DvObject definitionPoint)
// We don't create a Solr "primary/content" doc for the root dataverse
// so don't create a Solr "permission" doc either.
+ int i = 0;
+ int numObjects = 0;
if (definitionPoint.isInstanceofDataverse()) {
Dataverse selfDataverse = (Dataverse) definitionPoint;
if (!selfDataverse.equals(dataverseService.findRootDataverse())) {
- dvObjectsToReindexPermissionsFor.add(definitionPoint);
+ indexPermissionsForOneDvObject(definitionPoint);
+ numObjects++;
}
List directChildDatasetsOfDvDefPoint = datasetService.findByOwnerId(selfDataverse.getId());
for (Dataset dataset : directChildDatasetsOfDvDefPoint) {
- dvObjectsToReindexPermissionsFor.add(dataset);
+ indexPermissionsForOneDvObject(dataset);
+ numObjects++;
for (DataFile datafile : filesToReIndexPermissionsFor(dataset)) {
filesToReindexAsBatch.add(datafile);
+ i++;
+ if (i % 100 == 0) {
+ reindexFilesInBatches(filesToReindexAsBatch);
+ filesToReindexAsBatch.clear();
+ }
+ if (i % 1000 == 0) {
+ logger.fine("Progress: " +i + " files permissions reindexed");
+ }
}
+ logger.fine("Progress : dataset " + dataset.getId() + " permissions reindexed");
}
} else if (definitionPoint.isInstanceofDataset()) {
- dvObjectsToReindexPermissionsFor.add(definitionPoint);
+ indexPermissionsForOneDvObject(definitionPoint);
+ numObjects++;
// index files
Dataset dataset = (Dataset) definitionPoint;
for (DataFile datafile : filesToReIndexPermissionsFor(dataset)) {
filesToReindexAsBatch.add(datafile);
+ i++;
+ if (i % 100 == 0) {
+ reindexFilesInBatches(filesToReindexAsBatch);
+ filesToReindexAsBatch.clear();
+ }
}
} else {
- dvObjectsToReindexPermissionsFor.add(definitionPoint);
+ indexPermissionsForOneDvObject(definitionPoint);
+ numObjects++;
}
/**
@@ -412,64 +432,64 @@ public IndexResponse indexPermissionsOnSelfAndChildren(DvObject definitionPoint)
* @todo Should update timestamps, probably, even thought these are
* files, see https://github.com/IQSS/dataverse/issues/2421
*/
- String response = reindexFilesInBatches(filesToReindexAsBatch);
-
- for (DvObject dvObject : dvObjectsToReindexPermissionsFor) {
- /**
- * @todo do something with this response
- */
- IndexResponse indexResponse = indexPermissionsForOneDvObject(dvObject);
- }
-
+ reindexFilesInBatches(filesToReindexAsBatch);
+ logger.fine("Reindexed permissions for " + i + " files and " + numObjects + " datasets/collections");
return new IndexResponse("Number of dvObject permissions indexed for " + definitionPoint
- + ": " + dvObjectsToReindexPermissionsFor.size()
- );
+ + ": " + numObjects);
}
private String reindexFilesInBatches(List filesToReindexPermissionsFor) {
List docs = new ArrayList<>();
Map> byParentId = new HashMap<>();
Map> permStringByDatasetVersion = new HashMap<>();
- for (DataFile file : filesToReindexPermissionsFor) {
- Dataset dataset = (Dataset) file.getOwner();
- Map desiredCards = searchPermissionsService.getDesiredCards(dataset);
- for (DatasetVersion datasetVersionFileIsAttachedTo : datasetVersionsToBuildCardsFor(dataset)) {
- boolean cardShouldExist = desiredCards.get(datasetVersionFileIsAttachedTo.getVersionState());
- if (cardShouldExist) {
- List cachedPermission = permStringByDatasetVersion.get(datasetVersionFileIsAttachedTo.getId());
- if (cachedPermission == null) {
- logger.fine("no cached permission! Looking it up...");
- List fileSolrDocs = constructDatafileSolrDocs((DataFile) file, permStringByDatasetVersion);
- for (DvObjectSolrDoc fileSolrDoc : fileSolrDocs) {
- Long datasetVersionId = fileSolrDoc.getDatasetVersionId();
- if (datasetVersionId != null) {
- permStringByDatasetVersion.put(datasetVersionId, fileSolrDoc.getPermissions());
+ int i = 0;
+ try {
+ for (DataFile file : filesToReindexPermissionsFor) {
+ Dataset dataset = (Dataset) file.getOwner();
+ Map desiredCards = searchPermissionsService.getDesiredCards(dataset);
+ for (DatasetVersion datasetVersionFileIsAttachedTo : datasetVersionsToBuildCardsFor(dataset)) {
+ boolean cardShouldExist = desiredCards.get(datasetVersionFileIsAttachedTo.getVersionState());
+ if (cardShouldExist) {
+ List cachedPermission = permStringByDatasetVersion.get(datasetVersionFileIsAttachedTo.getId());
+ if (cachedPermission == null) {
+ logger.finest("no cached permission! Looking it up...");
+ List fileSolrDocs = constructDatafileSolrDocs((DataFile) file, permStringByDatasetVersion);
+ for (DvObjectSolrDoc fileSolrDoc : fileSolrDocs) {
+ Long datasetVersionId = fileSolrDoc.getDatasetVersionId();
+ if (datasetVersionId != null) {
+ permStringByDatasetVersion.put(datasetVersionId, fileSolrDoc.getPermissions());
+ SolrInputDocument solrDoc = SearchUtil.createSolrDoc(fileSolrDoc);
+ docs.add(solrDoc);
+ i++;
+ }
+ }
+ } else {
+ logger.finest("cached permission is " + cachedPermission);
+ List fileSolrDocsBasedOnCachedPermissions = constructDatafileSolrDocs((DataFile) file, permStringByDatasetVersion);
+ for (DvObjectSolrDoc fileSolrDoc : fileSolrDocsBasedOnCachedPermissions) {
SolrInputDocument solrDoc = SearchUtil.createSolrDoc(fileSolrDoc);
docs.add(solrDoc);
+ i++;
}
}
- } else {
- logger.fine("cached permission is " + cachedPermission);
- List fileSolrDocsBasedOnCachedPermissions = constructDatafileSolrDocs((DataFile) file, permStringByDatasetVersion);
- for (DvObjectSolrDoc fileSolrDoc : fileSolrDocsBasedOnCachedPermissions) {
- SolrInputDocument solrDoc = SearchUtil.createSolrDoc(fileSolrDoc);
- docs.add(solrDoc);
+ if (i % 20 == 0) {
+ persistToSolr(docs);
+ docs = new ArrayList<>();
}
}
}
+ Long parent = file.getOwner().getId();
+ List existingList = byParentId.get(parent);
+ if (existingList == null) {
+ List empty = new ArrayList<>();
+ byParentId.put(parent, empty);
+ } else {
+ List updatedList = existingList;
+ updatedList.add(file.getId());
+ byParentId.put(parent, updatedList);
+ }
}
- Long parent = file.getOwner().getId();
- List existingList = byParentId.get(parent);
- if (existingList == null) {
- List empty = new ArrayList<>();
- byParentId.put(parent, empty);
- } else {
- List updatedList = existingList;
- updatedList.add(file.getId());
- byParentId.put(parent, updatedList);
- }
- }
- try {
+
persistToSolr(docs);
return " " + filesToReindexPermissionsFor.size() + " files indexed across " + docs.size() + " Solr documents ";
} catch (SolrServerException | IOException ex) {
@@ -517,29 +537,26 @@ public JsonObjectBuilder deleteAllFromSolrAndResetIndexTimes() throws SolrServer
}
/**
- *
- *
* @return A list of dvobject ids that should have their permissions
- * re-indexed because Solr was down when a permission was added. The permission
- * should be added to Solr. The id of the permission contains the type of
- * DvObject and the primary key of the dvObject.
- * DvObjects of type DataFile are currently skipped because their index
- * time isn't stored in the database, since they are indexed along
- * with their parent dataset (this may change).
+ * re-indexed because Solr was down when a permission was added. The
+ * permission should be added to Solr. The id of the permission contains the
+ * type of DvObject and the primary key of the dvObject. DvObjects of type
+ * DataFile are currently skipped because their index time isn't stored in
+ * the database, since they are indexed along with their parent dataset
+ * (this may change).
*/
public List findPermissionsInDatabaseButStaleInOrMissingFromSolr() {
List indexingRequired = new ArrayList<>();
long rootDvId = dataverseService.findRootDataverse().getId();
List missingDataversePermissionIds = dataverseService.findIdStalePermission();
List missingDatasetPermissionIds = datasetService.findIdStalePermission();
- for (Long id : missingDataversePermissionIds) {
+ for (Long id : missingDataversePermissionIds) {
if (!id.equals(rootDvId)) {
- indexingRequired.add(id);
+ indexingRequired.add(id);
}
}
indexingRequired.addAll(missingDatasetPermissionIds);
return indexingRequired;
}
-
}
diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SolrSearchResult.java b/src/main/java/edu/harvard/iq/dataverse/search/SolrSearchResult.java
index 27900bac63f..8802555affd 100644
--- a/src/main/java/edu/harvard/iq/dataverse/search/SolrSearchResult.java
+++ b/src/main/java/edu/harvard/iq/dataverse/search/SolrSearchResult.java
@@ -78,6 +78,10 @@ public class SolrSearchResult {
private String citation;
private String citationHtml;
private String datasetType;
+ /**
+ * Only datasets can have a file count.
+ */
+ private Long fileCount;
/**
* Files and datasets might have a UNF. Dataverses don't.
*/
@@ -456,10 +460,10 @@ public JsonObjectBuilder getJsonForMyData(boolean isValid) {
} // getJsonForMydata
public JsonObjectBuilder json(boolean showRelevance, boolean showEntityIds, boolean showApiUrls) {
- return json(showRelevance, showEntityIds, showApiUrls, null, null);
+ return json(showRelevance, showEntityIds, showApiUrls, null);
}
- public JsonObjectBuilder json(boolean showRelevance, boolean showEntityIds, boolean showApiUrls, List metadataFields, Long datasetFileCount) {
+ public JsonObjectBuilder json(boolean showRelevance, boolean showEntityIds, boolean showApiUrls, List metadataFields) {
if (this.type == null) {
return jsonObjectBuilder();
}
@@ -597,7 +601,7 @@ public JsonObjectBuilder json(boolean showRelevance, boolean showEntityIds, bool
subjects.add(subject);
}
nullSafeJsonBuilder.add("subjects", subjects);
- nullSafeJsonBuilder.add("fileCount", datasetFileCount);
+ nullSafeJsonBuilder.add("fileCount", this.fileCount);
nullSafeJsonBuilder.add("versionId", dv.getId());
nullSafeJsonBuilder.add("versionState", dv.getVersionState().toString());
if (this.isPublishedState()) {
@@ -1348,4 +1352,12 @@ public boolean isValid(Predicate canUpdateDataset) {
}
return !canUpdateDataset.test(this);
}
+
+ public Long getFileCount() {
+ return fileCount;
+ }
+
+ public void setFileCount(Long fileCount) {
+ this.fileCount = fileCount;
+ }
}
diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java b/src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java
index 33e828e619d..20632c170e4 100644
--- a/src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java
+++ b/src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java
@@ -97,12 +97,16 @@ public enum FeatureFlags {
* for the dataset.
*
* @apiNote Raise flag by setting
- * "dataverse.feature.enable-dataset-thumbnail-autoselect"
+ * "dataverse.feature.disable-dataset-thumbnail-autoselect"
* @since Dataverse 6.4
*/
DISABLE_DATASET_THUMBNAIL_AUTOSELECT("disable-dataset-thumbnail-autoselect"),
/**
* Feature flag for the new Globus upload framework.
+ *
+ * @apiNote Raise flag by setting
+ * "dataverse.feature.globus-use-experimental-async-framework"
+ * @since Dataverse 6.4
*/
GLOBUS_USE_EXPERIMENTAL_ASYNC_FRAMEWORK("globus-use-experimental-async-framework"),
;
diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java
index 8ed96690e84..b5eb483c2c8 100644
--- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java
@@ -539,6 +539,12 @@ Whether Harvesting (OAI) service is enabled
*
*/
GlobusSingleFileTransfer,
+ /** Lower limit of the number of files in a Globus upload task where
+ * the batch mode should be utilized in looking up the file information
+ * on the remote end node (file sizes, primarily), instead of individual
+ * lookups.
+ */
+ GlobusBatchLookupSize,
/**
* Optional external executables to run on the metadata for dataverses
* and datasets being published; as an extra validation step, to
diff --git a/src/main/java/edu/harvard/iq/dataverse/util/BundleUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/BundleUtil.java
index 922e6ff5d28..771cf5fd0f0 100644
--- a/src/main/java/edu/harvard/iq/dataverse/util/BundleUtil.java
+++ b/src/main/java/edu/harvard/iq/dataverse/util/BundleUtil.java
@@ -111,7 +111,7 @@ public static ResourceBundle getResourceBundle(String propertyFileName, Locale c
ClassLoader loader = getClassLoader(filesRootDirectory);
bundle = ResourceBundle.getBundle(propertyFileName, currentLocale, loader);
} catch (MissingResourceException mre) {
- logger.warning("No property file named " + propertyFileName + "_" + currentLocale.getLanguage()
+ logger.fine("No property file named " + propertyFileName + "_" + currentLocale.getLanguage()
+ " found in " + filesRootDirectory + ", using untranslated values");
bundle = ResourceBundle.getBundle("propertyFiles/" + propertyFileName, currentLocale);
}
diff --git a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java
index a0c32d5c8ce..991682ec8e8 100644
--- a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java
+++ b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java
@@ -525,15 +525,18 @@ public static String determineFileType(File f, String fileName) throws IOExcepti
// Check for shapefile extensions as described here: http://en.wikipedia.org/wiki/Shapefile
//logger.info("Checking for shapefile");
- ShapefileHandler shp_handler = new ShapefileHandler(new FileInputStream(f));
+ ShapefileHandler shp_handler = new ShapefileHandler(f);
if (shp_handler.containsShapefile()){
// logger.info("------- shapefile FOUND ----------");
fileType = ShapefileHandler.SHAPEFILE_FILE_TYPE; //"application/zipped-shapefile";
}
-
- Optional bagItFileHandler = CDI.current().select(BagItFileHandlerFactory.class).get().getBagItFileHandler();
- if(bagItFileHandler.isPresent() && bagItFileHandler.get().isBagItPackage(fileName, f)) {
- fileType = BagItFileHandler.FILE_TYPE;
+ try {
+ Optional bagItFileHandler = CDI.current().select(BagItFileHandlerFactory.class).get().getBagItFileHandler();
+ if (bagItFileHandler.isPresent() && bagItFileHandler.get().isBagItPackage(fileName, f)) {
+ fileType = BagItFileHandler.FILE_TYPE;
+ }
+ } catch (Exception e) {
+ logger.warning("Error checking for BagIt package: " + e.getMessage());
}
}
diff --git a/src/main/java/edu/harvard/iq/dataverse/util/ShapefileHandler.java b/src/main/java/edu/harvard/iq/dataverse/util/ShapefileHandler.java
index f1440cc3c02..2b54f7a3bfe 100644
--- a/src/main/java/edu/harvard/iq/dataverse/util/ShapefileHandler.java
+++ b/src/main/java/edu/harvard/iq/dataverse/util/ShapefileHandler.java
@@ -1,23 +1,21 @@
package edu.harvard.iq.dataverse.util;
import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileOutputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
+import java.nio.file.Path;
+import java.nio.file.StandardCopyOption;
import java.util.Date;
import java.util.ArrayList;
import java.util.List;
-import java.util.zip.ZipEntry;
-import java.util.zip.ZipInputStream;
-import java.util.zip.ZipException;
+import java.util.zip.ZipFile;
import java.util.HashMap;
import java.util.*;
import java.nio.file.Files;
import java.nio.file.Paths;
import static java.nio.file.StandardCopyOption.REPLACE_EXISTING;
-import java.util.logging.Level;
+
import java.util.logging.Logger;
import org.apache.commons.io.FileUtils;
@@ -43,11 +41,10 @@
* "shape1.pdf", "README.md", "shape_notes.txt"
*
* Code Example:
- * FileInputStream shp_file_input_stream = new FileInputStream(new File("zipped_shapefile.zip"))
- * ShapefileHandler shp_handler = new ShapefileHandler(shp_file_input_stream);
+ * ShapefileHandler shp_handler = new ShapefileHandler(new File("zipped_shapefile.zip"));
* if (shp_handler.containsShapefile()){
* File rezip_folder = new File("~/folder_for_rezipping");
- * boolean rezip_success = shp_handler.rezipShapefileSets(shp_file_input_stream, rezip_folder );
+ * boolean rezip_success = shp_handler.rezipShapefileSets(rezip_folder );
* if (!rezip_success){
* // rezip failed, should be an error message (String) available
System.out.println(shp_handler.error_message);
@@ -68,13 +65,13 @@ public class ShapefileHandler{
private static final Logger logger = Logger.getLogger(ShapefileHandler.class.getCanonicalName());
// Reference for these extensions: http://en.wikipedia.org/wiki/Shapefile
- public final static String SHAPEFILE_FILE_TYPE = "application/zipped-shapefile";
- public final static String SHAPEFILE_FILE_TYPE_FRIENDLY_NAME = "Shapefile as ZIP Archive";
- public final static List SHAPEFILE_MANDATORY_EXTENSIONS = Arrays.asList("shp", "shx", "dbf", "prj");
- public final static String SHP_XML_EXTENSION = "shp.xml";
- public final static String BLANK_EXTENSION = "__PLACEHOLDER-FOR-BLANK-EXTENSION__";
- public final static List SHAPEFILE_ALL_EXTENSIONS = Arrays.asList("shp", "shx", "dbf", "prj", "sbn", "sbx", "fbn", "fbx", "ain", "aih", "ixs", "mxs", "atx", "cpg", "qpj", "qmd", SHP_XML_EXTENSION);
-
+ public static final String SHAPEFILE_FILE_TYPE = "application/zipped-shapefile";
+ public static final String SHAPEFILE_FILE_TYPE_FRIENDLY_NAME = "Shapefile as ZIP Archive";
+ public static final List SHAPEFILE_MANDATORY_EXTENSIONS = Arrays.asList("shp", "shx", "dbf", "prj");
+ public static final String SHP_XML_EXTENSION = "shp.xml";
+ public static final String BLANK_EXTENSION = "__PLACEHOLDER-FOR-BLANK-EXTENSION__";
+ public static final List SHAPEFILE_ALL_EXTENSIONS = Arrays.asList("shp", "shx", "dbf", "prj", "sbn", "sbx", "fbn", "fbx", "ain", "aih", "ixs", "mxs", "atx", "cpg", "qpj", "qmd", SHP_XML_EXTENSION);
+ private final File zipFile;
public boolean DEBUG = false;
private boolean zipFileProcessed = false;
@@ -97,9 +94,6 @@ public class ShapefileHandler{
private Map> fileGroups = new HashMap<>();
private List finalRezippedFiles = new ArrayList<>();
-
- private String outputFolder = "unzipped";
- private String rezippedFolder = "rezipped";
// Debug helper
private void msg(String s){
@@ -116,40 +110,28 @@ private void msgt(String s){
}
/*
- Constructor, start with filename
- */
- public ShapefileHandler(String filename){
-
- if (filename==null){
- this.addErrorMessage("The filename was null");
- return;
- }
-
- FileInputStream zip_file_stream;
- try {
- zip_file_stream = new FileInputStream(new File(filename));
- } catch (FileNotFoundException ex) {
- this.addErrorMessage("The file was not found");
+ Constructor, start with File
+ */
+ public ShapefileHandler(File zip_file) throws IOException {
+ zipFile = zip_file;
+ if (zip_file == null) {
+ this.addErrorMessage("The file was null");
return;
}
-
- this.examineZipfile(zip_file_stream);
- }
-
-
- /*
- Constructor, start with FileInputStream
- */
- public ShapefileHandler(FileInputStream zip_file_stream){
-
- if (zip_file_stream==null){
- this.addErrorMessage("The zip_file_stream was null");
- return;
+ try (var zip_file_object = new ZipFile(zip_file)) {
+ this.examineZipfile(zip_file_object);
+ }
+ catch (FileNotFoundException ex) {
+ // While this constructor had a FileInputStream as argument:
+ // FileUtil.determineFileType threw this exception before calling the constructor with a FileInputStream
+ // IngestServiceShapefileHelper.processFile won't call this constructor if the file is not valid, hence does not exist.
+ // When the file would have disappeared in the meantime, it would have produced a slightly different error message.
+ logger.severe("File not found: " + zip_file.getAbsolutePath());
+ throw ex;
}
- this.examineZipfile(zip_file_stream);
}
-
+
public List getFinalRezippedFiles(){
return this.finalRezippedFiles;
}
@@ -291,26 +273,19 @@ inside the uploaded zip file (issue #6873). To achieve this, we recreate
subfolders in the FileMetadata of the newly created DataFiles.
(-- L.A. 09/2020)
*/
- private boolean unzipFilesToDirectory(FileInputStream zipfile_input_stream, File target_directory){
+ private boolean unzipFilesToDirectory(ZipFile zipfileInput, File target_directory){
logger.fine("unzipFilesToDirectory: " + target_directory.getAbsolutePath() );
- if (zipfile_input_stream== null){
- this.addErrorMessage("unzipFilesToDirectory. The zipfile_input_stream is null.");
- return false;
- }
if (!target_directory.isDirectory()){
this.addErrorMessage("This directory does not exist: " + target_directory.getAbsolutePath());
return false;
}
- List unzippedFileNames = new ArrayList<>();
-
- ZipInputStream zipStream = new ZipInputStream(zipfile_input_stream);
+ List unzippedFileNames = new ArrayList<>();
+
- ZipEntry origEntry;
- byte[] buffer = new byte[2048];
try {
- while((origEntry = zipStream.getNextEntry())!=null){
+ for(var origEntry : Collections.list(zipfileInput.entries())){
String zentryFileName = origEntry.getName();
logger.fine("\nOriginal entry name: " + origEntry);
@@ -360,15 +335,10 @@ private boolean unzipFilesToDirectory(FileInputStream zipfile_input_stream, File
unzippedFileNames.add(outpath);
}
logger.fine("Write zip file: " + outpath);
- FileOutputStream fileOutputStream;
- long fsize = 0;
- fileOutputStream = new FileOutputStream(outpath);
- int len;// = 0;
- while ((len = zipStream.read(buffer)) > 0){
- fileOutputStream.write(buffer, 0, len);
- fsize+=len;
- } // end while
- fileOutputStream.close();
+ try(var inputStream = zipfileInput.getInputStream(origEntry)) {
+ Files.createDirectories(new File(outpath).getParentFile().toPath());
+ Files.copy(inputStream, Path.of(outpath), StandardCopyOption.REPLACE_EXISTING);
+ }
} // end outer while
} catch (IOException ex) {
for (StackTraceElement el : ex.getStackTrace()){
@@ -377,19 +347,13 @@ private boolean unzipFilesToDirectory(FileInputStream zipfile_input_stream, File
this.addErrorMessage("Failed to open ZipInputStream entry" + ex.getMessage());
return false;
}
-
- try {
- zipStream.close();
- } catch (IOException ex) {
- Logger.getLogger(ShapefileHandler.class.getName()).log(Level.SEVERE, null, ex);
- }
- return true;
+ return true;
}
/*
Rezip the shapefile(s) into a given directory
Assumes that the zipfile_input_stream has already been checked!
*/
- public boolean rezipShapefileSets(FileInputStream zipfile_input_stream, File rezippedFolder) throws IOException{
+ public boolean rezipShapefileSets(File rezippedFolder) throws IOException{
logger.fine("rezipShapefileSets");
//msgt("rezipShapefileSets");
if (!this.zipFileProcessed){
@@ -400,10 +364,6 @@ public boolean rezipShapefileSets(FileInputStream zipfile_input_stream, File rez
this.addErrorMessage("There are no shapefiles here!");
return false;
}
- if (zipfile_input_stream== null){
- this.addErrorMessage("The zipfile_input_stream is null.");
- return false;
- }
if (rezippedFolder == null){
this.addErrorMessage("The rezippedFolder is null.");
return false;
@@ -433,9 +393,11 @@ public boolean rezipShapefileSets(FileInputStream zipfile_input_stream, File rez
// Unzip files!
- if (!this.unzipFilesToDirectory(zipfile_input_stream, dir_for_unzipping)){
- this.addErrorMessage("Failed to unzip files.");
- return false;
+ try(var zipfileObject = new ZipFile(zipFile)) {
+ if (!this.unzipFilesToDirectory(zipfileObject, dir_for_unzipping)) {
+ this.addErrorMessage("Failed to unzip files.");
+ return false;
+ }
}
// Redistribute files!
String target_dirname = rezippedFolder.getAbsolutePath();
@@ -681,27 +643,19 @@ private boolean isFileToSkip(String fname){
/**************************************
* Iterate through the zip file contents.
* Does it contain any shapefiles?
- *
- * @param FileInputStream zip_file_stream
*/
- private boolean examineZipfile(FileInputStream zip_file_stream){
+ private boolean examineZipfile(ZipFile zip_file){
// msgt("examineZipfile");
-
- if (zip_file_stream==null){
- this.addErrorMessage("The zip file stream was null");
- return false;
- }
-
+
// Clear out file lists
this.filesListInDir.clear();
this.filesizeHash.clear();
this.fileGroups.clear();
- try{
- ZipInputStream zipStream = new ZipInputStream(zip_file_stream);
- ZipEntry entry;
- List hiddenDirectories = new ArrayList<>();
- while((entry = zipStream.getNextEntry())!=null){
+ try{
+ List