Skip to content

Commit

Permalink
Update of metadata working.
Browse files Browse the repository at this point in the history
  • Loading branch information
janvanmansum committed Nov 12, 2024
1 parent c07a0d8 commit e636bb9
Show file tree
Hide file tree
Showing 13 changed files with 171 additions and 14 deletions.
1 change: 1 addition & 0 deletions src/main/assembly/dist/cfg/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ ingest:
inbox: /var/opt/dans.knaw.nl/tmp/import/inbox
outbox: /var/opt/dans.knaw.nl/tmp/import/outbox
tempDir: /var/opt/dans.knaw.nl/tmp/zip-wrapping
metadataKeys: {}
waitForReleasedState:
# 10s * 360 = 1 hour
maxNumberOfRetries: 360
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import io.dropwizard.core.setup.Bootstrap;
import io.dropwizard.core.setup.Environment;
import nl.knaw.dans.dvingest.config.DdDataverseIngestConfiguration;
import nl.knaw.dans.dvingest.core.DatasetTaskFactory;
import nl.knaw.dans.dvingest.core.DataverseServiceImpl;
import nl.knaw.dans.dvingest.core.IngestArea;
import nl.knaw.dans.dvingest.core.UtilityServicesImpl;
Expand Down Expand Up @@ -47,6 +48,7 @@ public void run(final DdDataverseIngestConfiguration configuration, final Enviro
var dataverseClient = configuration.getDataverse().build();
var dataverseService = DataverseServiceImpl.builder()
.dataverseClient(dataverseClient)
.metadataKeys(configuration.getIngest().getMetadataKeys())
.millisecondsBetweenChecks(configuration.getIngest().getWaitForReleasedState().getTimeBetweenChecks().toMilliseconds())
.maxNumberOfRetries(configuration.getIngest().getWaitForReleasedState().getMaxNumberOfRetries())
.build();
Expand All @@ -58,6 +60,7 @@ public void run(final DdDataverseIngestConfiguration configuration, final Enviro
.executorService(environment.lifecycle().executorService("import").minThreads(1).maxThreads(1).build())
.dataverseService(dataverseService)
.utilityServices(utilityServices)
.datasetTaskFactory(new DatasetTaskFactory(dataverseService, utilityServices))
.inbox(configuration.getIngest().getImportConfig().getInbox())
.outbox(configuration.getIngest().getImportConfig().getOutbox()).build();
environment.jersey().register(new IngestApiResource(importArea));
Expand Down
3 changes: 3 additions & 0 deletions src/main/java/nl/knaw/dans/dvingest/config/IngestConfig.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
import javax.validation.Valid;
import javax.validation.constraints.NotNull;
import java.nio.file.Path;
import java.util.HashMap;
import java.util.Map;

@Data
public class IngestConfig {
Expand All @@ -34,6 +36,7 @@ public class IngestConfig {

private int maxNumberOfFilesPerUpload = 1000;

private Map<String, String> metadataKeys = new HashMap<>();

@Valid
@NotNull
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@

@Slf4j
@AllArgsConstructor
public class IngestTask implements Runnable {
public class CreateNewDatasetTask implements Runnable {
private final Deposit deposit;
private final DataverseService dataverseService;
private final UtilityServices utilityServices;
Expand Down
36 changes: 36 additions & 0 deletions src/main/java/nl/knaw/dans/dvingest/core/DatasetTaskFactory.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
/*
* Copyright (C) 2024 DANS - Data Archiving and Networked Services ([email protected])
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nl.knaw.dans.dvingest.core;

import lombok.AllArgsConstructor;

import java.nio.file.Path;

@AllArgsConstructor
public class DatasetTaskFactory {
    private final DataverseService dataverseService;
    private final UtilityServices utilityServices;

    /**
     * Creates the ingest task appropriate for the given deposit: an update task when the
     * deposit targets an existing dataset (i.e. {@code deposit.isUpdate()} is true),
     * otherwise a task that creates a new dataset.
     *
     * @param deposit   the deposit to process
     * @param outputDir the directory under which the deposit is moved after processing
     * @return the task to run for this deposit
     */
    public Runnable createIngestTask(Deposit deposit, Path outputDir) {
        return deposit.isUpdate()
            ? new UpdateDatasetTask(deposit, dataverseService, utilityServices, outputDir)
            : new CreateNewDatasetTask(deposit, dataverseService, utilityServices, outputDir);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import nl.knaw.dans.lib.dataverse.model.dataset.Dataset;
import nl.knaw.dans.lib.dataverse.model.dataset.DatasetCreationResult;
import nl.knaw.dans.lib.dataverse.model.dataset.DatasetPublicationResult;
import nl.knaw.dans.lib.dataverse.model.dataset.DatasetVersion;
import nl.knaw.dans.lib.dataverse.model.dataset.FileList;
import nl.knaw.dans.lib.dataverse.model.file.FileMeta;

Expand All @@ -35,4 +36,6 @@ public interface DataverseService {
DataverseHttpResponse<DatasetPublicationResult> publishDataset(String persistentId) throws DataverseException, IOException;

void waitForState(String persistentId, String state) throws DataverseException;

DataverseHttpResponse<DatasetVersion> updateMetadata(String targetDatasetPid, DatasetVersion datasetMetadata) throws DataverseException, IOException;
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,14 @@
import nl.knaw.dans.lib.dataverse.model.dataset.Dataset;
import nl.knaw.dans.lib.dataverse.model.dataset.DatasetCreationResult;
import nl.knaw.dans.lib.dataverse.model.dataset.DatasetPublicationResult;
import nl.knaw.dans.lib.dataverse.model.dataset.DatasetVersion;
import nl.knaw.dans.lib.dataverse.model.dataset.FileList;
import nl.knaw.dans.lib.dataverse.model.file.FileMeta;

import java.io.IOException;
import java.nio.file.Path;
import java.util.HashMap;
import java.util.Map;

@Builder
@Slf4j
Expand All @@ -42,19 +45,29 @@ public class DataverseServiceImpl implements DataverseService {
@Builder.Default
private long millisecondsBetweenChecks = 3000;

@Builder.Default
private Map<String, String> metadataKeys = new HashMap<>();

public DataverseHttpResponse<DatasetCreationResult> createDataset(Dataset datasetMetadata) throws DataverseException, IOException {
return dataverseClient.dataverse("root").createDataset(datasetMetadata);
return dataverseClient.dataverse("root").createDataset(datasetMetadata, metadataKeys);
}

@Override
// Adds a single file, together with its file metadata, to the dataset identified by persistentId.
public DataverseHttpResponse<FileList> addFile(String persistentId, Path file, FileMeta fileMeta) throws DataverseException, IOException {
    return dataverseClient.dataset(persistentId).addFile(file, fileMeta);
}

@Override
// Requests publication of the dataset identified by persistentId. Callers that need the
// version to actually be released use waitForState(persistentId, "RELEASED") afterwards.
public DataverseHttpResponse<DatasetPublicationResult> publishDataset(String persistentId) throws DataverseException, IOException {
    return dataverseClient.dataset(persistentId).publish();
}

@Override
// Replaces the metadata of the dataset identified by targetDatasetPid with datasetMetadata,
// passing along the configured metadataKeys map (presumably keys unlocking protected
// metadata blocks in Dataverse — TODO confirm against dans-dataverse-client-lib docs).
public DataverseHttpResponse<DatasetVersion> updateMetadata(String targetDatasetPid, DatasetVersion datasetMetadata) throws DataverseException, IOException {
    return dataverseClient.dataset(targetDatasetPid).updateMetadata(datasetMetadata, metadataKeys);
}

// TODO: move this to dans-dataverse-client-lib; it is similar to awaitLockState.
public void waitForState(String datasetId, String expectedState) {
var numberOfTimesTried = 0;
var state = "";
Expand Down
11 changes: 10 additions & 1 deletion src/main/java/nl/knaw/dans/dvingest/core/Deposit.java
Original file line number Diff line number Diff line change
Expand Up @@ -103,13 +103,17 @@ public int getSequenceNumber() {
}

public OffsetDateTime getCreationTimestamp() {
var creationTimestamp = depositProperties.getProperty("creation-timestamp");
var creationTimestamp = depositProperties.getProperty("creation.timestamp");
if (creationTimestamp == null) {
return null;
}
return OffsetDateTime.parse(creationTimestamp);
}

// A deposit is an update of an existing dataset when its bag contains an update.yml file;
// the target dataset's PID is then read from that file (see getTargetDatasetPid).
public boolean isUpdate() {
    return Files.exists(getBagDir().resolve("update.yml"));
}

public void moveTo(Path targetDir) throws IOException {
Files.move(location, targetDir.resolve(location.getFileName()));
location = targetDir.resolve(location.getFileName());
Expand All @@ -127,4 +131,9 @@ else if (getCreationTimestamp() != null && deposit.getCreationTimestamp() != nul
throw new IllegalStateException("Deposit " + getId() + " should contain either a sequence number or a creation timestamp");
}
}

/**
 * Reads the persistent identifier of the dataset targeted by this deposit from the
 * update.yml file in the bag. Only meaningful for deposits for which {@link #isUpdate()}
 * returns true; for other deposits the file is absent and an IOException is thrown.
 *
 * @return the PID of the dataset to update
 * @throws IOException if update.yml cannot be read or parsed
 */
public String getTargetDatasetPid() throws IOException {
    // Let Jackson read the file directly instead of round-tripping through an
    // intermediate String with a bare "UTF-8" charset literal via FileUtils.
    var updateInstructions = MAPPER.readValue(getBagDir().resolve("update.yml").toFile(), UpdateInstructions.class);
    return updateInstructions.getTargetDatasetPid();
}
}
21 changes: 14 additions & 7 deletions src/main/java/nl/knaw/dans/dvingest/core/ImportJob.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,14 @@
import lombok.Builder;
import lombok.Getter;
import lombok.NonNull;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import nl.knaw.dans.dvingest.api.ImportCommandDto;
import nl.knaw.dans.dvingest.api.ImportJobStatusDto;
import nl.knaw.dans.dvingest.api.ImportJobStatusDto.StatusEnum;
import nl.knaw.dans.lib.dataverse.DataverseClient;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
import java.util.TreeSet;

@Slf4j
Expand All @@ -44,17 +40,27 @@ public class ImportJob implements Runnable {
private final DataverseService dataverseService;
@NonNull
private final UtilityServices utilityServices;

@NonNull
private final DatasetTaskFactory datasetTaskFactory;

@NonNull
private final CompletionHandler completionHandler;

@Getter
private final ImportJobStatusDto status = new ImportJobStatusDto();

private ImportJob(ImportCommandDto importCommand, Path outputDir, DataverseService dataverseService, UtilityServices utilityServices, CompletionHandler completionHandler) {
private ImportJob(@NonNull ImportCommandDto importCommand,
@NonNull Path outputDir,
@NonNull DataverseService dataverseService,
@NonNull UtilityServices utilityServices,
@NonNull DatasetTaskFactory datasetTaskFactory,
@NonNull CompletionHandler completionHandler) {
this.importCommand = importCommand;
this.outputDir = outputDir;
this.dataverseService = dataverseService;
this.utilityServices = utilityServices;
this.datasetTaskFactory = datasetTaskFactory;
this.completionHandler = completionHandler;
}

Expand Down Expand Up @@ -84,7 +90,7 @@ public void run() {
// Process deposits
for (Deposit deposit : deposits) {
log.info("START Processing deposit: {}", deposit.getId());
new IngestTask(deposit, dataverseService, utilityServices, outputDir).run();
datasetTaskFactory.createIngestTask(deposit, outputDir).run();
log.info("END Processing deposit: {}", deposit.getId());
// TODO: record number of processed/rejected/failed deposits in ImportJob status
}
Expand All @@ -94,7 +100,8 @@ public void run() {
catch (Exception e) {
log.error("Failed to process import job", e);
status.setStatus(StatusEnum.FAILED);
} finally {
}
finally {
completionHandler.handle(this);
}
}
Expand Down
6 changes: 5 additions & 1 deletion src/main/java/nl/knaw/dans/dvingest/core/IngestArea.java
Original file line number Diff line number Diff line change
Expand Up @@ -39,17 +39,20 @@ public class IngestArea {
@NonNull
private final UtilityServices utilityServices;
@NonNull
private final DatasetTaskFactory datasetTaskFactory;
@NonNull
private final Path inbox;
@NonNull
private final Path outbox;

private final Map<String, ImportJob> importJobs = new ConcurrentHashMap<>();

private IngestArea(ExecutorService executorService, DataverseService dataverseService, UtilityServices utilityServices, Path inbox, Path outbox) {
private IngestArea(ExecutorService executorService, DataverseService dataverseService, UtilityServices utilityServices, DatasetTaskFactory datasetTaskFactory, Path inbox, Path outbox) {
try {
this.executorService = executorService;
this.dataverseService = dataverseService;
this.utilityServices = utilityServices;
this.datasetTaskFactory = datasetTaskFactory;
this.inbox = inbox.toAbsolutePath().toRealPath();
this.outbox = outbox.toAbsolutePath().toRealPath();
}
Expand Down Expand Up @@ -97,6 +100,7 @@ private ImportJob createImportJob(ImportCommandDto importCommand) {
.outputDir(outbox.resolve(relativePath))
.dataverseService(dataverseService)
.utilityServices(utilityServices)
.datasetTaskFactory(datasetTaskFactory)
.completionHandler(job -> importJobs.remove(job.getImportCommand().getPath()))
.build();
}
Expand Down
52 changes: 52 additions & 0 deletions src/main/java/nl/knaw/dans/dvingest/core/UpdateDatasetTask.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
/*
* Copyright (C) 2024 DANS - Data Archiving and Networked Services ([email protected])
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nl.knaw.dans.dvingest.core;

import lombok.AllArgsConstructor;
import lombok.extern.slf4j.Slf4j;

import java.io.IOException;
import java.nio.file.Path;

@AllArgsConstructor
@Slf4j
public class UpdateDatasetTask implements Runnable {
private final Deposit deposit;
private final DataverseService dataverseService;
private final UtilityServices utilityServices;
private final Path outputDir;

@Override
public void run() {
try {
var targetDatasetPid = deposit.getTargetDatasetPid();
dataverseService.updateMetadata(targetDatasetPid, deposit.getDatasetMetadata().getDatasetVersion());

dataverseService.publishDataset(targetDatasetPid);
dataverseService.waitForState(targetDatasetPid, "RELEASED");
}
catch (Exception e) {
try {
log.error("Failed to ingest deposit", e);
deposit.moveTo(outputDir.resolve("failed"));
}
catch (IOException ioException) {
log.error("Failed to move deposit to failed directory", ioException);
}
}

}
}
26 changes: 26 additions & 0 deletions src/main/java/nl/knaw/dans/dvingest/core/UpdateInstructions.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
/*
* Copyright (C) 2024 DANS - Data Archiving and Networked Services ([email protected])
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nl.knaw.dans.dvingest.core;

import lombok.Data;
import nl.knaw.dans.lib.dataverse.model.dataset.UpdateType;

/**
 * Instructions read from the {@code update.yml} file in a deposit's bag, describing how the
 * deposit updates an existing dataset.
 */
@Data
public class UpdateInstructions {
    // Persistent identifier (PID) of the existing dataset that this deposit updates.
    private String targetDatasetPid;
    // TODO: implement minor update
    //private UpdateType updateType = UpdateType.major;
}
6 changes: 3 additions & 3 deletions src/test/java/nl/knaw/dans/dvingest/core/DepositTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -102,19 +102,19 @@ public void deposits_should_be_ordered_by_creation_timestamp() throws Exception
var dir1 = testDir.resolve(id1);
Files.createDirectories(dir1);
var props1 = new Properties();
props1.setProperty("creation-timestamp", "2023-01-01T10:00:00Z");
props1.setProperty("creation.timestamp", "2023-01-01T10:00:00Z");
props1.store(Files.newBufferedWriter(dir1.resolve("deposit.properties")), "");

var dir2 = testDir.resolve(id2);
Files.createDirectories(dir2);
var props2 = new Properties();
props2.setProperty("creation-timestamp", "2023-01-02T10:00:00Z");
props2.setProperty("creation.timestamp", "2023-01-02T10:00:00Z");
props2.store(Files.newBufferedWriter(dir2.resolve("deposit.properties")), "");

var dir3 = testDir.resolve(id3);
Files.createDirectories(dir3);
var props3 = new Properties();
props3.setProperty("creation-timestamp", "2023-01-03T10:00:00Z");
props3.setProperty("creation.timestamp", "2023-01-03T10:00:00Z");
props3.store(Files.newBufferedWriter(dir3.resolve("deposit.properties")), "");

var deposit1 = new Deposit(dir1);
Expand Down

0 comments on commit e636bb9

Please sign in to comment.