Skip to content

Commit

Permalink
Merge pull request #45 from DiSSCo/feature/ignore-media-without-uri
Browse files Browse the repository at this point in the history
Ignore media object when no access uri present
  • Loading branch information
samleeflang authored Oct 23, 2023
2 parents 2e92f98 + 5ab154d commit b9c83c6
Show file tree
Hide file tree
Showing 9 changed files with 420 additions and 34 deletions.
31 changes: 20 additions & 11 deletions src/main/java/eu/dissco/core/translator/service/BioCaseService.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
import eu.dissco.core.translator.domain.Enrichment;
import eu.dissco.core.translator.exception.DiSSCoDataException;
import eu.dissco.core.translator.exception.DisscoEfgParsingException;
import eu.dissco.core.translator.exception.OrganisationNotRorId;
import eu.dissco.core.translator.properties.EnrichmentProperties;
import eu.dissco.core.translator.properties.FdoProperties;
import eu.dissco.core.translator.properties.WebClientProperties;
Expand Down Expand Up @@ -193,11 +192,10 @@ private void processUnit(DataSet dataset, Unit unit)
attributes.getDwcInstitutionId());
log.debug("Result digital Specimen: {}", digitalSpecimen);
kafkaService.sendMessage("digital-specimen",
mapper.writeValueAsString(
new DigitalSpecimenEvent(
enrichmentServices(false),
digitalSpecimen,
digitalMediaObjects)));
new DigitalSpecimenEvent(
enrichmentServices(false),
digitalSpecimen,
digitalMediaObjects));
} catch (DiSSCoDataException e) {
log.error("Encountered data issue with record: {}", unitAttributes, e);
}
Expand Down Expand Up @@ -334,26 +332,37 @@ private JsonNode cleanupRedundantFields(JsonNode unitData) {
}

private List<DigitalMediaObjectEvent> processDigitalMediaObjects(String physicalSpecimenId,
Unit unit, String organisationId) throws OrganisationNotRorId {
Unit unit, String organisationId) {
var digitalMediaObjectEvents = new ArrayList<DigitalMediaObjectEvent>();
if (unit.getMultiMediaObjects() != null && !unit.getMultiMediaObjects().getMultiMediaObject()
.isEmpty()) {
for (MultiMediaObject media : unit.getMultiMediaObjects().getMultiMediaObject()) {
digitalMediaObjectEvents.add(
processDigitalMediaObject(physicalSpecimenId, media, organisationId));
try {
digitalMediaObjectEvents.add(
processDigitalMediaObject(physicalSpecimenId, media, organisationId));
} catch (DiSSCoDataException e) {
log.error("Failed to process digital media object for digital specimen: {}",
physicalSpecimenId, e);
}
}
}
return digitalMediaObjectEvents;
}

private DigitalMediaObjectEvent processDigitalMediaObject(String physicalSpecimenId,
MultiMediaObject media, String organisationId) throws OrganisationNotRorId {
MultiMediaObject media, String organisationId) throws DiSSCoDataException {
var attributes = getData(mapper.valueToTree(media));
var digitalEntity = digitalSpecimenDirector.assembleDigitalMediaObjects(false, attributes,
organisationId);
if (digitalEntity.getAcAccessUri() == null) {
throw new DiSSCoDataException(
"Digital media object for specimen does not have an access uri, ignoring record");
}
var digitalMediaObjectEvent = new DigitalMediaObjectEvent(enrichmentServices(true),
new DigitalMediaObject(
fdoProperties.getDigitalMediaObjectType(),
physicalSpecimenId,
digitalSpecimenDirector.assembleDigitalMediaObjects(false, attributes, organisationId),
digitalEntity,
attributes
));
log.debug("Result digital media object: {}", digitalMediaObjectEvent);
Expand Down
32 changes: 21 additions & 11 deletions src/main/java/eu/dissco/core/translator/service/DwcaService.java
Original file line number Diff line number Diff line change
Expand Up @@ -141,8 +141,7 @@ private void processDigitalSpecimen(Collection<ObjectNode> fullRecords)
log.debug("Digital Specimen: {}", digitalObjects);
var translatorEvent = new DigitalSpecimenEvent(enrichmentServices(false),
digitalObjects.getLeft(), digitalObjects.getRight());
kafkaService.sendMessage("digital-specimen",
mapper.writeValueAsString(translatorEvent));
kafkaService.sendMessage("digital-specimen", translatorEvent);
} catch (DiSSCoDataException e) {
log.error("Encountered data issue with record: {}", fullRecord, e);
}
Expand Down Expand Up @@ -173,16 +172,27 @@ private List<DigitalMediaObjectEvent> processMedia(String recordId, JsonNode ful
}

private List<DigitalMediaObjectEvent> extractMultiMedia(String recordId, JsonNode imageArray,
String organisationId) throws OrganisationNotRorId {
String organisationId) {
var digitalMediaObjectEvents = new ArrayList<DigitalMediaObjectEvent>();
for (var image : imageArray) {
var digitalMediaObject = new DigitalMediaObjectEvent(enrichmentServices(true),
new DigitalMediaObject(
fdoProperties.getDigitalMediaObjectType(),
recordId,
digitalSpecimenDirector.assembleDigitalMediaObjects(true, image, organisationId),
image));
digitalMediaObjectEvents.add(digitalMediaObject);
try {
var digitalEntity = digitalSpecimenDirector.assembleDigitalMediaObjects(true, image,
organisationId);
if (digitalEntity.getAcAccessUri() == null) {
throw new DiSSCoDataException(
"Digital media object for specimen does not have an access uri, ignoring record");
}
var digitalMediaObject = new DigitalMediaObjectEvent(enrichmentServices(true),
new DigitalMediaObject(
fdoProperties.getDigitalMediaObjectType(),
recordId,
digitalEntity,
image));
digitalMediaObjectEvents.add(digitalMediaObject);
} catch (DiSSCoDataException e) {
log.error("Failed to process digital media object for digital specimen: {}",
recordId, e);
}
}
return digitalMediaObjectEvents;
}
Expand All @@ -200,7 +210,7 @@ private List<DigitalMediaObjectEvent> publishAssociatedMedia(String recordId,
fdoProperties.getDigitalMediaObjectType(),
recordId,
digitalSpecimenDirector.assembleDigitalMediaObjects(true,
mapper.valueToTree(mediaUrl),
mapper.createObjectNode().put("ac:accessUri", mediaUrl),
organisationId),
null));
digitalMediaObjects.add(digitalMediaObject);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
package eu.dissco.core.translator.service;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dissco.core.translator.domain.DigitalSpecimenEvent;
import java.util.concurrent.CompletableFuture;
import lombok.AllArgsConstructor;
import lombok.extern.slf4j.Slf4j;
Expand All @@ -13,9 +16,11 @@
public class KafkaService {

private final KafkaTemplate<String, String> kafkaTemplate;
private final ObjectMapper mapper;

public void sendMessage(String topic, String event) {
CompletableFuture<SendResult<String, String>> future = kafkaTemplate.send(topic, event);
public void sendMessage(String topic, DigitalSpecimenEvent event) throws JsonProcessingException {
CompletableFuture<SendResult<String, String>> future = kafkaTemplate.send(topic,
mapper.writeValueAsString(event));
future.whenComplete((result, ex) -> {
if (ex != null) {
log.error("Unable to send message: {}", event, ex);
Expand Down
5 changes: 5 additions & 0 deletions src/test/java/eu/dissco/core/translator/TestUtils.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package eu.dissco.core.translator;

import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dissco.core.translator.schema.DigitalEntity;
import eu.dissco.core.translator.schema.DigitalSpecimen;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
Expand Down Expand Up @@ -70,6 +71,10 @@ public static DigitalSpecimen givenDigitalSpecimen() {
.withDwcInstitutionId(INSTITUTION_ID);
}

public static DigitalEntity givenDigitalMediaObjects() {
return new DigitalEntity().withAcAccessUri("https://accessuri.eu/image_1");
}

public static Stream<Arguments> provideInvalidDigitalSpecimen() {
return Stream.of(
Arguments.of(new DigitalSpecimen().withOdsNormalisedPhysicalSpecimenId(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
package eu.dissco.core.translator.service;

import static eu.dissco.core.translator.TestUtils.givenDigitalMediaObjects;
import static eu.dissco.core.translator.TestUtils.givenDigitalSpecimen;
import static eu.dissco.core.translator.TestUtils.loadResourceFile;
import static org.assertj.core.api.Assertions.assertThat;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.anyBoolean;
import static org.mockito.ArgumentMatchers.anyString;
Expand All @@ -12,10 +14,12 @@

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dissco.core.translator.domain.DigitalSpecimenEvent;
import eu.dissco.core.translator.properties.EnrichmentProperties;
import eu.dissco.core.translator.properties.FdoProperties;
import eu.dissco.core.translator.properties.WebClientProperties;
import eu.dissco.core.translator.repository.SourceSystemRepository;
import eu.dissco.core.translator.schema.DigitalEntity;
import eu.dissco.core.translator.schema.DigitalSpecimen;
import eu.dissco.core.translator.terms.BaseDigitalObjectDirector;
import freemarker.cache.FileTemplateLoader;
Expand All @@ -27,6 +31,7 @@
import org.junit.jupiter.api.extension.ExtendWith;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.MethodSource;
import org.mockito.ArgumentCaptor;
import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;
import org.springframework.core.io.ClassPathResource;
Expand Down Expand Up @@ -93,7 +98,8 @@ void testRetrieveData206() throws Exception {

// Then
then(webClient).should(times(2)).get();
then(kafkaService).should(times(99)).sendMessage(eq("digital-specimen"), anyString());
then(kafkaService).should(times(99)).sendMessage(eq("digital-specimen"), any(
DigitalSpecimenEvent.class));
}

@Test
Expand All @@ -108,13 +114,41 @@ void testRetrieveDataWithMedia206() throws Exception {
.willReturn(givenDigitalSpecimen());
given(fdoProperties.getDigitalSpecimenType()).willReturn("Doi of the digital specimen");
given(fdoProperties.getDigitalMediaObjectType()).willReturn("Doi of the digital media object");
given(digitalSpecimenDirector.assembleDigitalMediaObjects(anyBoolean(), any(JsonNode.class),
anyString()))
.willReturn(givenDigitalMediaObjects());

// When
service.retrieveData();

// Then
then(webClient).should(times(1)).get();
then(kafkaService).should(times(100)).sendMessage(eq("digital-specimen"), anyString());
then(kafkaService).should(times(100)).sendMessage(eq("digital-specimen"), any(
DigitalSpecimenEvent.class));
}

@Test
void testRetrieveDataInvalidMedia() throws Exception {
// Given
given(properties.getSourceSystemId()).willReturn("ABC-DDD-ASD");
given(repository.getEndpoint(anyString())).willReturn("https://endpoint.com");
given(responseSpec.bodyToMono(any(Class.class))).willReturn(
Mono.just(loadResourceFile("biocase/biocase-206-with-invalid-media.xml")));
given(properties.getItemsPerRequest()).willReturn(101);
given(digitalSpecimenDirector.assembleDigitalSpecimenTerm(any(JsonNode.class), anyBoolean()))
.willReturn(givenDigitalSpecimen());
given(fdoProperties.getDigitalSpecimenType()).willReturn("Doi of the digital specimen");
given(digitalSpecimenDirector.assembleDigitalMediaObjects(anyBoolean(), any(JsonNode.class),
anyString())).willReturn(new DigitalEntity());

// When
service.retrieveData();

// Then
var captor = ArgumentCaptor.forClass(DigitalSpecimenEvent.class);
then(webClient).should(times(1)).get();
then(kafkaService).should(times(1)).sendMessage(eq("digital-specimen"), captor.capture());
assertThat(captor.getValue().digitalMediaObjectEvents()).isEmpty();
}

@ParameterizedTest
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
package eu.dissco.core.translator.service;

import static eu.dissco.core.translator.TestUtils.MAPPER;
import static eu.dissco.core.translator.TestUtils.givenDigitalMediaObjects;
import static eu.dissco.core.translator.TestUtils.givenDigitalSpecimen;
import static org.assertj.core.api.Assertions.assertThat;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.anyBoolean;
import static org.mockito.ArgumentMatchers.anyList;
Expand All @@ -14,12 +16,14 @@
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import eu.dissco.core.translator.TestUtils;
import eu.dissco.core.translator.domain.DigitalSpecimenEvent;
import eu.dissco.core.translator.properties.DwcaProperties;
import eu.dissco.core.translator.properties.EnrichmentProperties;
import eu.dissco.core.translator.properties.FdoProperties;
import eu.dissco.core.translator.properties.WebClientProperties;
import eu.dissco.core.translator.repository.DwcaRepository;
import eu.dissco.core.translator.repository.SourceSystemRepository;
import eu.dissco.core.translator.schema.DigitalEntity;
import eu.dissco.core.translator.schema.DigitalSpecimen;
import eu.dissco.core.translator.terms.BaseDigitalObjectDirector;
import java.io.File;
Expand All @@ -35,6 +39,7 @@
import org.junit.jupiter.api.extension.ExtendWith;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.MethodSource;
import org.mockito.ArgumentCaptor;
import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;
import org.springframework.core.io.ClassPathResource;
Expand Down Expand Up @@ -108,7 +113,8 @@ void testRetrieveData() throws Exception {
// Then
then(dwcaRepository).should(times(2)).createTable(anyString());
then(dwcaRepository).should(times(2)).postRecords(anyString(), anyList());
then(kafkaService).should(times(9)).sendMessage(eq("digital-specimen"), anyString());
then(kafkaService).should(times(9)).sendMessage(eq("digital-specimen"), any(
DigitalSpecimenEvent.class));
cleanup("src/test/resources/dwca/test/dwca-rbins.zip");
}

Expand Down Expand Up @@ -155,6 +161,8 @@ void testRetrieveDataWithGbifMedia() throws Exception {
givenImageMap(19));
given(digitalSpecimenDirector.assembleDigitalSpecimenTerm(any(JsonNode.class), anyBoolean()))
.willReturn(givenDigitalSpecimen());
given(digitalSpecimenDirector.assembleDigitalMediaObjects(anyBoolean(), any(JsonNode.class),
anyString())).willReturn(givenDigitalMediaObjects());
given(fdoProperties.getDigitalMediaObjectType()).willReturn("Doi of the digital media object");
given(fdoProperties.getDigitalSpecimenType()).willReturn("Doi of the digital specimen");

Expand All @@ -164,7 +172,8 @@ void testRetrieveDataWithGbifMedia() throws Exception {
// Then
then(dwcaRepository).should(times(3)).createTable(anyString());
then(dwcaRepository).should(times(2)).postRecords(anyString(), anyList());
then(kafkaService).should(times(19)).sendMessage(eq("digital-specimen"), anyString());
then(kafkaService).should(times(19)).sendMessage(eq("digital-specimen"), any(
DigitalSpecimenEvent.class));
cleanup("src/test/resources/dwca/test/dwca-kew-gbif-media.zip");
}

Expand All @@ -188,6 +197,8 @@ void testRetrieveDataWithAcMedia() throws Exception {
eq("ABC-DDD-ASD_http://rs.tdwg.org/ac/terms/Multimedia"))).willReturn(givenImageMap(14));
given(digitalSpecimenDirector.assembleDigitalSpecimenTerm(any(JsonNode.class), anyBoolean()))
.willReturn(givenDigitalSpecimen());
given(digitalSpecimenDirector.assembleDigitalMediaObjects(anyBoolean(), any(JsonNode.class),
anyString())).willReturn(givenDigitalMediaObjects());
given(fdoProperties.getDigitalMediaObjectType()).willReturn("Doi of the digital media object");
given(fdoProperties.getDigitalSpecimenType()).willReturn("Doi of the digital specimen");

Expand All @@ -197,10 +208,36 @@ void testRetrieveDataWithAcMedia() throws Exception {
// Then
then(dwcaRepository).should(times(2)).createTable(anyString());
then(dwcaRepository).should(times(2)).postRecords(anyString(), anyList());
then(kafkaService).should(times(14)).sendMessage(eq("digital-specimen"), anyString());
then(kafkaService).should(times(14)).sendMessage(eq("digital-specimen"), any(
DigitalSpecimenEvent.class));
cleanup("src/test/resources/dwca/test/dwca-naturalis-ac-media.zip");
}

@Test
void testRetrieveDataWithInvalidAcMedia() throws Exception {
// Given
givenDWCA("/dwca-invalid-ac-media.zip");
given(dwcaRepository.getCoreRecords(anyList(), anyString())).willReturn(givenSpecimenMap(1));
given(dwcaRepository.getRecords(anyList(),
eq("ABC-DDD-ASD_http://rs.tdwg.org/ac/terms/Multimedia"))).willReturn(givenImageMap(1));
given(digitalSpecimenDirector.assembleDigitalSpecimenTerm(any(JsonNode.class), anyBoolean()))
.willReturn(givenDigitalSpecimen());
given(digitalSpecimenDirector.assembleDigitalMediaObjects(anyBoolean(), any(JsonNode.class),
anyString())).willReturn(new DigitalEntity());
given(fdoProperties.getDigitalSpecimenType()).willReturn("Doi of the digital specimen");

// When
service.retrieveData();

// Then
var captor = ArgumentCaptor.forClass(DigitalSpecimenEvent.class);
then(dwcaRepository).should(times(2)).createTable(anyString());
then(dwcaRepository).should(times(2)).postRecords(anyString(), anyList());
then(kafkaService).should(times(1)).sendMessage(eq("digital-specimen"), captor.capture());
assertThat(captor.getValue().digitalMediaObjectEvents()).isEmpty();
cleanup("src/test/resources/dwca/test/dwca-invalid-ac-media.zip");
}

@Test
void testRetrieveOnlyOccurrence() throws Exception {
// Given
Expand All @@ -224,6 +261,8 @@ void testRetrieveDataWithAssociatedMedia() throws Exception {
givenSpecimenMapWithMedia(20));
given(digitalSpecimenDirector.assembleDigitalSpecimenTerm(any(JsonNode.class), anyBoolean()))
.willReturn(TestUtils.givenDigitalSpecimen());
given(digitalSpecimenDirector.assembleDigitalMediaObjects(anyBoolean(), any(JsonNode.class),
anyString())).willReturn(givenDigitalMediaObjects());
given(fdoProperties.getDigitalMediaObjectType()).willReturn("Doi of the digital media object");
given(fdoProperties.getDigitalSpecimenType()).willReturn("Doi of the digital specimen");

Expand All @@ -233,7 +272,8 @@ void testRetrieveDataWithAssociatedMedia() throws Exception {
// Then
then(dwcaRepository).should(times(1)).createTable(anyString());
then(dwcaRepository).should(times(1)).postRecords(anyString(), anyList());
then(kafkaService).should(times(20)).sendMessage(eq("digital-specimen"), anyString());
then(kafkaService).should(times(20)).sendMessage(eq("digital-specimen"), any(
DigitalSpecimenEvent.class));
cleanup("src/test/resources/dwca/test/dwca-lux-associated-media.zip");
}

Expand Down
Loading

0 comments on commit b9c83c6

Please sign in to comment.