Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Harmonise stratigraphy DD-179 #19

Merged
merged 2 commits into from
Mar 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 27 additions & 19 deletions src/main/java/eu/dissco/core/translator/terms/Term.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,7 @@


import com.fasterxml.jackson.databind.JsonNode;

import efg.DataSets.DataSet;
import eu.dissco.core.translator.terms.specimen.CollectingNumber;
import eu.dissco.core.translator.terms.specimen.Collector;
import eu.dissco.core.translator.terms.specimen.DatasetId;
import eu.dissco.core.translator.terms.specimen.DateCollected;
import eu.dissco.core.translator.terms.specimen.Modified;
import eu.dissco.core.translator.terms.specimen.ObjectType;
import eu.dissco.core.translator.terms.specimen.PhysicalSpecimenCollection;
import eu.dissco.core.translator.terms.specimen.SpecimenName;
import eu.dissco.core.translator.terms.specimen.TypeStatus;
import java.util.List;
import lombok.extern.slf4j.Slf4j;
import org.gbif.dwc.ArchiveFile;
Expand All @@ -29,38 +19,56 @@ public String retrieveFromDWCA(ArchiveFile archiveFile, Record rec) {
return "";
}

protected String searchDWCAForTerm(ArchiveFile archiveFile, Record rec, List<String> originalTerms){
protected String searchDWCAForTerm(ArchiveFile archiveFile, Record rec,
List<String> originalTerms) {
for (var originalTerm : originalTerms) {
if (archiveFile.getField(originalTerm) != null){
if (archiveFile.getField(originalTerm) != null) {
var value = rec.value(archiveFile.getField(originalTerm).getTerm());
if (value != null){
if (value != null) {
return value;
}
}
}
log.info("Term not found in any of these search fields: {}", originalTerms);
log.debug("Term not found in any of these search fields: {}", originalTerms);
return null;
}

protected String searchAbcdForTerm(JsonNode attributes, List<String> originalTerms){
protected String searchAbcdForTerm(JsonNode attributes, List<String> originalTerms) {
for (var originalTerm : originalTerms) {
if (attributes.get(originalTerm) != null){
if (attributes.get(originalTerm) != null) {
return attributes.get(originalTerm).asText();
}
}
log.info("Term not found in any of these search fields: {}", originalTerms);
log.debug("Term not found in any of these search fields: {}", originalTerms);
return null;
}

/**
 * Joins the values of all requested ABCD terms that are present on the unit.
 *
 * <p>Terms are visited in list order and every value found is appended, separated by
 * {@code " | "}. Terms that are missing, or whose value is an explicit JSON null, are skipped so
 * the literal text "null" never ends up in the combined value.
 *
 * @param unit the node holding the ABCD attributes, looked up by term name
 * @param abcdTerms the term names to look up, in the order they should appear in the result
 * @return the combined values, or {@code null} when none of the terms yielded a value
 */
protected String combineABCDTerms(JsonNode unit, List<String> abcdTerms) {
  var builder = new StringBuilder();
  for (var abcdTerm : abcdTerms) {
    // Single lookup: get() gives Java null for a missing field and a NullNode for JSON null,
    // and NullNode.asText() would otherwise contribute the string "null".
    var node = unit.get(abcdTerm);
    if (node == null || node.isNull()) {
      continue;
    }
    if (!builder.isEmpty()) {
      builder.append(" | ");
    }
    builder.append(node.asText());
  }
  return builder.isEmpty() ? null : builder.toString();
}

public abstract String getTerm();

public String retrieveFromABCD(JsonNode unit) {
log.info("No specific attributes retrieve specified for field: {}", getTerm());
log.debug("No specific attributes retrieve specified for field: {}", getTerm());
return null;
}

public String retrieveFromABCD(DataSet datasets) {
log.info("No specific attributes retrieve specified for field: {}", getTerm());
log.debug("No specific attributes retrieve specified for field: {}", getTerm());
return null;
}
}
38 changes: 38 additions & 0 deletions src/main/java/eu/dissco/core/translator/terms/TermMapper.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,22 @@
import eu.dissco.core.translator.terms.specimen.location.Locality;
import eu.dissco.core.translator.terms.specimen.location.StateProvince;
import eu.dissco.core.translator.terms.specimen.location.WaterBody;
import eu.dissco.core.translator.terms.specimen.stratigraphy.biostratigraphic.HighestBiostratigraphicZone;
import eu.dissco.core.translator.terms.specimen.stratigraphy.biostratigraphic.LowestBiostratigraphicZone;
import eu.dissco.core.translator.terms.specimen.stratigraphy.chronostratigraphic.EarliestAgeOrLowestStage;
import eu.dissco.core.translator.terms.specimen.stratigraphy.chronostratigraphic.EarliestEonOrLowestEonothem;
import eu.dissco.core.translator.terms.specimen.stratigraphy.chronostratigraphic.EarliestEpochOrLowestSeries;
import eu.dissco.core.translator.terms.specimen.stratigraphy.chronostratigraphic.EarliestEraOrLowestErathem;
import eu.dissco.core.translator.terms.specimen.stratigraphy.chronostratigraphic.EarliestPeriodOrLowestSystem;
import eu.dissco.core.translator.terms.specimen.stratigraphy.chronostratigraphic.LatestAgeOrHighestStage;
import eu.dissco.core.translator.terms.specimen.stratigraphy.chronostratigraphic.LatestEonOrHighestEonothem;
import eu.dissco.core.translator.terms.specimen.stratigraphy.chronostratigraphic.LatestEpochOrHighestSeries;
import eu.dissco.core.translator.terms.specimen.stratigraphy.chronostratigraphic.LatestEraOrHighestErathem;
import eu.dissco.core.translator.terms.specimen.stratigraphy.chronostratigraphic.LatestPeriodOrHighestSystem;
import eu.dissco.core.translator.terms.specimen.stratigraphy.lithostratigraphic.Bed;
import eu.dissco.core.translator.terms.specimen.stratigraphy.lithostratigraphic.Formation;
import eu.dissco.core.translator.terms.specimen.stratigraphy.lithostratigraphic.Group;
import eu.dissco.core.translator.terms.specimen.stratigraphy.lithostratigraphic.Member;
import java.util.ArrayList;
import java.util.List;
import lombok.RequiredArgsConstructor;
Expand Down Expand Up @@ -56,6 +72,27 @@ public static List<Term> locationTerms() {
return list;
}

/**
 * Creates one fresh instance of every stratigraphy term, in the order chronostratigraphic
 * (earliest, then latest), lithostratigraphic, biostratigraphic.
 *
 * @return a new, mutable list holding the stratigraphy terms
 */
private static List<Term> stratigraphyTerms() {
  List<Term> terms = List.of(
      // Chronostratigraphic: earliest / lowest
      new EarliestAgeOrLowestStage(),
      new EarliestEonOrLowestEonothem(),
      new EarliestEpochOrLowestSeries(),
      new EarliestEraOrLowestErathem(),
      new EarliestPeriodOrLowestSystem(),
      // Chronostratigraphic: latest / highest
      new LatestAgeOrHighestStage(),
      new LatestEonOrHighestEonothem(),
      new LatestEpochOrHighestSeries(),
      new LatestEraOrHighestErathem(),
      new LatestPeriodOrHighestSystem(),
      // Lithostratigraphic
      new Bed(),
      new Formation(),
      new Group(),
      new Member(),
      // Biostratigraphic
      new HighestBiostratigraphicZone(),
      new LowestBiostratigraphicZone());
  // Copy into an ArrayList so the caller still receives a mutable list.
  return new ArrayList<>(terms);
}

public static List<Term> harmonisedTerms() {
var list = new ArrayList<Term>();
list.add(new SpecimenName());
Expand All @@ -68,6 +105,7 @@ public static List<Term> harmonisedTerms() {
list.add(new Collector());
list.add(new TypeStatus());
list.addAll(locationTerms());
list.addAll(stratigraphyTerms());
return list;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,21 +22,20 @@ public String retrieveFromDWCA(ArchiveFile archiveFile, Record rec) {

@Override
public String retrieveFromABCD(JsonNode unit) {
StringBuilder builder = new StringBuilder();
combinePossibleValues(unit, builder);
if (builder.length() == 0){
var value = combinePossibleValues(unit);
if (value == null){
if (unit.get("abcd:gathering/agents/gatheringAgentsText") != null){
builder.append(unit.get("abcd:gathering/agents/gatheringAgentsText").asText());
return builder.toString();
return unit.get("abcd:gathering/agents/gatheringAgentsText").asText();
} else {
return null;
}
} else {
return builder.toString();
return value;
}
}

private void combinePossibleValues(JsonNode unit, StringBuilder builder) {
private String combinePossibleValues(JsonNode unit) {
var builder = new StringBuilder();
var iterateOverElements = true;
var numberFound = 0;
while (iterateOverElements) {
Expand All @@ -59,6 +58,11 @@ private void combinePossibleValues(JsonNode unit, StringBuilder builder) {
iterateOverElements = false;
}
}
if (builder.length() != 0){
return builder.toString();
} else {
return null;
}
}


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,12 @@ public class TypeStatus extends Term {
private final List<String> dwcaTerms = List.of(TERM);
southeo marked this conversation as resolved.
Show resolved Hide resolved

// Pick the first TypeStatus from ABCD
private final List<String> abcdTermsTypeStatus =
private final List<String> abcdTerms =
List.of(
"abcd:specimenUnit/nomenclaturalTypeDesignations/nomenclaturalTypeDesignation/0/typeStatus");
private final List<String> abcdTermsTypeName =
List.of(
"abcd:specimenUnit/nomenclaturalTypeDesignations/nomenclaturalTypeDesignation/0/typifiedName/fullScientificNameString");
private final List<String> abcdTermsCitation =
List.of(
"abcd:specimenUnit/nomenclaturalTypeDesignations/nomenclaturalTypeDesignation/0/nomenclaturalReference/titleCitation");
"abcd:specimenUnit/nomenclaturalTypeDesignations/nomenclaturalTypeDesignation/0/typeStatus",
"abcd:specimenUnit/nomenclaturalTypeDesignations/nomenclaturalTypeDesignation/0/typifiedName/fullScientificNameString",
"abcd:specimenUnit/nomenclaturalTypeDesignations/nomenclaturalTypeDesignation/0/nomenclaturalReference/titleCitation"
);

@Override
public String retrieveFromDWCA(ArchiveFile archiveFile, Record rec) {
Expand All @@ -30,10 +27,7 @@ public String retrieveFromDWCA(ArchiveFile archiveFile, Record rec) {

@Override
public String retrieveFromABCD(JsonNode unit) {
var status = super.searchAbcdForTerm(unit, abcdTermsTypeStatus);
var typeName = super.searchAbcdForTerm(unit, abcdTermsTypeName);
var citation = super.searchAbcdForTerm(unit, abcdTermsCitation);
return status + " | " + typeName + " | " + citation;
return super.combineABCDTerms(unit, abcdTerms);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
package eu.dissco.core.translator.terms.specimen.stratigraphy.biostratigraphic;

import com.fasterxml.jackson.databind.JsonNode;
import eu.dissco.core.translator.terms.Term;
import java.util.List;
import org.gbif.dwc.ArchiveFile;
import org.gbif.dwc.record.Record;

public class HighestBiostratigraphicZone extends Term {

public static final String TERM = DWC_PREFIX + "highestBiostratigraphicZone";

southeo marked this conversation as resolved.
Show resolved Hide resolved
private final List<String> dwcaTerms = List.of(TERM);
private final List<String> abcdTerms = List.of(
"abcd-efg:earthScienceSpecimen/unitStratigraphicDetermination/biostratigraphicAttributionsType/biostratigraphicAttribution/0/zonalFossilType",
"abcd-efg:earthScienceSpecimen/unitStratigraphicDetermination/biostratigraphicAttributionsType/biostratigraphicAttribution/0/fossilZoneName",
"abcd-efg:earthScienceSpecimen/unitStratigraphicDetermination/biostratigraphicAttributionsType/biostratigraphicAttribution/0/fossilSubzoneName");

@Override
public String retrieveFromDWCA(ArchiveFile archiveFile, Record rec) {
return super.searchDWCAForTerm(archiveFile, rec, dwcaTerms);
}

@Override
public String retrieveFromABCD(JsonNode unit) {
return super.combineABCDTerms(unit, abcdTerms);
}

@Override
public String getTerm() {
return TERM;
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
package eu.dissco.core.translator.terms.specimen.stratigraphy.biostratigraphic;

import com.fasterxml.jackson.databind.JsonNode;
import eu.dissco.core.translator.terms.Term;
import java.util.List;
import org.gbif.dwc.ArchiveFile;
import org.gbif.dwc.record.Record;

/**
 * Term that resolves the {@code lowestBiostratigraphicZone} value from either a DWCA record or
 * an ABCD unit.
 */
public class LowestBiostratigraphicZone extends Term {

  public static final String TERM = DWC_PREFIX + "lowestBiostratigraphicZone";

  // Common path of the biostratigraphic attribution at index 0; the individual fields hang off it.
  private static final String ABCD_ATTRIBUTION =
      "abcd-efg:earthScienceSpecimen/unitStratigraphicDetermination/biostratigraphicAttributionsType/biostratigraphicAttribution/0/";

  // DWCA field names searched for this term.
  private final List<String> dwcaTerms = List.of(TERM);

  // ABCD fields whose values are combined into a single value for this term.
  // NOTE(review): HighestBiostratigraphicZone lists exactly the same three paths, so both terms
  // resolve to the same ABCD value - confirm this is intended.
  private final List<String> abcdTerms = List.of(
      ABCD_ATTRIBUTION + "zonalFossilType",
      ABCD_ATTRIBUTION + "fossilZoneName",
      ABCD_ATTRIBUTION + "fossilSubzoneName");

  /**
   * Looks the term up in the archive record via {@code searchDWCAForTerm}.
   *
   * @param archiveFile the archive file whose fields are searched
   * @param rec the record to read the value from
   * @return whatever {@code searchDWCAForTerm} yields for {@link #TERM}
   */
  @Override
  public String retrieveFromDWCA(ArchiveFile archiveFile, Record rec) {
    return super.searchDWCAForTerm(archiveFile, rec, dwcaTerms);
  }

  /**
   * Combines the ABCD biostratigraphic fields of the unit via {@code combineABCDTerms}.
   *
   * @param unit the node holding the ABCD attributes
   * @return whatever {@code combineABCDTerms} yields for the configured ABCD fields
   */
  @Override
  public String retrieveFromABCD(JsonNode unit) {
    return super.combineABCDTerms(unit, abcdTerms);
  }

  /**
   * @return the name of this term, {@link #TERM}
   */
  @Override
  public String getTerm() {
    return TERM;
  }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
package eu.dissco.core.translator.terms.specimen.stratigraphy.chronostratigraphic;

import com.fasterxml.jackson.databind.JsonNode;
import eu.dissco.core.translator.terms.Term;
import java.util.List;
import lombok.extern.slf4j.Slf4j;

/**
 * Base class for chronostratigraphic terms; provides the shared lookup of a chronostratigraphic
 * name by its division in an ABCD unit.
 */
@Slf4j
public abstract class AbstractChronoStratigraphy extends Term {

  // Key templates; %s is filled with the index of the chronostratigraphic attribution.
  private static final String ABCD_DIVISION =
      "abcd-efg:earthScienceSpecimen/unitStratigraphicDetermination/chronostratigraphicAttributions/chronostratigraphicAttribution/%s/chronoStratigraphicDivision";
  private static final String ABCD_VALUE =
      "abcd-efg:earthScienceSpecimen/unitStratigraphicDetermination/chronostratigraphicAttributions/chronostratigraphicAttribution/%s/chronostratigraphicName";

  /**
   * Finds the chronostratigraphic name belonging to the first matching division.
   *
   * <p>The searched divisions are tried in list order. For each one, the attributions are walked
   * from index 0 upwards until an index without a division key is reached. The division text is
   * compared case-insensitively; a matching attribution without a name key is passed over.
   *
   * @param unit the node holding the ABCD attributes
   * @param divisionSearched the division names to look for, in order of preference
   * @return the name of the first matching attribution, or {@code null} when nothing matches
   */
  protected String searchABCDChronostratigraphy(JsonNode unit, List<String> divisionSearched) {
    for (var wantedDivision : divisionSearched) {
      for (int index = 0; ; index++) {
        var divisionNode = unit.get(String.format(ABCD_DIVISION, index));
        if (divisionNode == null) {
          // No attribution at this index: end of the list for this division.
          break;
        }
        if (divisionNode.asText().equalsIgnoreCase(wantedDivision)) {
          var nameNode = unit.get(String.format(ABCD_VALUE, index));
          if (nameNode != null) {
            return nameNode.asText();
          }
        }
      }
    }
    log.debug("No stratigraphy found for division: {}", divisionSearched);
    return null;
  }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
package eu.dissco.core.translator.terms.specimen.stratigraphy.chronostratigraphic;

import com.fasterxml.jackson.databind.JsonNode;
import java.util.List;
import org.gbif.dwc.ArchiveFile;
import org.gbif.dwc.record.Record;

/**
 * Term that resolves the {@code earliestAgeOrLowestStage} value from either a DWCA record or an
 * ABCD unit.
 */
public class EarliestAgeOrLowestStage extends AbstractChronoStratigraphy {

  public static final String TERM = DWC_PREFIX + "earliestAgeOrLowestStage";

  // DWCA field names searched for this term.
  private final List<String> dwcaTerms = List.of(TERM);

  /**
   * Looks the term up in the archive record via {@code searchDWCAForTerm}.
   *
   * @param archiveFile the archive file whose fields are searched
   * @param rec the record to read the value from
   * @return whatever {@code searchDWCAForTerm} yields for {@link #TERM}
   */
  @Override
  public String retrieveFromDWCA(ArchiveFile archiveFile, Record rec) {
    return super.searchDWCAForTerm(archiveFile, rec, dwcaTerms);
  }

  /**
   * Searches the ABCD chronostratigraphy of the unit, trying "SubStage" before "Stage".
   *
   * @param unit the node holding the ABCD attributes
   * @return whatever {@code searchABCDChronostratigraphy} yields for those divisions
   */
  @Override
  public String retrieveFromABCD(JsonNode unit) {
    var divisions = List.of("SubStage", "Stage");
    return super.searchABCDChronostratigraphy(unit, divisions);
  }

  /**
   * @return the name of this term, {@link #TERM}
   */
  @Override
  public String getTerm() {
    return TERM;
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
package eu.dissco.core.translator.terms.specimen.stratigraphy.chronostratigraphic;

import com.fasterxml.jackson.databind.JsonNode;
import java.util.List;
import org.gbif.dwc.ArchiveFile;
import org.gbif.dwc.record.Record;

/**
 * Term that resolves the {@code earliestEonOrLowestEonothem} value from either a DWCA record or
 * an ABCD unit.
 */
public class EarliestEonOrLowestEonothem extends AbstractChronoStratigraphy {

  public static final String TERM = DWC_PREFIX + "earliestEonOrLowestEonothem";

  // DWCA field names searched for this term.
  private final List<String> dwcaTerms = List.of(TERM);

  /**
   * Looks the term up in the archive record via {@code searchDWCAForTerm}.
   *
   * @param archiveFile the archive file whose fields are searched
   * @param rec the record to read the value from
   * @return whatever {@code searchDWCAForTerm} yields for {@link #TERM}
   */
  @Override
  public String retrieveFromDWCA(ArchiveFile archiveFile, Record rec) {
    return super.searchDWCAForTerm(archiveFile, rec, dwcaTerms);
  }

  /**
   * Searches the ABCD chronostratigraphy of the unit for the "Eonothem" division.
   *
   * @param unit the node holding the ABCD attributes
   * @return whatever {@code searchABCDChronostratigraphy} yields for that division
   */
  @Override
  public String retrieveFromABCD(JsonNode unit) {
    var divisions = List.of("Eonothem");
    return super.searchABCDChronostratigraphy(unit, divisions);
  }

  /**
   * @return the name of this term, {@link #TERM}
   */
  @Override
  public String getTerm() {
    return TERM;
  }

}
Loading