Skip to content

Commit

Permalink
feat: relax timebased fragmentation (#1403)
Browse files Browse the repository at this point in the history
* feat: extend converter

* feat: time based fragmentation relaxed

* chore: tests extended to verify absence of type
  • Loading branch information
jobulcke authored Oct 25, 2024
1 parent 6344971 commit 52f932d
Show file tree
Hide file tree
Showing 13 changed files with 194 additions and 32 deletions.
Original file line number Diff line number Diff line change
@@ -1,22 +1,51 @@
package be.vlaanderen.informatievlaanderen.ldes.server.domain.converter;

import org.apache.jena.datatypes.RDFDatatype;
import org.apache.jena.datatypes.xsd.XSDDatatype;
import org.apache.jena.datatypes.xsd.XSDDateTime;
import org.apache.jena.rdf.model.Literal;

import java.time.LocalDateTime;
import java.time.ZoneId;
import java.time.ZoneOffset;
import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeFormatterBuilder;
import java.time.temporal.TemporalAccessor;
import java.util.Calendar;
import java.util.TimeZone;

/**
 * Converts timestamp literals from an RDF model into {@link LocalDateTime} values.
 *
 * <p>Two literal datatypes are accepted: {@code xsd:dateTime} and {@code xsd:string}.
 * Any other datatype is rejected with an {@link IllegalArgumentException}.
 */
public class LocalDateTimeConverter {

    /**
     * ISO local date-time followed by an optional offset, written either with a colon
     * ({@code +01:00}) or without one ({@code +01}, {@code +0100}); {@code Z} is accepted by both.
     * {@link DateTimeFormatter} is immutable and thread-safe, so one shared instance suffices.
     */
    private static final DateTimeFormatter STRING_TIMESTAMP_FORMATTER = new DateTimeFormatterBuilder()
            .append(DateTimeFormatter.ISO_LOCAL_DATE_TIME)
            .appendPattern("[XXX][X]")
            .toFormatter();

    /**
     * Converts the given literal to a {@link LocalDateTime}.
     *
     * @param literal the literal holding the timestamp, typed as {@code xsd:dateTime} or {@code xsd:string}
     * @return the timestamp as a local date-time
     * @throws IllegalArgumentException if the literal has any other datatype
     * @throws java.time.format.DateTimeParseException if an {@code xsd:string} literal does not hold a parsable timestamp
     */
    public LocalDateTime getLocalDateTime(Literal literal) {
        final RDFDatatype datatype = literal.getDatatype();
        // The datatype is checked BEFORE any cast: casting a parsed value to XSDDateTime up front
        // would fail with a ClassCastException for string literals and for unsupported datatypes.
        if (XSDDatatype.XSDdateTime.equals(datatype)) {
            return fromXsdDateTime((XSDDateTime) literal.getValue());
        }
        if (XSDDatatype.XSDstring.equals(datatype)) {
            return fromString(literal.getString());
        }
        throw new IllegalArgumentException("Provided datatype cannot be used for conversion: " + datatype);
    }

    /**
     * Converts a Jena {@link XSDDateTime} through its calendar representation; the result is
     * expressed in the time zone of that calendar.
     */
    private LocalDateTime fromXsdDateTime(XSDDateTime dateTime) {
        final Calendar calendar = dateTime.asCalendar();
        final TimeZone tz = calendar.getTimeZone();
        final ZoneId zoneId = tz.toZoneId();
        return LocalDateTime.ofInstant(calendar.toInstant(), zoneId);
    }

    /**
     * Parses a plain string timestamp. A value carrying an offset is shifted to UTC; a value
     * without an offset is returned unchanged.
     */
    private LocalDateTime fromString(String dateTime) {
        // parseBest tries the candidate types in order: zoned first, local as the fallback.
        final TemporalAccessor temporalAccessor =
                STRING_TIMESTAMP_FORMATTER.parseBest(dateTime, ZonedDateTime::from, LocalDateTime::from);
        return switch (temporalAccessor) {
            case ZonedDateTime zonedDateTime -> zonedDateTime.withZoneSameInstant(ZoneOffset.UTC).toLocalDateTime();
            case LocalDateTime localDateTime -> localDateTime;
            // TemporalAccessor is not sealed, so the switch needs a default to be exhaustive.
            default -> throw new IllegalArgumentException("Could not parse date time: " + dateTime);
        };
    }
}
Original file line number Diff line number Diff line change
@@ -1,37 +1,86 @@
package be.vlaanderen.informatievlaanderen.ldes.server.domain.converter;

import org.apache.jena.datatypes.TypeMapper;
import org.apache.jena.rdf.model.impl.LiteralImpl;
import org.junit.jupiter.api.Test;
import org.apache.jena.datatypes.RDFDatatype;
import org.apache.jena.datatypes.xsd.XSDDatatype;
import org.apache.jena.rdf.model.Literal;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.MethodSource;
import org.junit.jupiter.params.provider.ValueSource;

import java.time.LocalDateTime;
import java.time.format.DateTimeParseException;
import java.time.temporal.ChronoUnit;
import java.util.stream.Stream;

import static org.apache.jena.rdf.model.ResourceFactory.createTypedLiteral;
import static org.junit.jupiter.api.Assertions.*;
import static org.assertj.core.api.Assertions.*;

class LocalDateTimeConverterTest {

private final LocalDateTimeConverter localDateTimeConverter = new LocalDateTimeConverter();

@Test
void test_conversionOfXsdTimeToLocalTime() {
LiteralImpl typedLiteral = (LiteralImpl) createTypedLiteral("2022-05-20T09:58:15Z",
TypeMapper.getInstance().getTypeByName("http://www.w3.org/2001/XMLSchema#dateTime"));
static Stream<RDFDatatype> datatypes() {
return Stream.of(XSDDatatype.XSDdateTime, XSDDatatype.XSDstring);
}

LocalDateTime actualLocalDateTime = localDateTimeConverter.getLocalDateTime(typedLiteral);
static Stream<Literal> invalidDatatypes() {
return Stream.of(
createTypedLiteral("1729774515", XSDDatatype.XSDint),
createTypedLiteral("2022-05-20", XSDDatatype.XSDdate),
createTypedLiteral("09:58:15Z", XSDDatatype.XSDtime),
createTypedLiteral("true", XSDDatatype.XSDboolean)
);
}

@ParameterizedTest
@MethodSource("datatypes")
void test_conversionOfXsdTimeToLocalTime(RDFDatatype dataType) {
LocalDateTime expectedLocalDateTime = LocalDateTime.of(2022, 5, 20, 9, 58, 15);
assertEquals(expectedLocalDateTime, actualLocalDateTime);
Literal typedLiteral = createTypedLiteral("2022-05-20T09:58:15Z", dataType);

LocalDateTime actualLocalDateTime = localDateTimeConverter.getLocalDateTime(typedLiteral);

assertThat(actualLocalDateTime).isEqualTo(expectedLocalDateTime);
}

@Test
void test_conversionOfXsdTimeWithoutTimeZone() {
LiteralImpl typedLiteral = (LiteralImpl) createTypedLiteral("2023-04-14T12:10:30.629238",
TypeMapper.getInstance().getTypeByName("http://www.w3.org/2001/XMLSchema#dateTime"));
@ParameterizedTest
@MethodSource("datatypes")
void test_conversionOfXsdTimeToLocalTimeWithOffset(RDFDatatype dataType) {
LocalDateTime expectedLocalDateTime = LocalDateTime.of(2022, 5, 20, 8, 58, 15);
Literal typedLiteral = createTypedLiteral("2022-05-20T09:58:15+01:00", dataType);

LocalDateTime actualLocalDateTime = localDateTimeConverter.getLocalDateTime(typedLiteral);

assertThat(actualLocalDateTime).isEqualTo(expectedLocalDateTime);
}

@ParameterizedTest
@MethodSource("datatypes")
void test_conversionOfXsdTimeWithoutTimeZone(RDFDatatype dataType) {
LocalDateTime expectedLocalDateTime = LocalDateTime.of(2023, 4, 14, 12, 10, 30, 629000000);
assertEquals(expectedLocalDateTime, actualLocalDateTime);
Literal typedLiteral = createTypedLiteral("2023-04-14T12:10:30.629238", dataType);

LocalDateTime actualLocalDateTime = localDateTimeConverter.getLocalDateTime(typedLiteral);

assertThat(actualLocalDateTime)
.isCloseTo(expectedLocalDateTime, within(238000, ChronoUnit.NANOS));
}

@ParameterizedTest
@MethodSource("invalidDatatypes")
void test_conversionOfInvalidTimestamp(Literal literalWithInvalidDatatype) {
assertThatThrownBy(() -> localDateTimeConverter.getLocalDateTime(literalWithInvalidDatatype))
.isInstanceOf(IllegalArgumentException.class)
.hasMessage("Provided datatype cannot be used for conversion: " + literalWithInvalidDatatype.getDatatype());
}

@ParameterizedTest
@ValueSource(strings = {"not-a-timestamp", "2022-05-20T09:58:15Z+1:00", "2022-05-20T09:58:15Z[UTC]", "2022-05-20T09:58:15.999999999999999"})
void test_conversionOfInvalidValues(String value) {
Literal literalWithInvalidValue = createTypedLiteral(value, XSDDatatype.XSDstring);

assertThatThrownBy(() -> localDateTimeConverter.getLocalDateTime(literalWithInvalidValue))
.isInstanceOf(DateTimeParseException.class);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
import org.apache.jena.rdf.model.Model;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;
import org.mockito.InOrder;
import org.mockito.Mockito;

Expand Down Expand Up @@ -51,9 +53,10 @@ void setUp() {
childBucket = new Bucket(new BucketDescriptor(List.of(new BucketDescriptorPair("is", "child"))), VIEW_NAME);
}

@Test
void when_BucketisationCalled_Then_FunctionsAreCalled() {
Model model = loadModel("member_with_created_property.nq");
@ParameterizedTest
@ValueSource(strings = {"member_with_created_property.nq", "member_with_string_created_property.nq", "member_without_created_property.nq"})
void when_BucketisationCalled_Then_FunctionsAreCalled(String filename) {
Model model = loadModel(filename);
FragmentationMember member = new FragmentationMember(1, "subject", "versionOf", TIME, EVENT_STREAM_PROPERTIES, model);
FragmentationTimestamp fragmentationTimestamp = new FragmentationTimestamp(TIME, GRANULARITY);
when(bucketFinder.getLowestBucket(parentBucket, fragmentationTimestamp, Granularity.YEAR))
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
_:b0 <http://purl.org/dc/terms/created> "2023-01-01T00:00:00.000Z"^^<http://www.w3.org/2001/XMLSchema#string> .
_:b0 <http://schema.org/jobTitle> "Professor" .
_:b0 <http://schema.org/name> "Jane Doe" .
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
_:b0 <http://purl.org/dc/terms/created> "2023-01-01T00:00:00.000Z" .
_:b0 <http://schema.org/jobTitle> "Professor" .
_:b0 <http://schema.org/name> "Jane Doe" .
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
package be.vlaanderen.informatievlaanderen.ldes.server.ingest.rest.validators.ingestreportvalidator;

import be.vlaanderen.informatievlaanderen.ldes.server.domain.model.EventStream;
import org.apache.jena.datatypes.RDFDatatype;
import org.apache.jena.datatypes.xsd.XSDDatatype;
import org.apache.jena.rdf.model.*;
import org.springframework.core.annotation.Order;
import org.springframework.stereotype.Component;

import java.util.List;
import java.util.Objects;

@Order(2)
@Component
Expand All @@ -18,7 +18,6 @@ public void validate(Model model, EventStream eventStream, ShaclReportManager re
List<Resource> memberSubjects = model.listSubjects().filterDrop(RDFNode::isAnon).toList();

if (memberSubjects.size() > 1 && !eventStream.isVersionCreationEnabled()) {
// To be removed when bulk ingest is allowed when version creation is disabled
memberSubjects.forEach(subject -> reportManager.addEntry(subject,
"Only 1 member is allowed per request on collection with version creation disabled"
)
Expand All @@ -31,6 +30,7 @@ public void validate(Model model, EventStream eventStream, ShaclReportManager re

private void validateTimestampPath(List<Resource> memberSubjects, Model model, EventStream eventStream, ShaclReportManager reportManager) {
int expectedNumber = eventStream.isVersionCreationEnabled() ? 0 : 1;
List<RDFDatatype> validTypes = List.of(XSDDatatype.XSDdateTime, XSDDatatype.XSDstring);
memberSubjects.forEach(subject -> {
List<Statement> timestampStatements = getStatementsOfPath(subject, model, eventStream.getTimestampPath());
if (timestampStatements.size() != expectedNumber) {
Expand All @@ -40,9 +40,9 @@ private void validateTimestampPath(List<Resource> memberSubjects, Model model, E
}

timestampStatements.forEach(statement -> {
if (!statement.getObject().isLiteral() || !Objects.equals(statement.getObject().asLiteral().getDatatype(), XSDDatatype.XSDdateTime)) {
if (!statement.getObject().isLiteral() || !validTypes.contains(statement.getLiteral().getDatatype())) {
reportManager.addEntry(subject,
String.format(String.format("Object of statement with predicate: %s should be a literal of type %s", eventStream.getTimestampPath(), XSDDatatype.XSDdateTime.getURI()))
String.format(String.format("Object of statement with predicate: %s should be a literal either of type %s or %s", eventStream.getTimestampPath(), XSDDatatype.XSDdateTime.getURI(), XSDDatatype.XSDstring.getURI()))
);
}
});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import org.apache.jena.datatypes.xsd.XSDDatatype;
import org.apache.jena.rdf.model.Model;
import org.apache.jena.riot.RDFDataMgr;
import org.assertj.core.api.Condition;
import org.assertj.core.api.InstanceOfAssertFactories;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
Expand Down Expand Up @@ -46,7 +47,7 @@ void setup() {
/**
 * Validating an incorrect member must throw a {@code ShaclValidationException} whose message
 * contains every expected message.
 */
void when_IncorrectMemberReceived_Then_ValidationThrowsException(String modelName, String collectionName, List<String> expectedMessages) {
    Model model = RDFDataMgr.loadModel(modelName);
    String actualMessage = assertThrows(ShaclValidationException.class, () -> validator.validate(model, collectionName)).getMessage();
    // One AssertJ condition checks all expected messages at once; on failure it reports the
    // actual message together with the expected ones, which a bare assertTrue loop does not.
    assertThat(actualMessage).has(errorMessages(expectedMessages));
}

@ParameterizedTest
Expand Down Expand Up @@ -110,7 +111,7 @@ public Stream<? extends Arguments> provideArguments(ExtensionContext extensionCo
Arguments.of("example-ldes-member-wrong-type-version-of.nq", VERSION,
List.of("Object of statement with predicate: " + VERSIONOF_PATH + " should be a resource")),
Arguments.of("example-ldes-member-wrong-type-timestamp.nq", VERSION,
List.of("Object of statement with predicate: " + TIMESTAMP_PATH + " should be a literal of type " + XSDDatatype.XSDdateTime.getURI())),
List.of("Object of statement with predicate: " + TIMESTAMP_PATH + " should be a literal either of type " + XSDDatatype.XSDdateTime.getURI() + " or " + XSDDatatype.XSDstring.getURI())),
Arguments.of("example-ldes-member-dangling-nodes.nq", VERSION, List.of("Object graphs don't allow blank nodes to occur outside of a named object.")),
Arguments.of("example-ldes-member-blank-node.nq", VERSION, List.of("Object graphs don't allow blank nodes to occur outside of a named object.")),
Arguments.of("example-ldes-member-shared-blank-node.nq", VERSION, List.of("Blank nodes must be scoped to one object.")));
Expand All @@ -122,7 +123,15 @@ static class CorrectMemberArgumentsProvider implements ArgumentsProvider {
public Stream<? extends Arguments> provideArguments(ExtensionContext extensionContext) {
return Stream.of(
Arguments.of(RDFDataMgr.loadModel("example-ldes-member-state.nq"), STATE),
Arguments.of(RDFDataMgr.loadModel("example-ldes-member.nq"), VERSION));
Arguments.of(RDFDataMgr.loadModel("example-ldes-member-typeless-time.nq"), VERSION),
Arguments.of(RDFDataMgr.loadModel("example-ldes-member-string-time.nq"), VERSION),
Arguments.of(RDFDataMgr.loadModel("example-ldes-member.nq"), VERSION)
);
}
}

/**
 * Builds an AssertJ condition that is satisfied when the inspected string contains
 * every one of the given messages.
 *
 * @param expectedMessages the fragments that must all occur in the inspected string
 * @return the condition, described as "contained by" followed by the expected messages
 */
private Condition<String> errorMessages(List<String> expectedMessages) {
    final String description = "contained by %s".formatted(expectedMessages);
    return new Condition<>(actualMessage -> {
        for (String expectedMessage : expectedMessages) {
            if (!actualMessage.contains(expectedMessage)) {
                return false;
            }
        }
        return true;
    }, description);
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
<https://private-api.gipod.beta-vlaanderen.be/api/v1/mobility-hindrances/10810464/1> <http://purl.org/dc/elements/1.1/contributor> <https://private-api.gipod.beta-vlaanderen.be/api/v1/organisations/a7eadbde-86a6-076c-bd2f-7f5596ba3730> .
<https://private-api.gipod.beta-vlaanderen.be/api/v1/mobility-hindrances/10810464/1> <http://purl.org/dc/elements/1.1/creator> <https://private-api.gipod.beta-vlaanderen.be/api/v1/organisations/a7eadbde-86a6-076c-bd2f-7f5596ba3730> .
<https://private-api.gipod.beta-vlaanderen.be/api/v1/mobility-hindrances/10810464/1> <http://purl.org/dc/terms/created> "2022-05-20T09:58:15.8610896Z"^^<http://www.w3.org/2001/XMLSchema#string> .
<https://private-api.gipod.beta-vlaanderen.be/api/v1/mobility-hindrances/10810464/1> <http://purl.org/dc/terms/description> "omschrijving" .
<https://private-api.gipod.beta-vlaanderen.be/api/v1/mobility-hindrances/10810464/1> <http://purl.org/dc/terms/isVersionOf> <https://private-api.gipod.beta-vlaanderen.be/api/v1/mobility-hindrances/10810464> .
<https://private-api.gipod.beta-vlaanderen.be/api/v1/mobility-hindrances/10810464/1> <http://purl.org/dc/terms/modified> "2022-05-20T09:58:15.8646433Z"^^<http://www.w3.org/2001/XMLSchema#dateTime> .
<https://private-api.gipod.beta-vlaanderen.be/api/v1/mobility-hindrances/10810464/1> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://data.vlaanderen.be/ns/mobiliteit#Mobiliteitshinder> .
<https://private-api.gipod.beta-vlaanderen.be/api/v1/mobility-hindrances/10810464/1> <http://www.w3.org/ns/adms#identifier> _:genid1 .
<https://private-api.gipod.beta-vlaanderen.be/api/v1/mobility-hindrances/10810464/1> <http://www.w3.org/ns/adms#versionNotes> "MobilityHindranceZoneWasAdded"@nl-be .
<https://private-api.gipod.beta-vlaanderen.be/api/v1/mobility-hindrances/10810464/1> <http://www.w3.org/ns/prov#generatedAtTime> "2022-05-20T09:58:15.867Z"^^<http://www.w3.org/2001/XMLSchema#dateTime> .
<https://private-api.gipod.beta-vlaanderen.be/api/v1/mobility-hindrances/10810464/1> <https://data.vlaanderen.be/ns/mobiliteit#Inname.status> <https://private-api.gipod.beta-vlaanderen.be/api/v1/taxonomies/statuses/0a4ee99b-8b8a-47c8-913f-117220febee0> .
<https://private-api.gipod.beta-vlaanderen.be/api/v1/mobility-hindrances/10810464/1> <https://data.vlaanderen.be/ns/mobiliteit#beheerder> <https://private-api.gipod.beta-vlaanderen.be/api/v1/organisations/a7eadbde-86a6-076c-bd2f-7f5596ba3730> .
<https://private-api.gipod.beta-vlaanderen.be/api/v1/mobility-hindrances/10810464/1> <https://data.vlaanderen.be/ns/mobiliteit#periode> _:genid2 .
<https://private-api.gipod.beta-vlaanderen.be/api/v1/mobility-hindrances/10810464/1> <https://data.vlaanderen.be/ns/mobiliteit#zone> <https://private-api.gipod.beta-vlaanderen.be/api/v1/mobility-hindrances/10810464/zones/2af888b0-de0c-48c2-956b-8b36d01c1232> .
<https://private-api.gipod.beta-vlaanderen.be/api/v1/mobility-hindrances/10810464/1> <https://gipod.vlaanderen.be/ns/gipod#gipodId> "10810464"^^<http://www.w3.org/2001/XMLSchema#integer> .
_:genid1 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/ns/adms#Identifier> .
_:genid1 <http://www.w3.org/2004/02/skos/core#notation> "10810464"^^<https://gipod.vlaanderen.be/ns/gipod#gipodId> .
_:genid1 <http://www.w3.org/ns/adms#schemaAgency> "https://gipod.vlaanderen.be"@nl-be .
_:genid2 <http://data.europa.eu/m8g/endTime> "2022-05-27T17:00:00Z"^^<http://www.w3.org/2001/XMLSchema#dateTime> .
_:genid2 <http://data.europa.eu/m8g/startTime> "2022-05-27T07:00:00Z"^^<http://www.w3.org/2001/XMLSchema#dateTime> .
_:genid2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://data.europa.eu/m8g/PeriodOfTime> .
Loading

0 comments on commit 52f932d

Please sign in to comment.